package de.julielab.jcore.reader.xml;

import de.julielab.costosys.configuration.FieldConfig;
import de.julielab.costosys.dbconnection.CoStoSysConnection;
import de.julielab.jcore.reader.db.DBMultiplier;
import de.julielab.jcore.reader.db.DBReader;
import de.julielab.jcore.reader.xmlmapper.mapper.XMLMapper;
import de.julielab.jcore.types.casflow.ToVisit;
import de.julielab.jcore.types.casmultiplier.RowBatch;
import de.julielab.jcore.types.ext.DBProcessingMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.AbstractCas;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.StringArray;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ResourceMetaData(name = "JCoRe XML Database Multiplier", description = "This CAS multiplier receives information about documents to be read from an instance of the XML Database Multiplier reader from the jcore-db-reader project. The multiplier employs the jcore-xml-mapper to map the document XML structure into CAS instances. It also supports additional tables sent by the DB Multiplier Reader that are then joined to the main table. This mechanism is used to load separate data from additional database tables and populate the CAS with them via the 'RowMapping' parameter. This component is part of the Jena Document Information System, JeDIS.", vendor = "JULIE Lab Jena, Germany", copyright = "JULIE Lab Jena, Germany")
@TypeCapability(inputs = {"de.julielab.jcore.types.casmultiplier.RowBatch"}, outputs = {"de.julielab.jcore.types.casflow.ToVisit"})
/* loaded from: input_file:de/julielab/jcore/reader/xml/XMLDBMultiplier.class */
public class XMLDBMultiplier extends DBMultiplier {
    // ---- Names of the UIMA configuration parameters read in initialize(). ----

    /** Parameter name for the optional row mapping definitions (see {@code rowMappingArray}). */
    public static final String PARAM_ROW_MAPPING = "RowMapping";
    /** Parameter name for the XML mapping file (see {@code mappingFileStr}). */
    public static final String PARAM_MAPPING_FILE = "MappingFile";
    /** Parameter name selecting the document item whose hash is compared; defaults to "document_text" in initialize(). */
    public static final String PARAM_ADD_SHA_HASH = "AddShaHash";
    /** Parameter name for the table the stored hash values are queried from. */
    public static final String PARAM_TABLE_DOCUMENT = "DocumentTable";
    /** Parameter name for the table schema used to resolve the primary key of the document table. */
    public static final String PARAM_TABLE_DOCUMENT_SCHEMA = "DocumentTableSchema";
    /** Parameter name for the delegate keys written into the ToVisit annotation. */
    public static final String PARAM_TO_VISIT_KEYS = "ToVisitKeys";
    /** Parameter name for the switch that enables creation of ToVisit annotations. */
    public static final String PARAM_ADD_TO_VISIT_KEYS = "AddToVisitKeys";
    /** Parameter name for the switch that enables setting the "document hash unchanged" flag. */
    public static final String PARAM_ADD_UNCHANGED_DOCUMENT_TEXT_FLAG = "AddUnchangedDocumentTextFlag";
    /** Parameter name for the size limit handed to the CasPopulator. */
    public static final String PARAM_TRUNCATE_AT_SIZE = "TruncateAtSize";
    private static final Logger log = LoggerFactory.getLogger(XMLDBMultiplier.class);
    /** Mapper created in initialize() from the configured mapping file and passed to the CasPopulator in next(). */
    protected XMLMapper xmlMapper;

    // Value of the RowMapping parameter; handed to Row2CasMapper and CasPopulator on the first call of next().
    @ConfigurationParameter(name = "RowMapping", mandatory = false, description = XMLDBReader.DESC_ROW_MAPPING)
    protected String[] rowMappingArray;

    // Value of the MappingFile parameter; used in initialize() to create the XML mapper.
    @ConfigurationParameter(name = "MappingFile", description = XMLDBReader.DESC_MAPPING_FILE)
    protected String mappingFileStr;

    // Prefix of the hash column name ("<value>_sha256"); initialize() falls back to "document_text" when unset.
    @ConfigurationParameter(name = PARAM_ADD_SHA_HASH, mandatory = false, description = "For use with AnnotationDefinedFlowController and XMIDBWriter. Possible values: document_text, defaults to 'document_text' and thus doesn't need to be specified manually at the moment. This parameter needs to match the value for the same parameter given to the XMIDBWriter in this pipeline. Then, a comparison between the existing hash in the database and the new hash of the CAS read in this pipeline can be made. In case the hashes match, the CAS is directly routed to the components specified in the ToVisitKeys parameter, skipping all other components. Note that this only works with AAEs where the first component is an 'AnnotationControlledFlow'. Additionally, the DBProcessingMetaData#hasDocumentHashChanged is set. This can be used by the XMIDBWriter to omit the reset of mirror subsets when updating the base document when the actual CAS text stayed the same.")
    private String documentItemToHash;

    // Table queried for stored hash values; its existence is checked on the first call of next().
    @ConfigurationParameter(name = PARAM_TABLE_DOCUMENT, mandatory = false, description = "For use with AnnotationDefinedFlowController. String parameter indicating the name of the table where the XMI data and, thus, the hash is stored. The name must be schema qualified. Note that in this component, only the ToVisit annotation is created that determines which components to apply to a CAS with matching (unchanged) hash. The logic to actually control the CAS flow is contained in the AnnotationDefinedFlowController.")
    private String xmiStorageDataTable;

    // Name of the field configuration used to obtain the primary key columns of the document table.
    @ConfigurationParameter(name = PARAM_TABLE_DOCUMENT_SCHEMA, mandatory = false, description = "For use with AnnotationDefinedFlowController. The name of the schema that the document table - given with the DocumentTable parameter - adheres to. Only the primary key part is required for hash value retrieval.")
    private String xmiStorageDataTableSchema;

    // Delegate keys copied into the ToVisit annotation; may be null or empty.
    @ConfigurationParameter(name = PARAM_TO_VISIT_KEYS, mandatory = false, description = "For use with AnnotationDefinedFlowController. Specifies the delegate AE keys of the AEs this CAS should still applied on although the hash has not changed. Can be null or empty indicating that no component should be applied to the CAS. The task of the AnnotationDefinedFlowController is then to read those annotations and route the CAS accordingly. The parameter values will only be added to the CAS if AddToVisitKeys is set to true, the document text hash is available in the database (see the AddShaHash parameter) and the hash in the database equals to the hash of the current document in the pipeline.")
    private String[] toVisitKeys;

    // Whether a ToVisit annotation is created for documents with an unchanged hash; false when unset.
    @ConfigurationParameter(name = PARAM_ADD_TO_VISIT_KEYS, mandatory = false, description = "Toggles the creation of annotations for the AnnotationDefinedFlowController. Only needed when such a flow controller is used in the pipeline. For details, see the description of ToVisitKeys.")
    private boolean addToVisitKeys;

    // Whether DBProcessingMetaData is flagged for documents with an unchanged hash; false when unset.
    @ConfigurationParameter(name = PARAM_ADD_UNCHANGED_DOCUMENT_TEXT_FLAG, mandatory = false, description = "Toggles the addition of the 'document text is unchanged' flag. The value of this flag is determined via a SHA256 hash of the CAS document text. When DocumentTable and DocumentTableSchema are specified, the hash value of the document in storage is retrieved and compared to the current value. The flag is then set with respect to the comparison result.")
    private boolean addUnchangedDocumentTextFlag;

    // Size limit passed to the CasPopulator; initialize() uses Integer.MAX_VALUE when unset.
    @ConfigurationParameter(name = PARAM_TRUNCATE_AT_SIZE, mandatory = false, description = "Specify size in bytes of the XML document size. If the document surpasses that size, it is not populated from XMI but given some placeholder information. This can be necessary when large documents cannot be handled by subsequent components in the pipeline.")
    private int truncationSize;
    // Created lazily on the first call of next() that has a document to deliver.
    private Row2CasMapper row2CasMapper;
    // Created lazily together with row2CasMapper; performs the actual CAS population.
    private CasPopulator casPopulator;
    // Maps a comma-joined primary key to the hash stored in the database; refreshed for every row batch in process().
    private Map<String, String> docId2HashMap;
    // False until the lazily created helpers above have been set up in next(); reset in initialize().
    private boolean initialized;

    /**
     * Reads the component configuration from the UIMA context, creates the XML mapper from the
     * mapping file and validates the parameters used for hash comparison.
     *
     * <p>When ToVisit keys or the unchanged-text flag are requested, the document table and its
     * schema must either both be given or both be omitted.</p>
     *
     * @param uimaContext the context providing the configuration parameter values
     * @throws ResourceInitializationException if only one of the document table and the document
     *                                         table schema is specified while hash comparison is
     *                                         requested, or if the super class initialization fails
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);

        mappingFileStr = (String) uimaContext.getConfigParameterValue(PARAM_MAPPING_FILE);
        rowMappingArray = (String[]) uimaContext.getConfigParameterValue(PARAM_ROW_MAPPING);
        xmiStorageDataTable = (String) uimaContext.getConfigParameterValue(PARAM_TABLE_DOCUMENT);
        xmiStorageDataTableSchema = (String) uimaContext.getConfigParameterValue(PARAM_TABLE_DOCUMENT_SCHEMA);

        // Optional parameters fall back to their defaults when they are not configured.
        String configuredItemToHash = (String) uimaContext.getConfigParameterValue(PARAM_ADD_SHA_HASH);
        documentItemToHash = configuredItemToHash != null ? configuredItemToHash : "document_text";

        toVisitKeys = (String[]) uimaContext.getConfigParameterValue(PARAM_TO_VISIT_KEYS);

        Object configuredAddKeys = uimaContext.getConfigParameterValue(PARAM_ADD_TO_VISIT_KEYS);
        addToVisitKeys = configuredAddKeys != null ? (Boolean) configuredAddKeys : false;

        Object configuredAddFlag = uimaContext.getConfigParameterValue(PARAM_ADD_UNCHANGED_DOCUMENT_TEXT_FLAG);
        addUnchangedDocumentTextFlag = configuredAddFlag != null ? (Boolean) configuredAddFlag : false;

        Integer configuredTruncationSize = (Integer) uimaContext.getConfigParameterValue(PARAM_TRUNCATE_AT_SIZE);
        truncationSize = configuredTruncationSize != null ? configuredTruncationSize : Integer.MAX_VALUE;

        xmlMapper = new Initializer(mappingFileStr, null, null).getXmlMapper();
        // The row mapper and CAS populator are created lazily in next().
        initialized = false;

        boolean hashComparisonRequested = addToVisitKeys || addUnchangedDocumentTextFlag;
        boolean tableGiven = xmiStorageDataTable != null;
        boolean schemaGiven = xmiStorageDataTableSchema != null;
        // Table and schema are only useful together; documentItemToHash always has a value at this point.
        if (hashComparisonRequested && tableGiven != schemaGiven) {
            String message = String.format("From the parameters '%s' and '%s' some are specified and some aren't. To activate hash value comparison in order to add aggregate component keys for CAS visit, specify all those parameters. Otherwise, specify none.", PARAM_TABLE_DOCUMENT, PARAM_TABLE_DOCUMENT_SCHEMA);
            log.error(message);
            throw new ResourceInitializationException(new IllegalArgumentException(message));
        }
    }

    /**
     * Lets the super class handle the incoming CAS and then loads the hashes stored in the
     * database for the documents of the row batch contained in that CAS.
     *
     * @param jCas the CAS carrying exactly one {@link RowBatch} annotation
     * @throws AnalysisEngineProcessException if the super class processing or the hash retrieval fails
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        super.process(jCas);
        RowBatch rowBatch = JCasUtil.selectSingle(jCas, RowBatch.class);
        docId2HashMap = fetchCurrentHashesFromDatabase(rowBatch);
    }

    /**
     * Delivers the next output CAS. If the document iterator has another row, the CAS is populated
     * from it and, where applicable, marked with the ToVisit annotation / unchanged-hash flag.
     * Otherwise the empty CAS is returned as is.
     *
     * <p>If anything fails after the empty CAS has been obtained, the CAS is released before the
     * exception is propagated so that it is given back instead of staying checked out.</p>
     *
     * @return the populated (or still empty) CAS
     * @throws AnalysisEngineProcessException if the lazy setup or the CAS population fails
     */
    @Override
    public AbstractCas next() throws AnalysisEngineProcessException {
        JCas jCas = getEmptyJCas();
        try {
            if (this.documentDataIterator.hasNext()) {
                if (!this.initialized) {
                    initializeOnFirstDocument();
                }
                populateCas(jCas, (byte[][]) this.documentDataIterator.next());
                setToVisitAnnotation(jCas);
            }
            return jCas;
        } catch (Exception e) {
            log.error("Exception occurred: ", e);
            // The CAS will never be returned to the caller, so hand it back ourselves.
            jCas.release();
            throw e;
        }
    }

    /**
     * Creates the row mapper and the CAS populator and verifies that the configured hash table
     * exists. Runs once, on the first call of {@link #next()} that has a document to deliver.
     *
     * @throws AnalysisEngineProcessException if the hash table does not exist or a helper cannot be created
     */
    private void initializeOnFirstDocument() throws AnalysisEngineProcessException {
        try {
            this.row2CasMapper = new Row2CasMapper(this.rowMappingArray, () -> {
                return getAllRetrievedColumns();
            });
            if (this.xmiStorageDataTable != null && !this.dbc.withConnectionQueryBoolean(dataBaseConnector -> {
                return Boolean.valueOf(dataBaseConnector.tableExists(this.xmiStorageDataTable));
            })) {
                throw new AnalysisEngineProcessException(new IllegalArgumentException("The data table " + this.xmiStorageDataTable + " to retrieve hash values from for document text change detection does not exist in the database: " + this.dbc.getDbURL()));
            }
            this.casPopulator = new CasPopulator(this.dbc, this.xmlMapper, this.row2CasMapper, this.rowMappingArray, this.truncationSize);
            this.initialized = true;
        } catch (ResourceInitializationException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * Compares the hash of the CAS document text with the hash stored for the same document and,
     * if both are equal, sets the unchanged-hash flag and/or adds a {@link ToVisit} annotation,
     * depending on the configuration. Does nothing when neither feature is enabled, when no
     * stored hash exists for the document or when the hashes differ.
     *
     * @param jCas the populated CAS; must contain exactly one {@link DBProcessingMetaData}
     */
    private void setToVisitAnnotation(JCas jCas) {
        if (!addToVisitKeys && !addUnchangedDocumentTextFlag) {
            return;
        }

        DBProcessingMetaData metaData = JCasUtil.selectSingle(jCas, DBProcessingMetaData.class);
        // The lookup key is the comma-joined primary key, matching the keys of docId2HashMap.
        String docId = String.join(",", metaData.getPrimaryKey().toArray());
        String storedHash = docId2HashMap.get(docId);
        if (storedHash == null) {
            log.trace("No existing hash was found for document {}", docId);
            return;
        }

        String currentHash = getHash(jCas);
        if (!storedHash.equals(currentHash)) {
            return;
        }

        if (log.isTraceEnabled()) {
            log.trace("Document {} has a document text hash that equals the one present in the database. Creating a ToVisit annotation routing it only to the components with delegate keys {}.", docId, toVisitKeys);
        }

        if (addUnchangedDocumentTextFlag) {
            metaData.setIsDocumentHashUnchanged(true);
        }

        if (addToVisitKeys) {
            ToVisit toVisit = new ToVisit(jCas);
            boolean hasDelegateKeys = toVisitKeys != null && toVisitKeys.length != 0;
            // Without delegate keys the annotation is still added, just without a key array.
            if (hasDelegateKeys) {
                StringArray delegateKeys = new StringArray(jCas, toVisitKeys.length);
                delegateKeys.copyFromArray(toVisitKeys, 0, 0, toVisitKeys.length);
                toVisit.setDelegateKeys(delegateKeys);
            }
            toVisit.addToIndexes();
        }
    }

    /**
     * Computes the Base64-encoded SHA-256 digest of the CAS document text.
     *
     * @param jCas the CAS whose document text is hashed
     * @return the Base64 string of the SHA-256 digest
     */
    private String getHash(JCas jCas) {
        // NOTE(review): getBytes() without an argument uses the platform default charset, so the
        // result is only comparable with hashes computed under the same charset - confirm how the
        // stored hashes were produced before pinning an explicit charset here.
        byte[] textBytes = jCas.getDocumentText().getBytes();
        byte[] digest = DigestUtils.sha256(textBytes);
        return Base64.encodeBase64String(digest);
    }

    /**
     * Fills the given CAS from one database row via the CAS populator. The callback passed along
     * stores the database processing meta data for the row in the CAS.
     *
     * @param jCas the CAS to populate
     * @param bArr the raw column values of the row
     * @throws AnalysisEngineProcessException wrapping any {@link CasPopulationException}
     */
    private void populateCas(JCas jCas, byte[][] bArr) throws AnalysisEngineProcessException {
        try {
            casPopulator.populateCas(jCas, bArr, (rowData, targetCas) -> {
                DBReader.setDBProcessingMetaData(dbc, readDataTable, tableName, rowData, targetCas);
            });
        } catch (CasPopulationException populationFailure) {
            throw new AnalysisEngineProcessException(populationFailure);
        }
    }

    /**
     * Returns a copy of the field definitions of all columns retrieved from the database. Each
     * definition is copied into a fresh {@link HashMap}. Whether the additional tables are taken
     * into account is controlled by passing {@code tables.length > 1} to the connector.
     *
     * @return one map per retrieved column
     */
    @SuppressWarnings("unchecked")
    protected List<Map<String, Object>> getAllRetrievedColumns() {
        boolean joined = this.tables.length > 1;
        List<?> fieldDefinitions = (List<?>) this.dbc.getNumColumnsAndFields(joined, this.schemaNames).getRight();
        List<Map<String, Object>> columns = new ArrayList<>();
        for (Object fieldDefinition : fieldDefinitions) {
            // Unchecked: the element type is declared outside this class; every element is copied as a map.
            columns.add(new HashMap<String, Object>((Map<String, ?>) fieldDefinition));
        }
        return columns;
    }

    /**
     * Retrieves the hashes currently stored in the document table for all documents of the given
     * row batch.
     *
     * <p>An empty map is returned without touching the database when hash comparison is not
     * enabled, when the batch carries no identifiers, or when no document table / schema is
     * configured (a state {@code initialize} explicitly allows).</p>
     *
     * @param rowBatch the batch whose identifiers (one {@link StringArray} of primary key values
     *                 per document) select the rows to query
     * @return a map from the comma-joined primary key values to the value of the
     *         {@code <documentItemToHash>_sha256} column; never {@code null}
     * @throws AnalysisEngineProcessException if the query fails
     */
    private Map<String, String> fetchCurrentHashesFromDatabase(RowBatch rowBatch) throws AnalysisEngineProcessException {
        boolean hashesRequested = this.addToVisitKeys || this.addUnchangedDocumentTextFlag;
        if (!hashesRequested || rowBatch.getIdentifiers() == null || rowBatch.getIdentifiers().size() <= 0) {
            return new HashMap<>();
        }
        // Without a table and schema there is nothing to query; building SQL here would only fail.
        if (this.xmiStorageDataTable == null || this.xmiStorageDataTableSchema == null) {
            return new HashMap<>();
        }

        String hashColumn = this.documentItemToHash + "_sha256";
        List<String[]> documentIds = new ArrayList<>(rowBatch.getIdentifiers().size());
        for (Object identifier : rowBatch.getIdentifiers()) {
            documentIds.add(((StringArray) identifier).toStringArray());
        }

        Map<String, String> id2Hash = new HashMap<>(documentIds.size());
        // Declared outside the try block so the failing query can be logged.
        String sql = null;
        try (CoStoSysConnection connection = this.dbc.obtainOrReserveConnection()) {
            FieldConfig fieldConfig = this.dbc.getFieldConfiguration(this.xmiStorageDataTableSchema);
            // One "<pk1>='v1' AND <pk2>='v2'" group per document, all groups joined with OR.
            // The "%s" placeholders are filled with the primary key column names by expandPKNames.
            String whereClause = documentIds.stream()
                    .map(pkValues -> Arrays.stream(pkValues)
                            // Double single quotes so a value cannot terminate the SQL string literal.
                            .map(value -> "%s='" + String.valueOf(value).replace("'", "''") + "'")
                            .toArray(String[]::new))
                    .map(fieldConfig::expandPKNames)
                    .map(conditions -> String.join(" AND ", conditions))
                    .collect(Collectors.joining(" OR "));
            sql = String.format("SELECT %s,%s FROM %s WHERE %s", fieldConfig.getPrimaryKeyString(), hashColumn, this.xmiStorageDataTable, whereClause);

            int numPkColumns = fieldConfig.getPrimaryKey().length;
            try (Statement statement = connection.createStatement();
                 ResultSet resultSet = statement.executeQuery(sql)) {
                while (resultSet.next()) {
                    String[] pkValues = new String[numPkColumns];
                    for (int i = 0; i < numPkColumns; i++) {
                        pkValues[i] = resultSet.getString(i + 1);
                    }
                    // The hash column directly follows the primary key columns in the SELECT list.
                    id2Hash.put(String.join(",", pkValues), resultSet.getString(numPkColumns + 1));
                }
            }
            return id2Hash;
        } catch (SQLException e) {
            log.error("Could not retrieve hashes from the database. SQL query was '{}':", sql, e);
            throw new AnalysisEngineProcessException(e);
        }
    }
}
