package de.julielab.jcore.reader.xmi;

import de.julielab.costosys.cli.TableNotFoundException;
import de.julielab.costosys.configuration.FieldConfig;
import de.julielab.costosys.dbconnection.CoStoSysConnection;
import de.julielab.costosys.dbconnection.DataBaseConnector;
import de.julielab.jcore.reader.db.DBMultiplierReader;
import de.julielab.jcore.types.casmultiplier.RowBatch;
import de.julielab.jcore.utility.JCoReTools;
import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
import org.apache.uima.UimaContext;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ResourceMetaData(name = "JCoRe XMI Database Multiplier Reader", description = "This is an extension of the DBMultiplierReader to handle JeDIS XMI annotation module data.")
/* loaded from: input_file:de/julielab/jcore/reader/xmi/XmiDBMultiplierReader.class */
public class XmiDBMultiplierReader extends DBMultiplierReader {
    public static final String PARAM_STORE_XMI_ID = "StoreMaxXmiId";
    public static final String PARAM_READS_BASE_DOCUMENT = "ReadsBaseDocument";
    public static final String PARAM_INCREASED_ATTRIBUTE_SIZE = "IncreasedAttributeSize";
    public static final String PARAM_XERCES_ATTRIBUTE_BUFFER_SIZE = "XercesAttributeBufferSize";
    public static final String PARAM_ANNOTATIONS_TO_LOAD = "AnnotationsToLoad";
    public static final String PARAM_XMI_META_SCHEMA = "XmiMetaTablesSchema";
    private static final Logger log = LoggerFactory.getLogger(XmiDBMultiplierReader.class);

    @ConfigurationParameter(name = "AnnotationsToLoad", mandatory = false, description = "An array of qualified UIMA type names. The provided names will be converted to database table column names in an equivalent manner as the XMIDBWriter does when storing the annotations. Thus, by default the columns of the XMI table holding annotation module information are named by lowercased UIMA type name where dots are replaced by underscores.. This can be overwritten by appending '<schema>:' to a table name. The given type names will be converted to valid Postgres columns names by replacing dots with underscores and the colon will be converted to the dollar character. From the resolved columns, annotation modules in segmented XMI format are read where an annotation module contains all annotation instances of a specific type in a specific document. All annotation modules read this way are merged with the base document, resulting in valid XMI data which is then deserialized into the CAS.")
    protected String[] qualifiedAnnotationColumnNames;

    @ConfigurationParameter(name = "ReadsBaseDocument", description = "Indicates if this reader reads segmented annotation data. If set to false, the XMI data is expected to represent complete annotated documents. If it is set to true, a segmented annotation graph is expected and the table given with the 'Table' parameter will contain the document text together with some basic annotations. What exactly is stored in which manner is determined by the jcore-xmi-db-consumer used to write the data into the database.")
    private Boolean readsBaseDocument;

    @ConfigurationParameter(name = "StoreMaxXmiId", mandatory = false, description = "This parameter is required to be set to true, if this reader is contained in a pipeline that also contains a jcore-xmi-db-writer andthe writer will segment the CAS annotation graph and store only parts of it. Then, it is important to keep track of the free XMI element IDs that may be assigned to new annotation elements to avoid ID clashes when assembling an XMI document from separately stored annotation graph segments.")
    private Boolean storeMaxXmiId;

    @ConfigurationParameter(name = "IncreasedAttributeSize", mandatory = false, description = "Maxmimum XML attribute size in bytes. Since the CAS document text is stored as an XMI attribute, it might happen for large documents that there is an error because the maximum attribute size is exceeded. This parameter allows to specify the maxmimum  attribute size in order to avoid such errors. Should only be set if required.")
    private int maxXmlAttributeSize;

    @ConfigurationParameter(name = "XercesAttributeBufferSize", mandatory = false, description = "Initial XML parser buffer size in bytes. For large documents, it can happen that XMI parsing is extremely slow. By employing monitoring tools like the jconsole or (j)visualvm, the hot spots of work can be identified. If one of those is the XML attribute buffer resizing, this parameter should be set to a size that makes buffer resizing unnecessary.")
    private int xercesAttributeBufferSize;

    @ConfigurationParameter(name = "XmiMetaTablesSchema", mandatory = false, defaultValue = {"public"}, description = "Each XMI file defines a number of XML namespaces according to the types used in the document. Those namespaces are stored in a table named '_xmi_namespaces' when splitting annotations in annotation modules by the XMI DB writer. This parameter allows to specify in which Postgres schema this table should be looked for. Also, the table listing the annotation tables is stored in this Postgres schema. Defaults to 'public'.")
    private String xmiMetaSchema;
    private boolean doGzip;
    private String[] additionalTableNames;
    private boolean useBinaryFormat;

    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        this.qualifiedAnnotationColumnNames = (String[]) Optional.ofNullable((String[]) uimaContext.getConfigParameterValue("AnnotationsToLoad")).orElse(new String[0]);
        adaptReaderConfigurationForXmiData();
        super.initialize(uimaContext);
        this.readsBaseDocument = (Boolean) (uimaContext.getConfigParameterValue("ReadsBaseDocument") == null ? false : uimaContext.getConfigParameterValue("ReadsBaseDocument"));
        this.storeMaxXmiId = (Boolean) (uimaContext.getConfigParameterValue("StoreMaxXmiId") == null ? false : uimaContext.getConfigParameterValue("StoreMaxXmiId"));
        this.readsBaseDocument = (Boolean) (uimaContext.getConfigParameterValue("ReadsBaseDocument") == null ? false : uimaContext.getConfigParameterValue("ReadsBaseDocument"));
        Optional.ofNullable((Integer) uimaContext.getConfigParameterValue("IncreasedAttributeSize")).ifPresent(num -> {
            this.maxXmlAttributeSize = num.intValue();
        });
        Optional.ofNullable((Integer) uimaContext.getConfigParameterValue("XercesAttributeBufferSize")).ifPresent(num2 -> {
            this.xercesAttributeBufferSize = num2.intValue();
        });
        this.xmiMetaSchema = (String) Optional.ofNullable((String) uimaContext.getConfigParameterValue("XmiMetaTablesSchema")).orElse("public");
        super.initialize(uimaContext);
    }

    public void getNext(JCas jCas) throws CollectionException, IOException {
        try {
            super.getNext(jCas);
            RowBatch selectSingle = JCasUtil.selectSingle(jCas, RowBatch.class);
            selectSingle.setReadsBaseXmiDocument(this.readsBaseDocument.booleanValue());
            if (this.qualifiedAnnotationColumnNames != null) {
                selectSingle.setXmiAnnotationModuleNames(JCoReTools.newStringArray(jCas, this.qualifiedAnnotationColumnNames));
            }
            selectSingle.setStoreMaxXmiId(this.storeMaxXmiId.booleanValue());
            selectSingle.setIncreasedAttributeSize(this.maxXmlAttributeSize);
            selectSingle.setXercesAttributeBufferSize(this.xercesAttributeBufferSize);
            selectSingle.setXmiMetaTablesPostgresSchema(this.xmiMetaSchema);
        } catch (Throwable th) {
            log.error("Exception occurred while trying to get the next document", th);
            throw th;
        }
    }

    private void adaptReaderConfigurationForXmiData() throws ResourceInitializationException {
        this.costosysConfig = (String) getConfigParameterValue("CostosysConfigFile");
        try {
            this.dbc = new DataBaseConnector(this.costosysConfig);
            if (this.dbc.getMaxConnections() < 3) {
                this.dbc.setMaxConnections(3);
            }
            CoStoSysConnection obtainOrReserveConnection = this.dbc.obtainOrReserveConnection();
            try {
                List list = (List) this.dbc.getActiveTableFieldConfiguration().getPrimaryKeyFields().collect(Collectors.toList());
                String str = (String) getConfigParameterValue("Table");
                if (!this.dbc.tableExists(str)) {
                    throw new ResourceInitializationException(new TableNotFoundException("Table " + str + " does not exist in database " + this.dbc.getDbURL()));
                }
                if (((Boolean) getConfigParameterValue("ReadsBaseDocument")).booleanValue()) {
                    determineDataFormat(str);
                    ArrayList arrayList = new ArrayList();
                    for (String str2 : this.qualifiedAnnotationColumnNames) {
                        String replace = str2.toLowerCase().replace('.', '_').replace(':', '$');
                        String[] strArr = new String[8];
                        strArr[0] = "name";
                        strArr[1] = replace;
                        strArr[2] = "gzip";
                        strArr[3] = String.valueOf(this.doGzip);
                        strArr[4] = "retrieve";
                        strArr[5] = "true";
                        strArr[6] = "type";
                        strArr[7] = (this.doGzip || this.useBinaryFormat) ? "bytea" : "xml";
                        arrayList.add(FieldConfig.createField(strArr));
                    }
                    FieldConfig addXmiTextFieldConfiguration = this.dbc.addXmiTextFieldConfiguration(list, arrayList, this.doGzip);
                    this.dbc.setActiveTableSchema(addXmiTextFieldConfiguration.getName());
                    XmiReaderUtils.checkXmiTableSchema(this.dbc, this.tableName, addXmiTextFieldConfiguration, getMetaData().getName());
                } else {
                    determineDataFormat(str);
                    this.dbc.setActiveTableSchema(this.dbc.addXmiDocumentFieldConfiguration(list, this.doGzip).getName());
                }
                if (obtainOrReserveConnection != null) {
                    obtainOrReserveConnection.close();
                }
            } catch (Throwable th) {
                if (obtainOrReserveConnection != null) {
                    try {
                        obtainOrReserveConnection.close();
                    } catch (Throwable th2) {
                        th.addSuppressed(th2);
                    }
                }
                throw th;
            }
        } catch (FileNotFoundException e) {
            throw new ResourceInitializationException(e);
        }
    }

    private void determineDataFormat(String str) throws ResourceInitializationException {
        this.doGzip = true;
        this.useBinaryFormat = true;
        this.dataTable = this.dbc.getNextOrThisDataTable(str);
        log.debug("Fetching a single row from data table {} in order to determine whether data is in GZIP format", this.dataTable);
        try {
            CoStoSysConnection obtainOrReserveConnection = this.dbc.obtainOrReserveConnection();
            try {
                ResultSet executeQuery = obtainOrReserveConnection.createStatement().executeQuery(String.format("SELECT %s FROM %s LIMIT 1", "base_document", this.dataTable));
                while (executeQuery.next()) {
                    byte[] bytes = executeQuery.getBytes("base_document");
                    try {
                        GZIPInputStream gZIPInputStream = new GZIPInputStream(new ByteArrayInputStream(bytes));
                        try {
                            byte[] bArr = new byte[2];
                            gZIPInputStream.read(bArr);
                            checkForJeDISBinaryFormat(bArr);
                            gZIPInputStream.close();
                        } catch (Throwable th) {
                            try {
                                gZIPInputStream.close();
                            } catch (Throwable th2) {
                                th.addSuppressed(th2);
                            }
                            throw th;
                            break;
                        }
                    } catch (IOException e) {
                        log.debug("Attempt to read XMI data in GZIP format failed. Assuming non-gzipped XMI data.");
                        this.doGzip = false;
                        checkForJeDISBinaryFormat(bytes);
                    }
                }
                if (obtainOrReserveConnection != null) {
                    obtainOrReserveConnection.close();
                }
            } finally {
            }
        } catch (SQLException e2) {
            if (e2.getMessage().contains("does not exist")) {
                log.error("An exception occurred when trying to read the xmi column of the data table \"{}\". It seems the table does not contain XMI data and this is invalid to use with this reader.", this.dataTable);
            }
            throw new ResourceInitializationException(e2);
        }
    }

    private void checkForJeDISBinaryFormat(byte[] bArr) {
        if (((short) ((bArr[0] << 8) | (255 & bArr[1]))) == 24981) {
            log.debug("Is data encoded in JeDIS binary format: true");
        } else {
            this.useBinaryFormat = false;
            log.debug("Is data encoded in JeDIS binary format: false");
        }
    }
}
