package de.julielab.jcore.consumer.xmi;

import de.julielab.jcore.types.Header;
import de.julielab.jcore.types.XmiMetaData;
import de.julielab.jcore.types.ext.DBProcessingMetaData;
import de.julielab.xml.StaxXmiSplitter;
import de.julielab.xml.WholeXmiStaxSplitter;
import de.julielab.xml.XmiSplitter;
import de.julielab.xml.XmiSplitterResult;
import de.julielab.xml.util.XMISplitterException;
import de.julielab.xmlData.cli.TableNotFoundException;
import de.julielab.xmlData.dataBase.DataBaseConnector;
import de.julielab.xmlData.dataBase.util.TableSchemaMismatchException;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.GZIPOutputStream;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.impl.XmiCasSerializer;
import org.apache.uima.ducc.Workitem;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

@ResourceMetaData(name = "JCoRe XMI Database Writer", vendor = "JULIE Lab Jena, Germany", description = "This component is capable of storing the standard UIMA serialization of documents in one or even multiple database tables. The UIMA serialization format is XMI, an XML format that expressed an annotation graph. This component either stores the whole annotation graph in XMI format in a database row, together with the document ID. Alternatively, it makes use of the jcore-xmi-splitter to segment the annotation graph with respect to a user specified list of annotation types. Then, the XMI data of each annotation type is extracted from the document XMI data and stored in a separate table. The tables are created automatically according to the primary key of the active table schema in the Corpus Storage System (CoStoSys) configuration file that is also given as a parameter. The jcore-xmi-db-reader is capable of reading this kind of distributed annotation graph and reassemble a valid XMI document which then cas be deserialized into a CAS. This consumer is UIMA DUCC compatible. It requires the collection reader to forward the work item CAS to the consumer. This is required so the consumer knows that a work item has been finished and that all cached data - in this case the XMI data - should be flushed. This is important! Without the forwarding of the work item CAS, the last batch of cached XMI data will not be written into the database. This component is part of the Jena Document Information System, JeDIS.")
/* loaded from: input_file:de/julielab/jcore/consumer/xmi/XMIDBWriter.class */
public class XMIDBWriter extends JCasAnnotator_ImplBase {
    // Names of the UIMA configuration parameters of this component. They are referenced by the
    // @ConfigurationParameter annotations on the fields below and used as lookup keys for
    // UimaContext.getConfigParameterValue(...) in initialize() and checkParameters().
    public static final String PARAM_COSTOSYS_CONFIG = "CostosysConfigFile";
    public static final String PARAM_UPDATE_MODE = "UpdateMode";
    public static final String PARAM_DO_GZIP = "PerformGZIP";
    public static final String PARAM_STORE_ALL = "StoreEntireXmiData";
    public static final String PARAM_TABLE_DOCUMENT = "DocumentTable";
    public static final String PARAM_ANNOS_TO_STORE = "AnnotationsToStore";
    public static final String PARAM_STORE_RECURSIVELY = "StoreRecursively";
    public static final String PARAM_BASE_DOCUMENT_ANNOTATION_TYPES = "BaseDocumentAnnotationTypes";
    public static final String PARAM_DELETE_OBSOLETE_ANNOTATIONS = "DeleteObsoleteAnnotations";
    public static final String PARAM_ATTRIBUTE_SIZE = "IncreasedAttributeSize";
    public static final String PARAM_ANNO_STORAGE_PG_SCHEMA = "AnnotationStoragePostgresSchema";
    public static final String PARAM_COMPONENT_DB_NAME = "ComponentDbName";
    public static final String PARAM_STORE_BASE_DOCUMENT = "StoreBaseDocument";
    public static final String PARAM_WRITE_BATCH_SIZE = "WriteBatchSize";
    // Class-wide SLF4J logger.
    private static final Logger log = LoggerFactory.getLogger(XMIDBWriter.class);
    // Database connector; created in initialize() from the CoStoSys configuration file path.
    private DataBaseConnector dbc;

    @ConfigurationParameter(name = PARAM_UPDATE_MODE, description = "If set to false, the attempt to write new data into an XMI document or annotation table that already has data for the respective document, will result in an error. If set to true, there will first occur a check if there already is XMI data for the currently written document and, if so, the contents will be updated. It is important to keep in mind that the update also includes empty data. That is, if an annotation type is specified in 'AnnotationsToStore' for which the current does not have data, possibly existing data will just be deleted.")
    private Boolean updateMode;

    @ConfigurationParameter(name = PARAM_DELETE_OBSOLETE_ANNOTATIONS, mandatory = false, defaultValue = {"false"}, description = "Only in effect if 'StoreBaseDocument' is set to 'true'. Then, already existing annotation tables are retrieved from an internal database table the is specifically maintained to list existing annotation tables. When storing the base document, the annotations in these tables are removed for the document if this parameter is set to 'true', except tables specified in 'AnnotationsToStore'. The idea is that when storing the base document, all existing annotations become obsolete since they refer to a base document that no longer exists.")
    private Boolean deleteObsolete;

    @ConfigurationParameter(name = PARAM_DO_GZIP, description = "Determines if the XMI data should be stored compressed or uncompressed. Without compression, the data will be directly viewable in a database browser, whereas compressed data appears as opaque byte sequence. Compression is supposed to reduce traffic over the network and save storage space on the database server.")
    private Boolean doGzip;

    @ConfigurationParameter(name = PARAM_ATTRIBUTE_SIZE, mandatory = false, description = "Integer that defines the maximum attribute size for the XMIs. Standard (parser wise) is 65536 * 8. It may be necessary to rise this value for larger documents since the document text is stored as an attribute of an XMI element.")
    private Integer attributeSize;

    @ConfigurationParameter(name = PARAM_STORE_ALL, description = "Boolean parameter indicating if the whole document should be stored as one large XMI data block. In this case there must not be any annotations specified for selection and the 'StoreBaseDocument' parameter will have no effect.")
    private Boolean storeAll;

    @ConfigurationParameter(name = PARAM_TABLE_DOCUMENT, description = "String parameter indicating the name of the table where the XMI data will be stored (if StoreEntireXmiData is true) or where the base document is (to be) stored (if the base document or annotation data is written). If the name is schema qualified, i.e. contains a dot, the table name will be used as provided. If no schema is qualified, the active data postgres schema as configured in the CoStoSys configuration will be used to find or create the table.")
    private String docTableParamValue;
    private List<String> annotationsToStore;

    @ConfigurationParameter(name = PARAM_STORE_RECURSIVELY, description = "Only in effect when storing annotations separately from the base document. If set to true, annotations that are referenced by other annotations, i.e. are (direct or indirect) features of other annotations, they will be stored in the same table as the referencing annotation. For example, POS tags may be store together with tokens this way. If, however, a referenced annotation type is itself to be stored, it will be segmented away and stored in its own table.")
    private Boolean recursively;

    @ConfigurationParameter(name = PARAM_STORE_BASE_DOCUMENT, description = "Boolean parameter indicating if the base document should be stored as well when annotations are specified for selection. The base document is the part of the XMI file that includes the document text. If you want to store annotations right with the base document, specify those in the 'BaseDocumentAnnotationTypes' parameter.")
    private Boolean storeBaseDocument;

    @ConfigurationParameter(name = PARAM_BASE_DOCUMENT_ANNOTATION_TYPES, mandatory = false, description = "Array parameter that takes Java annotation type names. These names will be stored with the base document, if the 'StoreBaseDocument' parameter is set to true.")
    private Set<String> baseDocumentAnnotationTypes;

    @ConfigurationParameter(name = PARAM_ANNO_STORAGE_PG_SCHEMA, mandatory = false, description = "This optional parameter specifies the Postgres schema in which the XMI annotation storage tables are located by default. If omitted, the active data schema from the CoStoSys configuration is used. The tables derived from the annotation types specified with the 'AnnotationsToStore' parameter will be stored in this postgres schema. The default can be overwritten for individual types. See the description of the 'AnnotationsToStore' parameter.")
    private String annotationStorageSchema;

    @ConfigurationParameter(name = PARAM_WRITE_BATCH_SIZE, mandatory = false, defaultValue = {"50"}, description = "The number of processed CASes after which the XMI data should be flushed into the database. Defaults to 50.")
    private int writeBatchSize;
    // Splitter applied to the serialized XMI in process(); initialize() assigns a WholeXmiStaxSplitter
    // when the entire XMI is stored and a StaxXmiSplitter otherwise.
    private XmiSplitter splitter;
    // Name of the field configuration registered with the DataBaseConnector for the document table.
    private String schemaDocument;
    // Name of the field configuration for annotation tables; only assigned when not storing the entire XMI.
    private String schemaAnnotation;
    // Document table name as returned by AnnotationTableManager.getEffectiveDocumentTableName(...).
    private String effectiveDocTableName;
    // Receives the XMI namespaces found by the splitter (see process()).
    private MetaTableManager metaTableManager;
    // Converts annotation type names to table names and lists obsolete annotation tables.
    private AnnotationTableManager annotationTableManager;
    // Receives processed document IDs and XMI ID mappings in process().
    private XmiDataInserter annotationInserter;

    @ConfigurationParameter(name = PARAM_COMPONENT_DB_NAME, description = " Subset tables store the name of the last component that has sent data for a document. This parameter allows to specify a custom name for each CAS DB Consumer. Defaults to the implementation class name.", defaultValue = {"XMIDBWriter"})
    private String componentDbName;
    private String subsetTable;

    @ConfigurationParameter(name = PARAM_COSTOSYS_CONFIG, description = "File path or classpath resource location of a Corpus Storage System (CoStoSys) configuration file. This file specifies the database to write the XMI data into and the data table schema. This schema must at least define the primary key columns that the storage tables should have for each document. The primary key is currently just the document ID. Thus, at the moment, primary keys can only consist of a single element when using this component. This is a shortcoming of this specific component and must be changed here, if necessary.")
    private String dbcConfigPath;

    @ConfigurationParameter(name = PARAM_ANNOS_TO_STORE, mandatory = false, description = "An array of qualified UIMA type names, for instance de.julielab.jcore.types.Sentence. Annotations of those types are segmented away from the serialized document annotation graph in XMI format for storage in separate tables. When the 'StoreRecursively' parameter is set to true, annotations are stored together with referenced annotations, if those are not specified in the list of additional tables themselves. The table names are directly derived from the annotation type names by converting dots to underlines and adding a postgres schema qualification according to the active data postgres schema defined in the CoStoSys configuration. If an annotation table should be stored or looked up in another postgres schema, prepend the type name with the string '<schema>:', e.g. 'myschema:de.julielab.jcore.types.Token.")
    private String[] annotations;
    // Cached XMI data per table name. Keys are registered in initialize(); process() appends one entry
    // per document and table.
    private LinkedHashMap<String, List<XmiData>> serializedCASes = new LinkedHashMap<>();
    // Per table name, the documents for which process() found no data (update mode) or whose
    // annotations are obsolete (delete-obsolete mode).
    private Map<String, List<DocumentId>> tablesWithoutData = new HashMap();
    // Number of documents skipped by getDocumentId() because no document ID could be determined.
    private int headerlessDocuments = 0;
    // Number of documents cached by process(); a flush is triggered whenever it is a multiple of writeBatchSize.
    private int currentBatchSize = 0;

    /**
     * Reads the component configuration, connects to the database and prepares all helpers needed by
     * {@link #process(JCas)}.
     * <p>
     * In detail, the method validates the parameters, registers the XMI field configurations with the
     * {@link DataBaseConnector}, checks the definition of already existing target tables, registers the
     * target tables in the internal caches and creates the XMI splitter, the meta table manager and the
     * XMI data inserter. The database connection reserved for the table checks is released in any case,
     * also when initialization fails.
     *
     * @param uimaContext the UIMA context providing the configuration parameter values
     * @throws ResourceInitializationException if the parameters are missing or inconsistent, the write
     *         batch size is not positive, the CoStoSys configuration file cannot be found or an existing
     *         table does not match the expected table schema
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        checkParameters(uimaContext);
        this.dbcConfigPath = (String) uimaContext.getConfigParameterValue(PARAM_COSTOSYS_CONFIG);
        try {
            this.dbc = new DataBaseConnector(this.dbcConfigPath);
            this.updateMode = readBooleanParameter(uimaContext, PARAM_UPDATE_MODE);
            this.deleteObsolete = readBooleanParameter(uimaContext, PARAM_DELETE_OBSOLETE_ANNOTATIONS);
            this.doGzip = readBooleanParameter(uimaContext, PARAM_DO_GZIP);
            this.storeAll = readBooleanParameter(uimaContext, PARAM_STORE_ALL);
            this.docTableParamValue = (String) uimaContext.getConfigParameterValue(PARAM_TABLE_DOCUMENT);
            this.storeBaseDocument = readBooleanParameter(uimaContext, PARAM_STORE_BASE_DOCUMENT);
            String[] baseDocumentTypes = (String[]) uimaContext.getConfigParameterValue(PARAM_BASE_DOCUMENT_ANNOTATION_TYPES);
            this.baseDocumentAnnotationTypes = baseDocumentTypes == null
                    ? new HashSet<>()
                    : new HashSet<>(Arrays.asList(baseDocumentTypes));
            this.attributeSize = (Integer) uimaContext.getConfigParameterValue(PARAM_ATTRIBUTE_SIZE);
            this.writeBatchSize = Optional.ofNullable((Integer) uimaContext.getConfigParameterValue(PARAM_WRITE_BATCH_SIZE)).orElse(50);
            if (this.writeBatchSize < 1) {
                // process() computes "currentBatchSize % writeBatchSize"; zero would fail there with an ArithmeticException.
                throw new ResourceInitializationException(new IllegalArgumentException("The parameter " + PARAM_WRITE_BATCH_SIZE + " must be a positive integer but was " + this.writeBatchSize + "."));
            }
            this.componentDbName = Optional.ofNullable((String) uimaContext.getConfigParameterValue(PARAM_COMPONENT_DB_NAME)).orElse(getClass().getSimpleName());
            this.annotationStorageSchema = Optional.ofNullable((String) uimaContext.getConfigParameterValue(PARAM_ANNO_STORAGE_PG_SCHEMA)).orElse(this.dbc.getActiveDataPGSchema());

            // Table names derived from the annotation types; handed to the XmiDataInserter below.
            ArrayList<String> annotationTableNames = new ArrayList<>();
            this.annotationsToStore = Collections.emptyList();
            if (this.storeAll) {
                this.schemaDocument = this.dbc.addXmiDocumentFieldConfiguration((List) this.dbc.getActiveTableFieldConfiguration().getPrimaryKeyFields().collect(Collectors.toList()), this.doGzip.booleanValue()).getName();
            } else {
                this.schemaDocument = this.dbc.addXmiTextFieldConfiguration((List) this.dbc.getActiveTableFieldConfiguration().getPrimaryKeyFields().collect(Collectors.toList()), this.doGzip.booleanValue()).getName();
                this.schemaAnnotation = this.dbc.addXmiAnnotationFieldConfiguration((List) this.dbc.getActiveTableFieldConfiguration().getPrimaryKeyFields().collect(Collectors.toList()), this.doGzip.booleanValue()).getName();
                this.annotations = (String[]) uimaContext.getConfigParameterValue(PARAM_ANNOS_TO_STORE);
                if (this.annotations != null) {
                    this.annotationsToStore = new ArrayList<>(Arrays.asList(this.annotations));
                }
                this.recursively = readBooleanParameter(uimaContext, PARAM_STORE_RECURSIVELY);
            }

            this.dbc.reserveConnection();
            try {
                this.annotationTableManager = new AnnotationTableManager(this.dbc, this.docTableParamValue, this.annotationsToStore, this.schemaDocument, this.schemaAnnotation, this.storeAll, this.storeBaseDocument, this.annotationStorageSchema);
                this.effectiveDocTableName = this.annotationTableManager.getEffectiveDocumentTableName(this.docTableParamValue);
                if (this.storeBaseDocument || this.storeAll) {
                    this.serializedCASes.put(this.effectiveDocTableName, new ArrayList<>());
                }
                if (!this.storeAll) {
                    for (String annotationType : this.annotationsToStore) {
                        String annotationTable = this.annotationTableManager.convertAnnotationTypeToTableName(annotationType, this.storeAll.booleanValue());
                        if (this.dbc.tableExists(annotationTable)) {
                            checkTableDefinition(annotationTable, this.schemaAnnotation);
                        }
                        this.serializedCASes.put(annotationTable, new ArrayList<>());
                        this.tablesWithoutData.put(annotationTable, new ArrayList<>());
                        annotationTableNames.add(annotationTable);
                    }
                }
                if (this.dbc.tableExists(this.effectiveDocTableName)) {
                    checkTableDefinition(this.effectiveDocTableName, this.schemaDocument);
                }
                // process() adds documents to the obsolete tables' lists when 'deleteObsolete' is set, so the
                // lists must exist in that mode as well and not only in update mode.
                if (this.updateMode || this.deleteObsolete) {
                    List<String> obsoleteTables = this.annotationTableManager.getObsoleteAnnotationTableNames();
                    if (!obsoleteTables.isEmpty()) {
                        log.info("Annotations from the following tables will be obsolete by updating the base document and will be deleted: {}", obsoleteTables);
                        for (String obsoleteTable : obsoleteTables) {
                            this.tablesWithoutData.put(obsoleteTable, new ArrayList<>());
                        }
                    }
                }
                if (this.storeAll) {
                    if (this.attributeSize != null) {
                        this.splitter = new WholeXmiStaxSplitter(this.docTableParamValue, this.attributeSize.intValue());
                    } else {
                        this.splitter = new WholeXmiStaxSplitter(this.docTableParamValue);
                    }
                } else if (this.attributeSize != null) {
                    this.splitter = new StaxXmiSplitter(new HashSet<>(this.annotationsToStore), this.recursively.booleanValue(), this.storeBaseDocument.booleanValue(), this.docTableParamValue, this.baseDocumentAnnotationTypes, this.attributeSize.intValue());
                } else {
                    this.splitter = new StaxXmiSplitter(new HashSet<>(this.annotationsToStore), this.recursively.booleanValue(), this.storeBaseDocument.booleanValue(), this.docTableParamValue, this.baseDocumentAnnotationTypes);
                }
                log.info("{} initialized.", XMIDBWriter.class.getName());
                log.info("Effective document table name: {}", this.effectiveDocTableName);
                log.info("Is base document stored: {}", this.storeBaseDocument);
                log.info("CAS XMI data will be GZIPed: {}", this.doGzip);
                log.info("Is the whole, unsplit XMI document stored: {}", this.storeAll);
                log.info("Annotations belonging to the base document: {}", this.baseDocumentAnnotationTypes);
                log.info("Annotation types to store in separate tables: {}", this.annotationsToStore);
                log.info("Store annotations recursively: {}", this.recursively);
                log.info("Update mode: {}", this.updateMode);
                log.info("Base document table schema: {}", this.schemaDocument);
                log.info("Annotation table schema (only required if annotations are stored separatly): {}", this.schemaAnnotation);
                log.info("Batch size of cached documents sent to database: {}", this.writeBatchSize);
                this.metaTableManager = new MetaTableManager(this.dbc);
                this.annotationInserter = new XmiDataInserter(annotationTableNames, this.effectiveDocTableName, this.dbc, this.schemaDocument, this.schemaAnnotation, this.storeAll, this.storeBaseDocument, this.updateMode, this.componentDbName);
            } catch (TableSchemaMismatchException e) {
                throw new ResourceInitializationException(e);
            } finally {
                // Give the reserved connection back even if one of the checks above failed.
                this.dbc.releaseConnections();
            }
        } catch (FileNotFoundException e2) {
            throw new ResourceInitializationException(e2);
        }
    }

    /**
     * Reads a boolean configuration parameter, treating an unset parameter as {@code false}.
     *
     * @param uimaContext   the UIMA context to read the value from
     * @param parameterName the name of the configuration parameter
     * @return the configured value or {@code false} if the parameter has no value
     */
    private static boolean readBooleanParameter(UimaContext uimaContext, String parameterName) {
        Object value = uimaContext.getConfigParameterValue(parameterName);
        return value != null && ((Boolean) value).booleanValue();
    }

    /**
     * Lets the database connector verify that a table conforms to a table schema and reports a failed
     * check as an initialization error.
     *
     * @param str  the name of the table to check
     * @param str2 the name of the table schema the table is checked against
     * @throws ResourceInitializationException if the connector reports that the table was not found or
     *         that its definition does not match the schema
     */
    private void checkTableDefinition(String str, String str2) throws ResourceInitializationException {
        try {
            this.dbc.checkTableDefinition(str, str2);
        } catch (TableNotFoundException tableNotFound) {
            throw new ResourceInitializationException(tableNotFound);
        } catch (TableSchemaMismatchException schemaMismatch) {
            throw new ResourceInitializationException(schemaMismatch);
        }
    }

    /**
     * Validates the configuration before any resources are created.
     * <p>
     * The CoStoSys configuration file and the document table must be given. Additionally, at least one of
     * 'store entire XMI', 'annotations to store' and 'store base document' must have a value, and storing
     * the entire XMI must not be combined with a non-empty list of annotation types.
     *
     * @param uimaContext the UIMA context providing the configuration parameter values
     * @throws ResourceInitializationException if one of the above conditions is violated
     */
    private void checkParameters(UimaContext uimaContext) throws ResourceInitializationException {
        if (null == uimaContext.getConfigParameterValue(PARAM_COSTOSYS_CONFIG)) {
            throw new ResourceInitializationException(new IllegalStateException("The database configuration file is null. You must provide the path to a valid configuration file."));
        }
        if (null == uimaContext.getConfigParameterValue(PARAM_TABLE_DOCUMENT)) {
            throw new ResourceInitializationException(new IllegalStateException("The document table is null. You must provide it to either store the entire xmi data, to store the base document  or to update the next possible xmi id."));
        }

        final String[] annotationTypes = (String[]) uimaContext.getConfigParameterValue(PARAM_ANNOS_TO_STORE);

        // Nothing at all was configured for storage: none of the three parameters has a value.
        final boolean nothingConfigured = ((Boolean) uimaContext.getConfigParameterValue(PARAM_STORE_ALL)) == null
                && annotationTypes == null
                && ((Boolean) uimaContext.getConfigParameterValue(PARAM_STORE_BASE_DOCUMENT)) == null;
        if (nothingConfigured) {
            throw new ResourceInitializationException(new IllegalStateException("The parameter to store the entire xmi data is not checked, but there are no annotations specified to store instead. You must provide the names of the selected annotations, if you do not want to  write the entire CAS data."));
        }

        // Storing the complete XMI and selecting individual annotation types are mutually exclusive.
        final Object storeAllValue = uimaContext.getConfigParameterValue(PARAM_STORE_ALL);
        final boolean storeAllRequested = storeAllValue != null && ((Boolean) storeAllValue).booleanValue();
        final boolean annotationsGiven = annotationTypes != null && annotationTypes.length > 0;
        if (storeAllRequested && annotationsGiven) {
            throw new ResourceInitializationException(new IllegalStateException("The parameter to store the entire xmi data is checked and there are annotations specified to store. You can only either write the entire CAS data or select annotations, but not both."));
        }
    }

    /**
     * Handles one CAS.
     * <p>
     * A CAS carrying a DUCC {@link Workitem} is a signal to flush the cached data: the last block triggers
     * {@code collectionProcessComplete()}, any other block {@code batchProcessComplete()}. Every other CAS
     * is treated as a document: it is serialized to XMI, optionally split by annotation type, and the
     * resulting data is cached per target table. After every {@code writeBatchSize} documents the cache is
     * sent to the database. Documents for which no ID can be determined and documents that fail with a
     * {@link SAXParseException} during serialization are skipped.
     *
     * @param jCas the CAS to store or the work item CAS
     * @throws AnalysisEngineProcessException if the CAS contains multiple XmiMetaData or
     *         DBProcessingMetaData instances or if serialization, splitting or compression fails
     */
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        try {
            // Only the lookup is guarded: an IllegalArgumentException raised while flushing must not be
            // mistaken for "this CAS has no work item".
            Workitem workitem = null;
            try {
                workitem = JCasUtil.selectSingle(jCas, Workitem.class);
            } catch (IllegalArgumentException ignored) {
                // selectSingle did not find exactly one work item; this is a regular document CAS.
            }
            if (workitem != null) {
                log.trace("Work item feature structure found in the current CAS. Sending data to the database and returning.");
                if (workitem.getLastBlock()) {
                    collectionProcessComplete();
                } else {
                    batchProcessComplete();
                }
                return;
            }

            DocumentId documentId = getDocumentId(jCas);
            if (documentId == null) {
                return;
            }
            // Both values are read from the XmiMetaData, so they must be fetched before it is removed below.
            int nextXmiId = determineNextXmiId(jCas, documentId);
            Map<String, Integer> originalSofaIdMappings = getOriginalSofaIdMappings(jCas, documentId);
            Collection<XmiMetaData> xmiMetaData = JCasUtil.select(jCas, XmiMetaData.class);
            if (xmiMetaData.size() > 1) {
                throw new AnalysisEngineProcessException(new IllegalArgumentException("There are multiple XmiMetaData annotations in the cas for document " + documentId + "."));
            }
            // Iterate over a copy since the elements are removed from the index they were selected from.
            for (XmiMetaData metaData : new ArrayList<>(xmiMetaData)) {
                metaData.removeFromIndexes();
            }
            if (this.subsetTable == null) {
                Collection<DBProcessingMetaData> dbMetaData = JCasUtil.select(jCas, DBProcessingMetaData.class);
                if (!dbMetaData.isEmpty()) {
                    if (dbMetaData.size() > 1) {
                        throw new AnalysisEngineProcessException(new IllegalArgumentException("There is more than one type of DBProcessingMetaData in document " + documentId));
                    }
                    this.subsetTable = dbMetaData.iterator().next().getSubsetTable();
                }
            }
            try {
                byte[] xmiBytes;
                try (ByteArrayOutputStream xmiStream = new ByteArrayOutputStream()) {
                    XmiCasSerializer.serialize(jCas.getCas(), xmiStream);
                    xmiBytes = xmiStream.toByteArray();
                }
                try {
                    if (this.storeAll) {
                        this.serializedCASes.get(this.effectiveDocTableName).add(new DocumentXmiData(documentId, handleDataZipping(xmiBytes, this.schemaDocument), 0, null));
                    } else {
                        XmiSplitterResult splitResult = this.splitter.process(xmiBytes, jCas, nextXmiId, originalSofaIdMappings);
                        // Re-key the split data from the splitter's storage keys to the actual table names.
                        Map<?, ?> splitXmiData = splitResult.xmiData;
                        Map<String, ByteArrayOutputStream> dataByTable = new HashMap<>();
                        for (Map.Entry<?, ?> entry : splitXmiData.entrySet()) {
                            String storageKey = (String) entry.getKey();
                            String tableName = storageKey.equals(this.docTableParamValue)
                                    ? this.effectiveDocTableName
                                    : this.annotationTableManager.convertAnnotationTypeToTableName(storageKey, this.storeAll.booleanValue());
                            dataByTable.put(tableName, (ByteArrayOutputStream) entry.getValue());
                        }
                        Integer newMaxXmiId = Integer.valueOf(splitResult.maxXmiId);
                        this.metaTableManager.manageXMINamespaces(splitResult.namespaces);
                        if (splitResult.currentSofaIdMap.isEmpty()) {
                            throw new IllegalStateException("The XmiSplitter returned an empty Sofa XMI ID map. This is a critical errors since it means that the splitter was not able to resolve the correct Sofa XMI IDs for the annotations that should be stored now.");
                        }
                        log.trace("Updating max xmi id of document {}. New max xmi id: {}", documentId, newMaxXmiId);
                        log.trace("Sofa ID map for this document: {}", splitResult.currentSofaIdMap);
                        for (String tableName : this.serializedCASes.keySet()) {
                            boolean isDocumentTable = tableName.equals(this.effectiveDocTableName);
                            ByteArrayOutputStream tableData = dataByTable.get(tableName);
                            if (tableData != null) {
                                Object storedData = handleDataZipping(tableData.toByteArray(), isDocumentTable ? this.schemaDocument : this.schemaAnnotation);
                                if (this.storeBaseDocument && isDocumentTable) {
                                    this.serializedCASes.get(tableName).add(new DocumentXmiData(documentId, storedData, newMaxXmiId, splitResult.currentSofaIdMap));
                                } else {
                                    this.serializedCASes.get(tableName).add(new XmiData(documentId, storedData));
                                    if (!this.storeBaseDocument) {
                                        this.annotationInserter.putXmiIdMapping(documentId, newMaxXmiId);
                                    }
                                }
                            } else if (this.updateMode) {
                                this.tablesWithoutData.get(tableName).add(documentId);
                            }
                        }
                        if (this.deleteObsolete) {
                            // The per-table lists are not guaranteed to exist (e.g. without update mode), so create them on demand.
                            for (String obsoleteTable : this.annotationTableManager.getObsoleteAnnotationTableNames()) {
                                this.tablesWithoutData.computeIfAbsent(obsoleteTable, table -> new ArrayList<>()).add(documentId);
                            }
                        }
                    }
                    this.annotationInserter.addProcessedDocumentId(documentId);
                    this.currentBatchSize++;
                    if (this.currentBatchSize % this.writeBatchSize == 0) {
                        log.trace("Document nr {} processed, filling batch nr {} of size {}, sending to database.", this.currentBatchSize, this.currentBatchSize / this.writeBatchSize, this.writeBatchSize);
                        batchProcessComplete();
                    }
                } catch (IOException | XMISplitterException e) {
                    throw new AnalysisEngineProcessException(e);
                }
            } catch (IOException e) {
                // Logged together with the document ID by the outer handler.
                throw new AnalysisEngineProcessException(e);
            } catch (SAXParseException e) {
                log.error("Serialization error occurred, skipping this document: ", e);
            } catch (SAXException e) {
                // Logged together with the document ID by the outer handler.
                throw new AnalysisEngineProcessException(e);
            }
        } catch (Throwable th) {
            String docId = "<unknown>";
            try {
                docId = JCasUtil.selectSingle(jCas, Header.class).getDocId();
            } catch (Exception ignored) {
                // Best effort only: the ID is merely used to enrich the log message below.
            }
            log.error("Error occurred at document {}: ", docId, th);
            throw th;
        }
    }

    /**
     * Determines the ID under which the document of the given CAS is stored.
     * <p>
     * The ID is preferably created from the single {@link DBProcessingMetaData} instance in the CAS. If
     * that is not possible, the document ID of the first {@link Header} annotation is used. If there is
     * no header either, the document is counted as headerless and {@code null} is returned.
     *
     * @param jCas the CAS to get the document ID for
     * @return the document ID or {@code null} if the CAS offers neither DBProcessingMetaData nor a Header
     */
    private DocumentId getDocumentId(JCas jCas) {
        try {
            return new DocumentId(JCasUtil.selectSingle(jCas, DBProcessingMetaData.class));
        } catch (IllegalArgumentException e) {
            log.debug("Could not find the primary key in the DBProcessingMetaData due to exception: {}. Using the document ID as primary key.", e.getMessage());
        }
        FSIterator<?> headers = jCas.getAnnotationIndex(Header.type).iterator();
        if (!headers.hasNext()) {
            // The CAS may not have a document text at all; do not fail while composing the warning.
            String documentText = jCas.getDocumentText();
            String textBeginning = documentText == null
                    ? "<no document text>"
                    : documentText.substring(0, Math.min(100, documentText.length()));
            log.warn("Got document without a header and without DBProcessingMetaData; cannot obtain document ID. This document will not be written into the database. Document text begins with: {}", textBeginning);
            this.headerlessDocuments++;
            return null;
        }
        return new DocumentId(((Header) headers.next()).getDocId());
    }

    /**
     * Reads the sofa ID mappings recorded in the CAS' {@link XmiMetaData} annotation.
     * <p>
     * Each mapping entry is split at {@code "="}; the part after the separator becomes the map key and the
     * part before it is parsed as the integer value.
     *
     * @param jCas       the CAS of the document currently being processed
     * @param documentId the identifier of the document, used for trace logging only
     * @return the parsed mappings, or an empty map if {@code storeAll} is set, if the CAS holds no mappings,
     *         or if an {@link IllegalArgumentException} occurs while reading them
     */
    private Map<String, Integer> getOriginalSofaIdMappings(JCas jCas, DocumentId documentId) {
        if (this.storeAll.booleanValue()) {
            return Collections.emptyMap();
        }
        try {
            XmiMetaData xmiMetaData = JCasUtil.selectSingle(jCas, XmiMetaData.class);
            if (xmiMetaData.getSofaIdMappings() == null) {
                return Collections.emptyMap();
            }
            // Work on a typed String[] so the pipeline needs neither raw types nor unchecked casts.
            String[] mappingEntries = xmiMetaData.getSofaIdMappings().toArray();
            Map<String, Integer> sofaIdMappings = Stream.of(mappingEntries)
                    .map(entry -> entry.split("="))
                    .collect(Collectors.toMap(parts -> parts[1], parts -> Integer.parseInt(parts[0])));
            log.trace("Got Sofa XMI map from the CAS: {} for document {}", sofaIdMappings, documentId);
            return sofaIdMappings;
        } catch (IllegalArgumentException e) {
            // Presumably meant for selectSingle() failing to find the XmiMetaData annotation (TODO confirm).
            // Note that NumberFormatException is a subclass of IllegalArgumentException, so an entry whose
            // numeric part cannot be parsed also ends up here and yields an empty map.
            return Collections.emptyMap();
        }
    }

    /**
     * Determines the XMI ID from which the IDs for the current document are counted.
     * <p>
     * The maximum XMI ID is read from the CAS' {@link XmiMetaData} annotation. It is only used when neither
     * the whole document nor the base document is stored and there are annotations to store; in every other
     * case counting starts at 0.
     *
     * @param jCas       the CAS of the document currently being processed
     * @param documentId the identifier of the document, used for logging only
     * @return the XMI ID to start counting from
     * @throws AnalysisEngineProcessException if the max XMI ID cannot be read from the CAS although neither
     *                                        {@code storeBaseDocument} nor {@code storeAll} is set
     */
    private int determineNextXmiId(JCas jCas, DocumentId documentId) throws AnalysisEngineProcessException {
        int maxXmiId = 0;
        try {
            XmiMetaData xmiMetaData = JCasUtil.selectSingle(jCas, XmiMetaData.class);
            maxXmiId = xmiMetaData.getMaxXmiId();
        } catch (IllegalArgumentException e) {
            // A missing max XMI ID is only fatal when annotations are added to an already stored base document.
            if (!(this.storeBaseDocument || this.storeAll)) {
                String explanation = "Error: Could not find the max XMI ID in the CAS. Explanation: The option to store the base document (i.e. the document and possible same basic document meta data annotations) is set to false. Thus, it is assumed that the XMI DB Reader was used to read an existing base document and that only annotation data should be written now. In this case, the current maximum XMI ID for the respective document is required to be found in the CAS to keep this XMI ID unique for each annotation. This information is written into the CAS by the XMI DB Reader, if the respective configuration parameter is set to true. This seems not to be the case since the max XMI ID could not be found. Make sure that the reader adds the max XMI IDto the CAS and run the pipeline again.";
                throw new AnalysisEngineProcessException(new NullPointerException(explanation));
            }
        }

        if (this.storeAll || this.storeBaseDocument || this.annotationsToStore.isEmpty()) {
            log.trace("Counting XMI IDs from 0 for document {} since the whole document is stored or the base document is stored or no additional annotations are stored.", documentId);
            return 0;
        }

        log.trace("Counting XMI IDs from {} for document {}.", maxXmiId, documentId);
        if (maxXmiId == 0) {
            log.warn("XMI IDs are counted from 0 for document {}. This is most probably a mistake since annotations should be stored but not the base document. In the base document are always some annotation elements with XMI IDs so those IDs will most probably already be taken and should not be assigned to new annotations.", documentId);
        }
        return maxXmiId;
    }

    /**
     * Converts serialized XMI bytes into the value that is written to the database.
     * <p>
     * If {@code doGzip} is set, the bytes are gzip-compressed and returned as a {@code byte[]}; otherwise
     * they are decoded as UTF-8 and returned as a {@link String}. A warning is logged when the type declared
     * for the data field in the table schema does not fit the produced value.
     *
     * @param bArr the serialized XMI data
     * @param str  the name of the table schema whose field configuration is checked
     * @return a gzipped {@code byte[]} or a UTF-8 decoded {@link String}
     * @throws IOException if compressing the data fails
     */
    protected Object handleDataZipping(byte[] bArr, String str) throws IOException {
        // NOTE(review): the field at index 1 is taken to be the XMI data column - confirm against the schemas in use.
        Map<?, ?> dataField = (Map<?, ?>) this.dbc.getFieldConfiguration(str).getFields().get(1);
        String fieldType = (String) dataField.get("type");

        if (this.doGzip.booleanValue()) {
            // Constant-first comparison: a missing type triggers the warning instead of an NPE.
            if (!"bytea".equalsIgnoreCase(fieldType)) {
                log.warn("The table schema \"{}\" specifies the data type \"{}\" for the field \"{}\" which is supposed to be filled with gzipped XMI data. However, binary data should go to a field of type bytea.", str, fieldType, dataField.get("name"));
            }
            ByteArrayOutputStream compressed = new ByteArrayOutputStream();
            // try-with-resources finishes the gzip stream and also closes it when write() fails.
            try (GZIPOutputStream gzip = new GZIPOutputStream(compressed)) {
                gzip.write(bArr);
            }
            return compressed.toByteArray();
        }

        if (!"text".equalsIgnoreCase(fieldType) && !"xml".equalsIgnoreCase(fieldType)) {
            log.warn("The table schema \"{}\" specifies the data type \"{}\" for the field \"{}\" and the contents to be written should be XML. Please use the field type xml or text for such contents.", str, fieldType, dataField.get("name"));
        }
        // Fully qualified so that no additional import is required.
        return new String(bArr, java.nio.charset.StandardCharsets.UTF_8);
    }

    /**
     * Flushes the cached XMI data of the current batch.
     * <p>
     * The cached data is handed to the annotation inserter; afterwards every per-table cache list in
     * {@code serializedCASes} and {@code tablesWithoutData} is emptied.
     *
     * @throws AnalysisEngineProcessException if the superclass call fails or the data cannot be inserted
     */
    public void batchProcessComplete() throws AnalysisEngineProcessException {
        super.batchProcessComplete();
        log.debug("Running batchProcessComplete.");
        try {
            this.annotationInserter.sendXmiDataToDatabase(this.serializedCASes, this.tablesWithoutData, this.subsetTable);
            // The lists are only cleared once the data has been sent without an insertion error.
            for (List<XmiData> cachedXmiData : this.serializedCASes.values()) {
                cachedXmiData.clear();
            }
            for (List<DocumentId> idsWithoutData : this.tablesWithoutData.values()) {
                idsWithoutData.clear();
            }
        } catch (XmiDataInsertionException insertionError) {
            throw new AnalysisEngineProcessException(insertionError);
        }
    }

    /**
     * Flushes the remaining cached XMI data at the end of the collection and closes the database connector.
     * <p>
     * The cached data is handed to the annotation inserter, the per-table cache lists are emptied and the
     * number of documents that could not be written for lack of a header is logged.
     *
     * @throws AnalysisEngineProcessException if the superclass call fails or the data cannot be inserted
     */
    public void collectionProcessComplete() throws AnalysisEngineProcessException {
        super.collectionProcessComplete();
        log.debug("Running collectionProcessComplete.");
        try {
            this.annotationInserter.sendXmiDataToDatabase(this.serializedCASes, this.tablesWithoutData, this.subsetTable);
            for (List<XmiData> cachedXmiData : this.serializedCASes.values()) {
                cachedXmiData.clear();
            }
            for (List<DocumentId> idsWithoutData : this.tablesWithoutData.values()) {
                idsWithoutData.clear();
            }
            log.info("{} documents without a head occured overall. Those could not be written into the database.", this.headerlessDocuments);
        } catch (XmiDataInsertionException e) {
            throw new AnalysisEngineProcessException(e);
        } finally {
            // Close the connector even when the final insertion fails, so it is not leaked.
            this.dbc.close();
        }
    }
}
