package de.julielab.jcore.consumer.txt;

import de.julielab.java.utilities.FileUtilities;
import de.julielab.jcore.types.Header;
import de.julielab.jcore.types.POSTag;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.types.Token;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipOutputStream;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.CASRuntimeException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/julielab/jcore/consumer/txt/SentenceTokenConsumer.class */
public class SentenceTokenConsumer extends JCasAnnotator_ImplBase {
    public static final String PARAM_OUTPUT_DIR = "outDirectory";
    public static final String PARAM_DELIMITER = "delimiter";
    public static final String PARAM_LOWERCASE = "lowercase";
    public static final String PARAM_MODE = "mode";
    public static final String PARAM_GZIP = "gzip";
    public static final String PARAM_ZIP_ARCHIVE = "zipArchive";
    public static final String PARAM_ZIP_MAX_SIZE = "maxZipSize";
    public static final String PARAM_ZIP_PREFIX = "zipFilePrefix";
    private static final String DEFAULT_DELIMITER = "";
    private static final boolean DEFAULT_PARAM_POS_TAG = false;

    @ConfigurationParameter(name = PARAM_OUTPUT_DIR, description = "The directory where to write the text files to.")
    private File directory;

    @ConfigurationParameter(name = PARAM_DELIMITER, mandatory = false, description = "If this parameter is given, each token will have its part of speech tag appended where the PoS tag is delimited from the token by the string given with this parameter.")
    private String delimiter;

    @ConfigurationParameter(name = PARAM_LOWERCASE, mandatory = false, defaultValue = {"false"}, description = "If set to true, this parameter causes all written text output to be lowercased. Defaults to false.")
    private Boolean lowercase;

    @ConfigurationParameter(name = PARAM_MODE, mandatory = false, description = "Possible values: TOKEN and DOCUMENT. The first prints out tokens with one sentence per line, the second just prints out the CAS document text without changing it in any way.")
    private Mode mode;

    @ConfigurationParameter(name = PARAM_GZIP, mandatory = false, defaultValue = {"false"}, description = "If set to true, the output files are stored in the GZIP format. The .gz extension is automatically appended. Defaults to false.")
    private Boolean gzip;

    @ConfigurationParameter(name = PARAM_ZIP_ARCHIVE, mandatory = false, defaultValue = {"false"}, description = "If set to true, this parameter causes the output files to be stored in ZIP archives. The maximum size in terms of entries of each archive is given by the maxZipSize parameter and defaults to 10,000. The archive names are built using the prefix specified with the zipFilePrefix parameter followed by a serially added number and the host name.")
    private Boolean zip;

    @ConfigurationParameter(name = PARAM_ZIP_MAX_SIZE, mandatory = false, defaultValue = {"10000"}, description = "If the parameter zipArchive is set to true, ZIP archives will be written with a maximum number of entries to be specified with this paramter. Defaults to 10,000.")
    private Integer zipSize;

    @ConfigurationParameter(name = PARAM_ZIP_PREFIX, mandatory = false, defaultValue = {"TXTConsumerArchive"}, description = "Specifies the base name for ZIP archives that are created in case the zipArchive parameter is enabled.")
    private String zipFilePrefix;
    private boolean addPOSTAG;
    private OutputStream currentArchive;
    private static final Logger LOGGER = LoggerFactory.getLogger(SentenceTokenConsumer.class);
    private static final byte[] linesepBytes = System.getProperty("line.separator").getBytes(StandardCharsets.UTF_8);
    int docs = DEFAULT_PARAM_POS_TAG;
    private int archiveNumber = 1;
    private int currentArchiveSize = DEFAULT_PARAM_POS_TAG;

    /* loaded from: input_file:de/julielab/jcore/consumer/txt/SentenceTokenConsumer$Mode.class */
    private enum Mode {
        TOKEN,
        DOCUMENT
    }

    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        LOGGER.info("INITIALIZING TXT Consumer ...");
        this.directory = new File((String) uimaContext.getConfigParameterValue(PARAM_OUTPUT_DIR));
        if (!this.directory.exists()) {
            this.directory.mkdir();
        }
        LOGGER.info("Writing txt files to output directory '" + this.directory + "'");
        this.delimiter = (String) uimaContext.getConfigParameterValue(PARAM_DELIMITER);
        if (this.delimiter == null) {
            this.delimiter = DEFAULT_DELIMITER;
        }
        this.lowercase = (Boolean) Optional.ofNullable(uimaContext.getConfigParameterValue(PARAM_LOWERCASE)).orElse(false);
        this.gzip = (Boolean) uimaContext.getConfigParameterValue(PARAM_GZIP);
        if (this.gzip == null) {
            this.gzip = false;
        }
        if (uimaContext.getConfigParameterValue(PARAM_DELIMITER) != null) {
            this.addPOSTAG = true;
            LOGGER.info("Adding POSTags ...");
        } else {
            this.addPOSTAG = false;
        }
        this.zip = (Boolean) Optional.ofNullable(uimaContext.getConfigParameterValue(PARAM_ZIP_ARCHIVE)).orElse(false);
        this.zipSize = (Integer) Optional.ofNullable(uimaContext.getConfigParameterValue(PARAM_ZIP_MAX_SIZE)).orElse(10000);
        this.zipFilePrefix = (String) Optional.ofNullable(uimaContext.getConfigParameterValue(PARAM_ZIP_PREFIX)).orElse("TXTConsumerArchive");
        String str = (String) uimaContext.getConfigParameterValue(PARAM_MODE);
        if (str == null) {
            str = Mode.TOKEN.name();
        }
        this.mode = Mode.valueOf(str);
    }

    private OutputStream createNextArchiveStream() throws IOException {
        this.currentArchive = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(new File(this.directory.getCanonicalPath() + File.separator + this.zipFilePrefix + this.archiveNumber + "-" + getHostName() + "-" + Thread.currentThread().getName() + ".zip"))));
        this.archiveNumber++;
        this.currentArchiveSize = DEFAULT_PARAM_POS_TAG;
        return this.currentArchive;
    }

    public void process(JCas jCas) throws AnalysisEngineProcessException {
        LOGGER.trace("Processing next document ... ");
        try {
            String docID = getDocID(jCas);
            if (docID == null) {
                int i = this.docs;
                this.docs = i + 1;
                docID = new Integer(i).toString();
            }
            if (this.mode == Mode.TOKEN) {
                FSIterator it = jCas.getAnnotationIndex(Sentence.type).iterator();
                AnnotationIndex annotationIndex = jCas.getAnnotationIndex(Token.type);
                ArrayList arrayList = new ArrayList();
                while (it.hasNext()) {
                    FSIterator subiterator = annotationIndex.subiterator((Sentence) it.next());
                    String str = DEFAULT_DELIMITER;
                    while (subiterator.hasNext()) {
                        str = this.addPOSTAG ? returnWithPOSTAG(subiterator, str) : returnWithoutPOSTAG(subiterator, str);
                    }
                    arrayList.add(str);
                }
                writeSentences2File(docID, arrayList);
            } else if (this.mode == Mode.DOCUMENT) {
                LOGGER.trace("Writing the verbatim CAS document text to {}", new File(this.directory.getCanonicalPath() + File.separator + docID + ".txt" + (this.gzip.booleanValue() ? ".gz" : DEFAULT_DELIMITER)));
                writeSentences2File(docID, Arrays.asList(jCas.getDocumentText()));
            }
        } catch (CASRuntimeException | CASException | IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    private String returnWithoutPOSTAG(FSIterator fSIterator, String str) {
        String coveredText = ((Token) fSIterator.next()).getCoveredText();
        return str.equals(DEFAULT_DELIMITER) ? coveredText : str + " " + coveredText;
    }

    private String returnWithPOSTAG(FSIterator fSIterator, String str) {
        Token token = (Token) fSIterator.next();
        String coveredText = token.getCoveredText();
        POSTag pOSTag = DEFAULT_PARAM_POS_TAG;
        FSArray posTag = token.getPosTag();
        if (posTag != null && posTag.size() > 0) {
            pOSTag = (POSTag) posTag.get(DEFAULT_PARAM_POS_TAG);
        }
        String value = pOSTag.getValue();
        return str.equals(DEFAULT_DELIMITER) ? coveredText + this.delimiter + value : str + " " + coveredText + this.delimiter + value;
    }

    public String getDocID(JCas jCas) throws CASException {
        String str = DEFAULT_PARAM_POS_TAG;
        FSIterator it = jCas.getJFSIndexRepository().getAnnotationIndex(Header.type).iterator();
        while (it.hasNext()) {
            str = ((Header) it.next()).getDocId();
        }
        return str;
    }

    public void collectionProcessComplete() throws AnalysisEngineProcessException {
        if (this.currentArchive != null) {
            try {
                this.currentArchive.close();
            } catch (IOException e) {
                throw new AnalysisEngineProcessException();
            }
        }
    }

    private void writeSentences2File(String str, List<String> list) throws IOException {
        OutputStream outputStream = DEFAULT_PARAM_POS_TAG;
        boolean z = DEFAULT_PARAM_POS_TAG;
        try {
            File file = new File(this.directory.getCanonicalPath() + File.separator + str + ".txt" + (this.gzip.booleanValue() ? ".gz" : DEFAULT_DELIMITER));
            outputStream = this.zip.booleanValue() ? this.currentArchive : FileUtilities.getOutputStreamToFile(file);
            if (this.zip.booleanValue()) {
                if (outputStream == null) {
                    outputStream = createNextArchiveStream();
                }
                try {
                    ((ZipOutputStream) outputStream).putNextEntry(new ZipEntry(file.getName()));
                    z = true;
                } catch (ZipException e) {
                    if (!e.getMessage().contains("duplicate")) {
                        throw e;
                    }
                    LOGGER.warn("The file {} is already present in the current ZIP archive. Thus, the current file is omitted.", file.getName());
                }
            }
            if (!this.zip.booleanValue() || z) {
                for (String str2 : list) {
                    byte[] bytes = this.lowercase.booleanValue() ? str2.toLowerCase().getBytes(StandardCharsets.UTF_8) : str2.getBytes(StandardCharsets.UTF_8);
                    outputStream.write(bytes, DEFAULT_PARAM_POS_TAG, bytes.length);
                    outputStream.write(linesepBytes, DEFAULT_PARAM_POS_TAG, linesepBytes.length);
                }
            }
        } finally {
            if (z) {
                ((ZipOutputStream) outputStream).closeEntry();
                this.currentArchiveSize++;
                if (this.currentArchiveSize >= this.zipSize.intValue()) {
                    outputStream.close();
                    createNextArchiveStream();
                }
            } else if (!this.zip.booleanValue()) {
                outputStream.close();
            }
        }
    }

    private String getHostName() {
        try {
            return InetAddress.getLocalHost().getHostName();
        } catch (UnknownHostException e) {
            throw new IllegalStateException(e);
        }
    }
}
