package de.julielab.jcore.reader.xml;

import de.julielab.jcore.types.casmultiplier.JCoReURI;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Iterator;
import java.util.Locale;
import java.util.Optional;
import java.util.stream.Stream;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.ducc.Workitem;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ResourceMetaData(name = "JCoRe XML Multiplier Reader", description = "Reads Medline/PubMed XML blobs as downloaded from the NCBI FTP. Each blob is one large XML file containing a PubmedArticleSet. This component is UIMA DUCC compatible and forwards the work item CAS to the CAS consumer in order to indicate the finishing of the current XML blob. It also sets the work item feature 'lastBlock' to true if there are not more work items and, thus, the processing comes to an end.")
/* loaded from: input_file:de/julielab/jcore/reader/xml/XMLMultiplierReader.class */
public class XMLMultiplierReader extends CollectionReader_ImplBase {
    public static final String PARAM_INPUT_DIR = "InputDirectory";
    public static final String PARAM_INPUT_FILE = "InputFile";
    public static final String PARAM_FILE_NAME_REGEX = "FileNameRegex";
    public static final String PARAM_SEARCH_IN_ZIP = "SearchInZipFiles";
    public static final String PARAM_SEND_CAS_TO_LAST = "SendCasToLast";
    private static Logger LOGGER = LoggerFactory.getLogger(XMLMultiplierReader.class);
    private Deque<URI> inputUris;

    @ConfigurationParameter(name = "InputDirectory", mandatory = false)
    private String directoryName;

    @ConfigurationParameter(name = "InputFile", mandatory = false)
    private String isSingleFileProcessing;

    @ConfigurationParameter(name = PARAM_SEARCH_IN_ZIP, mandatory = false, description = "If set to true, contents of ZIP files in the given input directory will also be searched for files matching the specified file name regular expression. Defaults to false.", defaultValue = {"false"})
    private boolean searchZip;

    @ConfigurationParameter(name = PARAM_SEND_CAS_TO_LAST, mandatory = false, defaultValue = {"false"}, description = "UIMA DUCC relevant parameter when using a CAS multiplier. When set to true, the worker CAS from the collection reader is forwarded to the last component in the pipeline. This can be used to send information about the progress to the CAS consumer in order to have it perform batch operations. For this purpose, a feature structure of type WorkItem from the DUCC library is added to the worker CAS. This feature structure has information about the current progress.")
    private boolean sendCasToLast;
    private int currentIndex = 0;

    @ConfigurationParameter(name = PARAM_FILE_NAME_REGEX, description = "If a directory is given, all inputUris with a name matching one of these regular expressions will be read, others will be discarded. Defaults to {'.*\\.xml', '.*\\.xml.gz'}.", defaultValue = {".*\\.xml", ".*\\xml\\.gz"})
    private String[] fileNameRegex = {".*\\.xml", ".*\\.xml.gz"};

    public void initialize() throws ResourceInitializationException {
        try {
            if (LOGGER.isInfoEnabled()) {
                LOGGER.info("Component configuration:");
                for (String str : getUimaContext().getConfigParameterNames()) {
                    LOGGER.info("{}: {}", str, getConfigParameterValue(str));
                }
            }
            this.sendCasToLast = ((Boolean) Optional.ofNullable(getConfigParameterValue(PARAM_SEND_CAS_TO_LAST)).orElse(false)).booleanValue();
            getInputFiles();
        } catch (Throwable th) {
            LOGGER.error("Exception or error while initializing reader: ", th);
            throw th;
        }
    }

    public void getNext(CAS cas) throws CollectionException {
        try {
            URI removeFirst = this.inputUris.removeFirst();
            LOGGER.debug("Reading URI " + removeFirst.toString());
            try {
                JCoReURI jCoReURI = new JCoReURI(cas.getJCas());
                jCoReURI.setUri(removeFirst.toString());
                jCoReURI.addToIndexes();
                if (this.sendCasToLast) {
                    Workitem workitem = new Workitem(cas.getJCas());
                    workitem.setSendToLast(true);
                    workitem.setBlockindex(this.currentIndex);
                    if (!hasNext()) {
                        workitem.setLastBlock(true);
                    }
                    workitem.addToIndexes();
                }
                this.currentIndex++;
            } catch (Exception e) {
                LOGGER.error("Exception with URI: " + removeFirst.toString(), e);
                throw new CollectionException(e);
            }
        } catch (CASException e2) {
            LOGGER.error("Could not get the JCAS from the CAS: ", e2);
            throw new CollectionException(e2);
        } catch (Throwable th) {
            LOGGER.warn("Exception or error while filling CAS: ", th);
            throw th;
        }
    }

    private void getInputFiles() throws ResourceInitializationException {
        this.inputUris = new ArrayDeque();
        this.currentIndex = 0;
        if (isSingleProcessing()) {
            getSingleFile();
            return;
        }
        this.directoryName = (String) getConfigParameterValue("InputDirectory");
        if (this.directoryName == null) {
            throw new ResourceInitializationException("resource_data_not_valid", new Object[]{"null", "InputDirectory"});
        }
        if (getConfigParameterValue(PARAM_FILE_NAME_REGEX) != null) {
            this.fileNameRegex = (String[]) getConfigParameterValue(PARAM_FILE_NAME_REGEX);
        }
        this.searchZip = ((Boolean) Optional.ofNullable((Boolean) getConfigParameterValue(PARAM_SEARCH_IN_ZIP)).orElse(false)).booleanValue();
        File file = new File(this.directoryName.trim());
        if (!file.exists()) {
            throw new ResourceInitializationException(new FileNotFoundException("The directory " + file.getAbsolutePath() + " does not exist."));
        }
        if (!file.isDirectory()) {
            throw new ResourceInitializationException(new IllegalArgumentException("The file " + file.getAbsolutePath() + " is not a directory."));
        }
        for (File file2 : file.listFiles((file3, str) -> {
            return matchesFileNameRegex(str);
        })) {
            URI uri = file2.toURI();
            if (uri.toString().toLowerCase().endsWith(".zip")) {
                LOGGER.debug("Searching ZIP archive {} for eligible documents", uri);
                try {
                    FileSystem newFileSystem = FileSystems.newFileSystem(Paths.get(uri), (ClassLoader) null);
                    Throwable th = null;
                    try {
                        try {
                            Iterator<Path> it = newFileSystem.getRootDirectories().iterator();
                            while (it.hasNext()) {
                                Files.walk(it.next(), new FileVisitOption[0]).filter(path -> {
                                    return Files.isRegularFile(path, new LinkOption[0]);
                                }).forEach(path2 -> {
                                    LOGGER.trace("Current ZIP archive entry: {}", path2.toString());
                                    if (matchesFileNameRegex(path2.getFileName().toString())) {
                                        this.inputUris.push(path2.toUri());
                                    }
                                });
                            }
                            if (newFileSystem != null) {
                                if (0 != 0) {
                                    try {
                                        newFileSystem.close();
                                    } catch (Throwable th2) {
                                        th.addSuppressed(th2);
                                    }
                                } else {
                                    newFileSystem.close();
                                }
                            }
                        } finally {
                        }
                    } finally {
                    }
                } catch (IOException e) {
                    LOGGER.error("Could not read from {}", uri);
                    throw new ResourceInitializationException(e);
                }
            } else {
                this.inputUris.push(uri);
            }
        }
        LOGGER.debug("Found {} input files.", Integer.valueOf(this.inputUris.size()));
    }

    private boolean matchesFileNameRegex(String str) {
        for (String str2 : this.fileNameRegex) {
            if (str.matches(str2)) {
                return true;
            }
            if (this.searchZip && str.toLowerCase().endsWith("zip")) {
                return true;
            }
        }
        return false;
    }

    private void getSingleFile() throws ResourceInitializationException {
        LOGGER.info("XML reader is used in SINGLE FILE mode.");
        String str = (String) getConfigParameterValue("InputFile");
        if (str == null) {
            return;
        }
        File file = new File(str.trim());
        if (!file.exists() || file.isDirectory()) {
            throw new ResourceInitializationException("resource_data_not_valid", new Object[]{"file does not exist or is a directoryInputFile"});
        }
        this.inputUris.push(file.toURI());
    }

    private boolean isSingleProcessing() {
        Object configParameterValue = getConfigParameterValue("InputFile");
        if (null != configParameterValue) {
            this.isSingleFileProcessing = (String) configParameterValue;
        }
        return this.isSingleFileProcessing != null;
    }

    public boolean hasNext() {
        return !this.inputUris.isEmpty();
    }

    public Progress[] getProgress() {
        return new Progress[]{new ProgressImpl(this.currentIndex, this.inputUris.size(), "entities")};
    }

    public void close() {
    }
}
