package de.unistuttgart.quadrama.io.core;

import de.unistuttgart.ims.drama.api.Drama;
import java.io.File;
import java.io.FileReader;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.IOUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import org.apache.uima.util.Progress;

/* loaded from: input_file:de/unistuttgart/quadrama/io/core/AbstractDramaUrlReader.class */
/**
 * Base class for collection readers that produce one CAS per drama document.
 *
 * <p>The {@link #PARAM_INPUT} parameter is interpreted in one of three ways:
 * <ol>
 * <li>a directory: every directly contained file whose name ends with
 * {@code .xml} or {@code .tei} is read;</li>
 * <li>a path ending with {@code .xml} or {@code .tei}: that single file is
 * read;</li>
 * <li>anything else: the file is parsed as tab-delimited text and the first
 * column of every record is taken as a location. Values starting with
 * {@code /} are treated as local file paths, all others as URLs.</li>
 * </ol>
 *
 * <p>Subclasses implement {@link #getNext(JCas, InputStream, Drama)} to parse
 * the content of a single document.
 */
public abstract class AbstractDramaUrlReader extends JCasCollectionReader_ImplBase {
    public static final String PARAM_INPUT = "Input";
    public static final String PARAM_LANGUAGE = "Language";
    public static final String PARAM_CLEANUP = "Cleanup";
    public static final String PARAM_ID_PREFIX = "Id Prefix";

    /** Accepts the file names this reader treats as drama documents. */
    private static final FilenameFilter DRAMA_FILE_FILTER = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            return isDramaFile(name);
        }
    };

    /** Optional prefix that is prepended (followed by {@link #idSeparator}) to every document id. */
    @ConfigurationParameter(name = PARAM_ID_PREFIX, mandatory = false, defaultValue = {""})
    String idPrefix;

    /** Separator placed between {@link #idPrefix} and the running document number. */
    static final String idSeparator = ":";

    /** Directory, single document, or tab-delimited list of locations (see class documentation). */
    @ConfigurationParameter(name = PARAM_INPUT, mandatory = false)
    String input = null;

    /** Language code set on every produced CAS. */
    @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false, defaultValue = {"de"})
    String language = "de";

    /** If {@code true}, {@code DramaIOUtil.cleanUp} is invoked on every CAS after parsing. */
    @ConfigurationParameter(name = PARAM_CLEANUP, mandatory = false)
    boolean cleanUp = false;

    /** Locations of all documents to read, in reading order. */
    List<URL> urls = new LinkedList<URL>();

    /** Index into {@link #urls} of the next document to read. */
    int currentUrlIndex = 0;

    /**
     * Resolves the configured input into the list of document URLs.
     *
     * @param uimaContext the UIMA context passed on to the super class
     * @throws ResourceInitializationException if no input is configured, the
     *         input cannot be listed or read, or a location cannot be turned
     *         into a URL
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        if (this.input == null) {
            // The parameter is declared optional, so fail with a clear message
            // instead of a bare NullPointerException from new File(null).
            throw new ResourceInitializationException(
                    new IllegalArgumentException("Parameter '" + PARAM_INPUT + "' must be set."));
        }

        File source = new File(this.input);
        if (source.isDirectory()) {
            addDirectory(source);
        } else if (isDramaFile(this.input)) {
            this.urls.add(toUrl(source));
        } else {
            addUrlList(source);
        }
    }

    /** Returns whether the given name carries one of the recognised drama file extensions. */
    private static boolean isDramaFile(String name) {
        return name.endsWith(".xml") || name.endsWith(".tei");
    }

    /** Converts a file to a URL, reporting failures as initialization errors. */
    private static URL toUrl(File file) throws ResourceInitializationException {
        try {
            return file.toURI().toURL();
        } catch (MalformedURLException e) {
            throw new ResourceInitializationException(e);
        }
    }

    /** Registers every drama file directly contained in the given directory. */
    private void addDirectory(File directory) throws ResourceInitializationException {
        File[] candidates;
        try {
            candidates = directory.listFiles(DRAMA_FILE_FILTER);
        } catch (SecurityException e) {
            throw new ResourceInitializationException(e);
        }
        if (candidates == null) {
            // listFiles signals I/O problems by returning null rather than throwing.
            throw new ResourceInitializationException(
                    new IOException("Unable to list directory " + directory));
        }
        // NOTE(review): the order of listFiles is unspecified, so the running
        // document numbers are not guaranteed to be stable across platforms.
        for (File candidate : candidates) {
            this.urls.add(toUrl(candidate));
        }
    }

    /** Registers every location found in the first column of a tab-delimited list file. */
    private void addUrlList(File listFile) throws ResourceInitializationException {
        FileReader reader = null;
        CSVParser parser = null;
        try {
            // NOTE(review): FileReader decodes with the platform default charset.
            reader = new FileReader(listFile);
            parser = new CSVParser(reader, CSVFormat.TDF);
            for (CSVRecord record : parser.getRecords()) {
                String location = record.get(0);
                if (location.startsWith("/")) {
                    this.urls.add(new File(location).toURI().toURL());
                } else {
                    this.urls.add(new URL(location));
                }
            }
            getLogger().log(Level.FINE, "Found " + this.urls.size() + " URLs.");
        } catch (Exception e) {
            // Deliberately broad: malformed records surface as runtime
            // exceptions and must also abort initialization.
            throw new ResourceInitializationException(e);
        } finally {
            IOUtils.closeQuietly(parser);
            // Also close the reader in case the parser could not be created.
            IOUtils.closeQuietly(reader);
        }
    }

    /**
     * @return {@code true} while there are unread documents
     */
    public boolean hasNext() throws IOException, CollectionException {
        return this.currentUrlIndex < this.urls.size();
    }

    /**
     * Progress is not tracked by this reader.
     *
     * @return an empty array, so that callers can iterate without a null check
     */
    public Progress[] getProgress() {
        return new Progress[0];
    }

    /**
     * Reads the next document into the given CAS. A {@link Drama} annotation
     * carrying the document id (the 1-based running number, optionally
     * prefixed) and the document URI is added to the indexes before the
     * content is parsed by {@link #getNext(JCas, InputStream, Drama)}.
     *
     * @param jCas the CAS to fill
     * @throws IOException if the document cannot be opened or read
     * @throws CollectionException if the subclass fails to process the document
     */
    public void getNext(JCas jCas) throws IOException, CollectionException {
        URL url = this.urls.get(this.currentUrlIndex);
        // Incremented before the id is built, hence ids start at 1.
        this.currentUrlIndex++;
        getLogger().debug("Processing url " + url);

        String number = String.valueOf(this.currentUrlIndex);
        Drama drama = new Drama(jCas);
        if (this.idPrefix == null || this.idPrefix.isEmpty()) {
            drama.setDocumentId(number);
        } else {
            drama.setDocumentId(this.idPrefix + idSeparator + number);
        }
        drama.setDocumentUri(url.toString());
        drama.addToIndexes();
        jCas.setDocumentLanguage(this.language);

        InputStream stream = url.openStream();
        try {
            getNext(jCas, stream, drama);
        } finally {
            // The stream is opened here, so it is released here as well.
            IOUtils.closeQuietly(stream);
        }

        if (this.cleanUp) {
            DramaIOUtil.cleanUp(jCas);
        }
    }

    /**
     * Parses a single document.
     *
     * @param jCas the CAS to fill
     * @param inputStream the document content; it is closed by the caller after
     *        this method returns
     * @param drama the annotation that already holds the document id and URI
     * @throws IOException if reading from the stream fails
     * @throws CollectionException if the document cannot be processed
     */
    public abstract void getNext(JCas jCas, InputStream inputStream, Drama drama) throws IOException, CollectionException;
}
