package de.julielab.jcore.reader.dta;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableMap;
import com.ximpleware.ParseException;
import com.ximpleware.VTDNav;
import de.julielab.jcore.reader.dta.mapping.MappingService;
import de.julielab.jcore.types.Lemma;
import de.julielab.jcore.types.STTSPOSTag;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.types.Token;
import de.julielab.jcore.types.extensions.dta.Header;
import de.julielab.jcore.types.extensions.dta.PersonInfo;
import de.julielab.xml.JulieXMLTools;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.cas.StringArray;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/julielab/jcore/reader/dta/DTAFileReader.class */
/**
 * UIMA collection reader for {@code .tcf.xml} files whose XML root is {@code /D-Spin}.
 *
 * <p>For every accepted file the reader rebuilds the document text from the token layer and adds
 * {@link Sentence}, {@link Token}, {@link Lemma} and {@link STTSPOSTag} annotations as well as a
 * {@link Header} holding the bibliographic metadata found in the embedded TEI header.
 *
 * <p>Configuration parameters:
 * <ul>
 *   <li>{@link #PARAM_INPUTFILE}: a single {@code .tcf.xml} file or a directory whose direct
 *       children are scanned for such files (no recursion).</li>
 *   <li>{@link #PARAM_NORMALIZE}: whether token texts are replaced by the entries of the
 *       {@code orthography/correction[@operation='replace']} layer; defaults to {@code true}
 *       when unset.</li>
 * </ul>
 */
public class DTAFileReader extends CollectionReader_ImplBase {
    public static final String PARAM_INPUTFILE = "inputFile";
    public static final String PARAM_NORMALIZE = "normalize";

    private static final String XPATH_TEXT_CORPUS = "/D-Spin/TextCorpus/";
    private static final String XPATH_TEI_HEADER = "/D-Spin/MetaData/source/CMD/Components/teiHeader/";
    private static final String XPATH_PROFILE_DESC = XPATH_TEI_HEADER + "profileDesc/";
    private static final String XPATH_TITLE_STMT = XPATH_TEI_HEADER + "fileDesc/titleStmt/";
    static final String XPATH_PUBLICATION_STMT = XPATH_TEI_HEADER + "fileDesc/sourceDesc/biblFull/publicationStmt/";
    private static final String XPATH_YEAR = XPATH_PUBLICATION_STMT + "date";

    /** Keys of a field definition handed to {@code JulieXMLTools.constructRowIterator}. */
    private static final String FIELD_KEY_NAME = "name";
    private static final String FIELD_KEY_XPATH = "xpath";
    /** Names under which the extracted values are stored in each row. */
    private static final String FIELD_TEXT = "text";
    private static final String FIELD_ATTRIBUTE = "attribute";

    /** Only files with this suffix are read. */
    private static final String FILE_SUFFIX = ".tcf.xml";

    static final String COMPONENT_ID = DTAFileReader.class.getCanonicalName();
    private static final Logger LOGGER = LoggerFactory.getLogger(DTAFileReader.class);
    private static final Joiner NEW_LINE_JOINER = Joiner.on("\n");

    @ConfigurationParameter(name = PARAM_INPUTFILE)
    private String inputFile;

    @ConfigurationParameter(name = PARAM_NORMALIZE)
    private boolean normalize;

    /** Files collected by {@link #initialize()}, processed in list order. */
    private final List<File> inputFiles = new ArrayList<>();
    /** Index into {@link #inputFiles} of the next file to read. */
    private int counter = 0;

    /**
     * Checks whether a file can be processed by this reader: every {@code POStags} element must
     * declare the tagset {@code stts} and every declared language must be exactly {@code German}.
     * A file without any such elements passes the respective check.
     *
     * @param str identifier of the file, passed through to the XML helper
     * @param vTDNav navigator over the parsed file
     * @return {@code true} if no element violates the two conditions
     */
    static boolean formatIsOk(String str, VTDNav vTDNav) {
        // Constant-first equals: a missing tagset attribute (null key) simply fails the check
        // instead of raising a NullPointerException.
        for (String tagset : mapAttribute2Text(str, vTDNav, XPATH_TEXT_CORPUS + "POStags", "@tagset").keySet()) {
            if (!"stts".equals(tagset)) {
                return false;
            }
        }
        // Using "." as the key XPath groups the language elements by their own text.
        for (String[] languages : mapAttribute2Text(str, vTDNav, XPATH_PROFILE_DESC + "langUsage/language", ".").values()) {
            if (languages.length != 1 || !"German".equals(languages[0])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Lazily yields, for each element matched by {@code str2}, the value selected by the relative
     * XPath {@code str3}. Every call to {@link Iterable#iterator()} starts a fresh traversal.
     *
     * @param str identifier of the file, passed through to the XML helper
     * @param vTDNav navigator over the parsed file
     * @param str2 XPath selecting the elements to iterate over
     * @param str3 XPath, relative to each element, of the value to return
     * @return a read-only iterable over the selected values
     */
    static Iterable<String> getAttributeForEach(final String str, final VTDNav vTDNav, final String str2, final String str3) {
        return new Iterable<String>() {
            @Override
            public Iterator<String> iterator() {
                final List<Map<String, String>> fields = new ArrayList<>();
                fields.add(field(FIELD_TEXT, "."));
                fields.add(field(FIELD_ATTRIBUTE, str3));
                final Iterator<Map<String, Object>> rows = JulieXMLTools.constructRowIterator(vTDNav, str2, fields, str);
                return new Iterator<String>() {
                    @Override
                    public boolean hasNext() {
                        return rows.hasNext();
                    }

                    @Override
                    public String next() {
                        return (String) rows.next().get(FIELD_ATTRIBUTE);
                    }

                    @Override
                    public void remove() {
                        // The Iterator contract prescribes this exception for read-only iterators.
                        throw new UnsupportedOperationException("remove");
                    }
                };
            }
        };
    }

    /** Builds one field definition (result name plus relative XPath) for the row iterator. */
    private static Map<String, String> field(String name, String xpath) {
        return ImmutableMap.of(FIELD_KEY_NAME, name, FIELD_KEY_XPATH, xpath);
    }

    /** Same as the four-argument variant, without an overriding map. */
    private static String getEntry(String source, String id, Map<String, String[]> entries) {
        return getEntry(source, id, entries, null);
    }

    /**
     * Looks up the single value stored for {@code id}.
     *
     * @param source file identifier, only used in error messages
     * @param id key to look up
     * @param entries mandatory map that must contain {@code id} unless {@code overrides} does
     * @param overrides optional map consulted first; may be {@code null}
     * @return the one value associated with {@code id}
     * @throws IllegalArgumentException if {@code id} is found in neither map or the selected map
     *         does not hold exactly one value for it
     */
    private static String getEntry(String source, String id, Map<String, String[]> entries, Map<String, String[]> overrides) {
        String[] values = overrides == null ? null : overrides.get(id);
        if (values == null) {
            values = entries.get(id);
            if (values == null) {
                throw new IllegalArgumentException("ID \"" + id + "\" has no associated entry in " + source);
            }
        }
        if (values.length != 1) {
            throw new IllegalArgumentException("ID \"" + id + "\" has not exactly one entry in " + source);
        }
        return values[0];
    }

    /**
     * Reads all persons of the given role from the title statement and adds one indexed
     * {@link PersonInfo} per {@code persName} element.
     *
     * @param jCas CAS receiving the feature structures
     * @param vTDNav navigator over the parsed file
     * @param str identifier of the file, passed through to the XML helper
     * @param personType role whose string form is used as the element name in the XPath
     * @return an indexed array with the persons in document order, possibly empty
     */
    static FSArray getPersons(JCas jCas, VTDNav vTDNav, String str, PersonType personType) {
        String personXpath = XPATH_TITLE_STMT + personType + "/persName";
        List<Map<String, String>> fields = new ArrayList<>();
        fields.add(field("surname", "surname"));
        fields.add(field("forename", "forename"));
        fields.add(field("idno", "idno/idno[@type='PND']"));

        List<PersonInfo> persons = new ArrayList<>();
        Iterator<Map<String, Object>> rows = JulieXMLTools.constructRowIterator(vTDNav, personXpath, fields, str);
        while (rows.hasNext()) {
            Map<String, Object> row = rows.next();
            PersonInfo person = new PersonInfo(jCas);
            person.setSurename((String) row.get("surname"));
            person.setForename((String) row.get("forename"));
            person.setIdno((String) row.get("idno"));
            person.addToIndexes();
            persons.add(person);
        }

        FSArray result = new FSArray(jCas, persons.size());
        result.copyFromArray(persons.toArray(new PersonInfo[persons.size()]), 0, 0, persons.size());
        result.addToIndexes();
        return result;
    }

    /**
     * Collects the text of every element matched by an XPath.
     *
     * @param str identifier of the file, passed through to the XML helper
     * @param vTDNav navigator over the parsed file
     * @param str2 XPath selecting the elements
     * @return the element texts in iteration order
     */
    static List<String> getTexts(String str, VTDNav vTDNav, String str2) {
        List<Map<String, String>> fields = new ArrayList<>();
        fields.add(field(FIELD_TEXT, "."));
        List<String> texts = new ArrayList<>();
        Iterator<Map<String, Object>> rows = JulieXMLTools.constructRowIterator(vTDNav, str2, fields, str);
        while (rows.hasNext()) {
            texts.add((String) rows.next().get(FIELD_TEXT));
        }
        return texts;
    }

    /**
     * Groups the texts of all elements matched by {@code str2} by the value selected with the
     * relative XPath {@code str3}.
     *
     * @param str identifier of the file, passed through to the XML helper
     * @param vTDNav navigator over the parsed file
     * @param str2 XPath selecting the elements
     * @param str3 XPath, relative to each element, of the grouping key
     * @return map from key to the texts of all elements sharing it, in iteration order
     */
    static Map<String, String[]> mapAttribute2Text(String str, VTDNav vTDNav, String str2, String str3) {
        List<Map<String, String>> fields = new ArrayList<>();
        fields.add(field(FIELD_TEXT, "."));
        fields.add(field(FIELD_ATTRIBUTE, str3));

        Map<String, String[]> textsByKey = new HashMap<>();
        Iterator<Map<String, Object>> rows = JulieXMLTools.constructRowIterator(vTDNav, str2, fields, str);
        while (rows.hasNext()) {
            Map<String, Object> row = rows.next();
            String key = (String) row.get(FIELD_ATTRIBUTE);
            String text = (String) row.get(FIELD_TEXT);
            String[] known = textsByKey.get(key);
            if (known == null) {
                textsByKey.put(key, new String[]{text});
            } else {
                String[] grown = Arrays.copyOf(known, known.length + 1);
                grown[known.length] = text;
                textsByKey.put(key, grown);
            }
        }
        return textsByKey;
    }

    /**
     * Rebuilds the document text sentence by sentence and annotates it.
     *
     * <p>Tokens of a sentence are joined by single spaces, except that no space is inserted
     * before tokens tagged {@code $,} or {@code $.}; sentences are separated by a line break.
     * Tokens whose (possibly normalized) text is empty are skipped entirely.
     *
     * @param jCas CAS receiving text and annotations
     * @param vTDNav navigator over the parsed file
     * @param str identifier of the file, used for the XML helper and in error messages
     * @param z if {@code true}, token texts are overridden by {@code replace} corrections
     * @throws IllegalArgumentException if a token ID lacks a unique text, POS tag or lemma
     */
    static void readDocument(JCas jCas, VTDNav vTDNav, String str, boolean z) throws ParseException, IOException {
        Map<String, String[]> tokenTexts = mapAttribute2Text(str, vTDNav, XPATH_TEXT_CORPUS + "tokens/token", "@ID");
        Map<String, String[]> lemmas = mapAttribute2Text(str, vTDNav, XPATH_TEXT_CORPUS + "lemmas/lemma", "@tokenIDs");
        Map<String, String[]> posTags = mapAttribute2Text(str, vTDNav, XPATH_TEXT_CORPUS + "POStags/tag", "@tokenIDs");
        Map<String, String[]> corrections = z
                ? mapAttribute2Text(str, vTDNav, XPATH_TEXT_CORPUS + "orthography/correction[@operation='replace']", "@tokenIDs")
                : null;

        StringBuilder text = new StringBuilder();
        int sentenceBegin = 0;
        for (String tokenIds : getAttributeForEach(str, vTDNav, XPATH_TEXT_CORPUS + "sentences/sentence", "@tokenIDs")) {
            boolean firstToken = true;
            for (String tokenId : tokenIds.split(" ")) {
                String tokenText = getEntry(str, tokenId, tokenTexts, corrections);
                if (tokenText.length() == 0) {
                    continue;
                }
                String posTag = getEntry(str, tokenId, posTags);
                String lemma = getEntry(str, tokenId, lemmas);
                if (firstToken) {
                    firstToken = false;
                } else if (!posTag.equals("$,") && !posTag.equals("$.")) {
                    text.append(" ");
                }
                int begin = text.length();
                text.append(tokenText);
                addToken(jCas, begin, text.length(), lemma, posTag);
            }
            Sentence sentence = new Sentence(jCas, sentenceBegin, text.length());
            sentence.setComponentId(COMPONENT_ID);
            sentence.addToIndexes();
            text.append("\n");
            sentenceBegin = text.length();
        }

        if (text.length() > 0) {
            // Drop the line break appended after the last sentence.
            text.setLength(text.length() - 1);
        } else {
            // Without this guard a file without sentences failed with an index error.
            LOGGER.warn("No sentences found in {}", str);
        }
        jCas.setDocumentText(text.toString());
    }

    /** Adds an indexed token spanning {@code [begin, end)} together with its lemma and POS tag. */
    private static void addToken(JCas jCas, int begin, int end, String lemmaValue, String posValue) {
        Token token = new Token(jCas, begin, end);
        token.setComponentId(COMPONENT_ID);

        Lemma lemma = new Lemma(jCas, begin, end);
        lemma.setValue(lemmaValue);
        lemma.addToIndexes();
        token.setLemma(lemma);

        STTSPOSTag posTag = new STTSPOSTag(jCas, begin, end);
        posTag.setValue(posValue);
        posTag.setComponentId(COMPONENT_ID);
        posTag.addToIndexes();

        FSArray posTags = new FSArray(jCas, 1);
        posTags.set(0, posTag);
        token.setPosTag(posTags);
        token.addToIndexes();
    }

    /**
     * Extracts the bibliographic metadata from the TEI header and adds it to the CAS as an
     * indexed {@link Header}.
     *
     * @param jCas CAS receiving the header
     * @param vTDNav navigator over the parsed file
     * @param str identifier of the file, used for the XML helper and in error messages
     * @throws IllegalArgumentException if the main title is missing or ambiguous, more than one
     *         volume is given, no classification can be derived, or the creation/publication
     *         year is missing or ambiguous
     */
    static void readHeader(JCas jCas, VTDNav vTDNav, String str) throws NoSuchMethodException, SecurityException, InstantiationException, IllegalAccessException, IllegalArgumentException, InvocationTargetException {
        Header header = new Header(jCas);

        Map<String, String[]> titles = mapAttribute2Text(str, vTDNav, XPATH_TITLE_STMT + "title", "@type");
        header.setTitle(getEntry(str, "main", titles));
        String[] subtitles = titles.get("sub");
        if (subtitles != null) {
            header.setSubtitle(NEW_LINE_JOINER.join(subtitles));
        }

        boolean volumeSeen = false;
        for (String volume : getAttributeForEach(str, vTDNav, XPATH_TITLE_STMT + "title[@type='volume']", "@n")) {
            if (volumeSeen) {
                throw new IllegalArgumentException(str + " has more than one volume!");
            }
            header.setVolume(volume);
            volumeSeen = true;
        }

        header.setAuthors(getPersons(jCas, vTDNav, str, PersonType.author));
        header.setEditors(getPersons(jCas, vTDNav, str, PersonType.editor));

        Map<String, String[]> classCodes = mapAttribute2Text(str, vTDNav, XPATH_PROFILE_DESC + "textClass/classCode", "@scheme");
        header.setIsCoreCorpus(MappingService.isCoreCorpus(classCodes));
        FSArray classifications = MappingService.getClassifications(jCas, str, classCodes);
        if (classifications == null) {
            throw new IllegalArgumentException(str + " missing classification!");
        }
        header.setClassifications(classifications);

        // A creation date takes precedence over a publication date.
        Map<String, String[]> dates = mapAttribute2Text(str, vTDNav, XPATH_YEAR, "@type");
        String[] years = dates.get("creation");
        if (years == null) {
            years = dates.get("publication");
        }
        if (years == null) {
            throw new IllegalArgumentException(str + " has no creation/publication year!");
        }
        if (years.length > 1) {
            throw new IllegalArgumentException(str + " has multiple creation/publication years!");
        }
        header.setYear(years[0]);

        header.setPublicationPlaces(toStringArray(jCas, getTexts(str, vTDNav, XPATH_PUBLICATION_STMT + "pubPlace")));
        header.setPublishers(toStringArray(jCas, getTexts(str, vTDNav, XPATH_PUBLICATION_STMT + "publisher/name")));
        header.addToIndexes();
    }

    /** Copies the values into a new, indexed {@link StringArray}. */
    private static StringArray toStringArray(JCas jCas, List<String> values) {
        StringArray array = new StringArray(jCas, values.size());
        for (int i = 0; i < values.size(); i++) {
            array.set(i, values.get(i));
        }
        array.addToIndexes();
        return array;
    }

    /** Nothing to release: input streams are closed as soon as a file has been parsed. */
    public void close() throws IOException {
    }

    /**
     * Reads the next input file into the CAS. Files rejected by
     * {@link #formatIsOk(String, VTDNav)} are skipped, which leaves the CAS untouched.
     *
     * @param cas the CAS to fill
     * @throws CollectionException wrapping any failure while reading or interpreting the file;
     *         the file counter is not advanced in that case
     */
    public void getNext(CAS cas) throws CollectionException {
        try {
            JCas jCas = cas.getJCas();
            File file = this.inputFiles.get(this.counter);
            VTDNav nav;
            // NOTE(review): closing right after parsing assumes getVTDNav consumes the whole
            // stream before returning - confirm against JulieXMLTools.
            try (FileInputStream in = new FileInputStream(file)) {
                nav = JulieXMLTools.getVTDNav(in, 1024);
            }
            String path = file.getCanonicalPath();
            LOGGER.info("Reading file:{} - {}", this.counter, path);
            if (formatIsOk(path, nav)) {
                readDocument(jCas, nav, path, this.normalize);
                readHeader(jCas, nav, path);
                LOGGER.info("Read file:{} - {}", this.counter, path);
            } else {
                LOGGER.info("Skipping file:{} - {}", this.counter, path);
            }
            this.counter++;
        } catch (Exception e) {
            throw new CollectionException(e);
        }
    }

    /** Reports how many of the collected input files have been processed so far. */
    public Progress[] getProgress() {
        return new Progress[]{new ProgressImpl(this.counter, this.inputFiles.size(), "entities")};
    }

    /** @return {@code true} while unprocessed input files remain */
    public boolean hasNext() throws IOException, CollectionException {
        return this.counter < this.inputFiles.size();
    }

    /**
     * Reads the configuration parameters and collects the input files.
     *
     * @throws IllegalArgumentException if the input parameter is unset, the path does not exist,
     *         or it is neither a {@code .tcf.xml} file nor a listable directory
     */
    public void initialize() throws ResourceInitializationException {
        String path = (String) getConfigParameterValue(PARAM_INPUTFILE);
        if (path == null) {
            throw new IllegalArgumentException("Parameter \"" + PARAM_INPUTFILE + "\" is not set!");
        }
        this.inputFile = path;

        // Honour an explicitly configured value; normalization is only the default.
        Object normalizeValue = getConfigParameterValue(PARAM_NORMALIZE);
        this.normalize = normalizeValue == null || ((Boolean) normalizeValue).booleanValue();

        File input = new File(path);
        if (!input.exists()) {
            throw new IllegalArgumentException(path + " does not exist!");
        }
        if (isTcfFile(input)) {
            this.inputFiles.add(input);
        } else {
            File[] children = input.listFiles();
            if (children == null) {
                throw new IllegalArgumentException("Unsure if " + path + " is a directroy...");
            }
            for (File child : children) {
                if (isTcfFile(child)) {
                    this.inputFiles.add(child);
                }
            }
        }
        LOGGER.info("Input contains {} xml files.", this.inputFiles.size());
    }

    /** @return {@code true} for regular files named {@code *.tcf.xml} */
    private static boolean isTcfFile(File file) {
        return file.isFile() && file.getName().endsWith(FILE_SUFFIX);
    }
}
