package de.julielab.jcore.reader.muc7;

import de.julielab.jcore.types.Paragraph;
import de.julielab.jcore.types.Section;
import de.julielab.jcore.types.muc7.Coref;
import de.julielab.jcore.types.muc7.ENAMEX;
import de.julielab.jcore.types.muc7.MUC7Header;
import de.julielab.jcore.types.muc7.NUMEX;
import de.julielab.jcore.types.muc7.TIMEX;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import org.apache.uima.util.Progress;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/* loaded from: input_file:de/julielab/jcore/reader/muc7/MUC7Reader.class */
public class MUC7Reader extends CollectionReader_ImplBase {
    // Names of the XML elements the reader looks up by node name.
    private static final String ELEMENT_DOCS = "DOC";
    private static final String ELEMENT_DOCID = "DOCID";
    private static final String ELEMENT_STORYID = "STORYID";
    private static final String ELEMENT_PARAGRAPH = "p";
    private static final String ELEMENT_COREF = "COREF";
    private static final String ELEMENT_TIMEX = "TIMEX";
    private static final String ELEMENT_ENAMEX = "ENAMEX";
    private static final String ELEMENT_NUMEX = "NUMEX";
    // Names of the attributes read from TIMEX/ENAMEX/NUMEX/COREF elements.
    private static final String ELEMENT_NE_MIN = "MIN";
    private static final String ELEMENT_NE_TYPE = "TYPE";
    // Search cursor into the document text; getNext() resets it to 0 for every DOC node.
    // NOTE(review): static, hence shared by all reader instances in the JVM -- confirm this is intended.
    private static int startPosition;
    // COREF id -> collected coreference data; getNext() replaces it for every DOC node.
    // NOTE(review): static as well -- same concern as startPosition.
    private static HashMap<Integer, MUC7Coreference> corefHashMap;
    // Plain files found in the input directory (assigned in initialize()).
    private List<File> files;
    // DOCID text -> all DOC nodes carrying that DOCID (built in initialize()).
    private HashMap<String, ArrayList<Node>> docIDDocNodeHash;
    // Iterator over the keys of docIDDocNodeHash; drives hasNext()/getNext().
    private Iterator<String> keyIter;
    // JCas of the CAS currently being filled by getNext().
    private JCas jcas;
    // XML parser created in initialize() and used to parse the input files.
    private DocumentBuilder builder;
    // Name of the configuration parameter holding the input directory path.
    public static final String PARAM_INPUTDIR = "InputDirectory";
    // UIMA logger, obtained from the UIMA context in initialize().
    private static Logger logger = null;
    // Names of the elements whose text content makes up the document text.
    private static final String ELEMENT_SLUG = "SLUG";
    private static final String ELEMENT_DATE = "DATE";
    private static final String ELEMENT_NWORDS = "NWORDS";
    private static final String ELEMENT_PREAMBLE = "PREAMBLE";
    private static final String ELEMENT_TEXT = "TEXT";
    private static final String ELEMENT_TRAILER = "TRAILER";
    // Element names collected (in this order) by annotateTextToBeProcessed() to build the document text.
    public static final String[] ELEMENT_TEXT_TO_BE_PROCESSED = {ELEMENT_SLUG, ELEMENT_DATE, ELEMENT_NWORDS, ELEMENT_PREAMBLE, ELEMENT_TEXT, ELEMENT_TRAILER};

    /**
     * Parses every given file and groups all of its {@code DOC} elements by the text content
     * of their first {@code DOCID} descendant.
     *
     * <p>A file that cannot be read or parsed is reported through the UIMA logger and skipped;
     * the remaining files are still processed. A {@code DOC} element without a non-empty
     * {@code DOCID} is skipped with a warning instead of aborting the whole run.
     *
     * @param list the files to parse
     * @return map from DOCID text to the DOC nodes that carry this id (never {@code null})
     */
    private HashMap<String, ArrayList<Node>> buildDocIDDocNodeHash(List<File> list) {
        HashMap<String, ArrayList<Node>> docsById = new HashMap<>();
        for (File file : list) {
            logger.log(Level.INFO, "buildDocIDDocNodeHash() -- Reading file " + file.getName());
            try {
                NodeList docElements = this.builder.parse(file).getElementsByTagName(ELEMENT_DOCS);
                for (int i = 0; i < docElements.getLength(); i++) {
                    Node docNode = docElements.item(i);
                    ArrayList<Node> docIdNodes = getChildrenNodes(docNode, ELEMENT_DOCID, new ArrayList<>());
                    if (docIdNodes.isEmpty()) {
                        // Without an id the document cannot be keyed; skip it rather than crash.
                        logger.log(Level.WARNING, "buildDocIDDocNodeHash() -- Skipping DOC element without DOCID in file " + file.getName());
                        continue;
                    }
                    String docId = docIdNodes.get(0).getTextContent();
                    ArrayList<Node> docsWithSameId = docsById.get(docId);
                    if (docsWithSameId == null) {
                        docsWithSameId = new ArrayList<>();
                        docsById.put(docId, docsWithSameId);
                    }
                    docsWithSameId.add(docNode);
                }
            } catch (IOException | SAXException e) {
                // Report through the UIMA logger (keeps the stack trace) instead of stderr.
                logger.log(Level.SEVERE, "buildDocIDDocNodeHash() -- Could not parse file " + file.getName(), e);
            }
        }
        return docsById;
    }

    /**
     * Builds the document text from the given DOC node and sets it on the current JCas.
     *
     * <p>The text is the concatenation of the text content of all descendants whose name is
     * listed in {@link #ELEMENT_TEXT_TO_BE_PROCESSED} (collected element name by element name,
     * in array order), passed through {@link #normalizeString(String)}.
     *
     * @param node the DOC node to read
     * @return the normalized document text that was set on the JCas
     */
    private String annotateTextToBeProcessed(Node node) {
        ArrayList<Node> textNodes = getChildrenNodes(node, ELEMENT_TEXT_TO_BE_PROCESSED, new ArrayList<>());
        // StringBuilder avoids re-copying the accumulated text for every appended element.
        StringBuilder rawText = new StringBuilder();
        for (Node textNode : textNodes) {
            rawText.append(textNode.getTextContent());
        }
        String documentText = normalizeString(rawText.toString());
        this.jcas.setDocumentText(documentText);
        return documentText;
    }

    /**
     * Adds a {@link MUC7Header} carrying the normalized DOCID and STORYID of the given DOC node
     * to the current JCas, unless a header with the same two values is already indexed.
     *
     * @param node the DOC node to read the ids from
     */
    private void annotateHeader(Node node) {
        ArrayList<Node> docIdNodes = getChildrenNodes(node, ELEMENT_DOCID, new ArrayList<>());
        String docId = normalizeString(docIdNodes.get(0).getTextContent());
        ArrayList<Node> storyIdNodes = getChildrenNodes(node, ELEMENT_STORYID, new ArrayList<>());
        String storyId = normalizeString(storyIdNodes.get(0).getTextContent());

        // Leave early when an identical header is already present.
        FSIterator headers = this.jcas.getAnnotationIndex(MUC7Header.type).iterator();
        while (headers.hasNext()) {
            MUC7Header existing = (MUC7Header) headers.next();
            if (existing.getDocId().equals(docId) && existing.getStoryID().equals(storyId)) {
                return;
            }
        }

        MUC7Header header = new MUC7Header(this.jcas);
        header.setDocId(docId);
        header.setStoryID(storyId);
        header.addToIndexes(this.jcas);
    }

    /**
     * Annotates the first SLUG element of the given DOC node.
     *
     * <p>Locates the normalized SLUG text in the document text starting at the current search
     * position, advances the search position to the end of the match, collects the COREF data
     * and adds the ENAMEX/TIMEX/NUMEX annotations found inside the element, and finally adds a
     * {@link Section} of type "Slug" unless an identical one is already indexed.
     *
     * @param node the DOC node containing the SLUG element
     * @param str  the normalized document text
     */
    private void annotateSlug(Node node, String str) {
        ArrayList<Node> slugNodes = getChildrenNodes(node, ELEMENT_SLUG, new ArrayList<>());
        Node slugNode = slugNodes.get(0);
        int[] span = getBeginEndOfSequence(normalizeString(slugNode.getTextContent()), str, startPosition);
        startPosition = span[1];
        buildCorefHashMap(slugNode, span);
        annotateENAMEX(slugNode, span);
        annotateTIMEX(slugNode, span);
        annotateNUMEX(slugNode, span);

        // The duplicate check must use the very same begin offset that is stored below.
        // Comparing against begin - 1 never matched and produced duplicate sections.
        FSIterator sections = this.jcas.getAnnotationIndex(Section.type).iterator();
        while (sections.hasNext()) {
            Section existing = (Section) sections.next();
            if (existing.getBegin() == span[0] && existing.getEnd() == span[1] && existing.getSectionType().equals("Slug")) {
                return;
            }
        }

        Section slugSection = new Section(this.jcas);
        slugSection.setSectionType("Slug");
        slugSection.setBegin(span[0]);
        slugSection.setEnd(span[1]);
        slugSection.addToIndexes(this.jcas);
    }

    /**
     * Annotates the first DATE element of the given DOC node.
     *
     * <p>Locates the normalized DATE text in the document text from the current search
     * position, moves the search position to the end of the match, processes the nested
     * COREF/ENAMEX/TIMEX/NUMEX elements and adds a {@link Section} of type "Date" if no
     * identical section is indexed yet.
     *
     * @param node the DOC node containing the DATE element
     * @param str  the normalized document text
     */
    private void annotateDate(Node node, String str) {
        Node dateNode = getChildrenNodes(node, ELEMENT_DATE, new ArrayList<>()).get(0);
        int[] span = getBeginEndOfSequence(normalizeString(dateNode.getTextContent()), str, startPosition);
        startPosition = span[1];
        buildCorefHashMap(dateNode, span);
        annotateENAMEX(dateNode, span);
        annotateTIMEX(dateNode, span);
        annotateNUMEX(dateNode, span);

        // Nothing to add when the very same section already exists.
        FSIterator sections = this.jcas.getAnnotationIndex(Section.type).iterator();
        while (sections.hasNext()) {
            Section existing = (Section) sections.next();
            if (existing.getBegin() == span[0] && existing.getEnd() == span[1] && existing.getSectionType().equals("Date")) {
                return;
            }
        }

        Section dateSection = new Section(this.jcas);
        dateSection.setSectionType("Date");
        dateSection.setBegin(span[0]);
        dateSection.setEnd(span[1]);
        dateSection.addToIndexes(this.jcas);
    }

    /**
     * Annotates the first NWORDS element of the given DOC node.
     *
     * <p>Locates the normalized NWORDS text in the document text from the current search
     * position, moves the search position to the end of the match, processes the nested
     * COREF/ENAMEX/TIMEX/NUMEX elements and adds a {@link Section} of type "Number of Words"
     * if no identical section is indexed yet.
     *
     * @param node the DOC node containing the NWORDS element
     * @param str  the normalized document text
     */
    private void annotateNumOfWords(Node node, String str) {
        Node nwordsNode = getChildrenNodes(node, ELEMENT_NWORDS, new ArrayList<>()).get(0);
        int[] span = getBeginEndOfSequence(normalizeString(nwordsNode.getTextContent()), str, startPosition);
        startPosition = span[1];
        buildCorefHashMap(nwordsNode, span);
        annotateENAMEX(nwordsNode, span);
        annotateTIMEX(nwordsNode, span);
        annotateNUMEX(nwordsNode, span);

        // Nothing to add when the very same section already exists.
        FSIterator sections = this.jcas.getAnnotationIndex(Section.type).iterator();
        while (sections.hasNext()) {
            Section existing = (Section) sections.next();
            if (existing.getBegin() == span[0] && existing.getEnd() == span[1] && existing.getSectionType().equals("Number of Words")) {
                return;
            }
        }

        Section nwordsSection = new Section(this.jcas);
        nwordsSection.setSectionType("Number of Words");
        nwordsSection.setBegin(span[0]);
        nwordsSection.setEnd(span[1]);
        nwordsSection.addToIndexes(this.jcas);
    }

    /**
     * Annotates the first PREAMBLE element of the given DOC node.
     *
     * <p>Locates the normalized PREAMBLE text in the document text from the current search
     * position, moves the search position to the end of the match, processes the nested
     * COREF/ENAMEX/TIMEX/NUMEX elements and adds a {@link Section} of type "Preamble" if no
     * identical section is indexed yet.
     *
     * @param node the DOC node containing the PREAMBLE element
     * @param str  the normalized document text
     */
    private void annotatePreamble(Node node, String str) {
        Node preambleNode = getChildrenNodes(node, ELEMENT_PREAMBLE, new ArrayList<>()).get(0);
        int[] span = getBeginEndOfSequence(normalizeString(preambleNode.getTextContent()), str, startPosition);
        startPosition = span[1];
        buildCorefHashMap(preambleNode, span);
        annotateENAMEX(preambleNode, span);
        annotateTIMEX(preambleNode, span);
        annotateNUMEX(preambleNode, span);

        // Nothing to add when the very same section already exists.
        FSIterator sections = this.jcas.getAnnotationIndex(Section.type).iterator();
        while (sections.hasNext()) {
            Section existing = (Section) sections.next();
            if (existing.getBegin() == span[0] && existing.getEnd() == span[1] && existing.getSectionType().equals("Preamble")) {
                return;
            }
        }

        Section preambleSection = new Section(this.jcas);
        preambleSection.setSectionType("Preamble");
        preambleSection.setBegin(span[0]);
        preambleSection.setEnd(span[1]);
        preambleSection.addToIndexes(this.jcas);
    }

    /**
     * Annotates the first TEXT element of the given DOC node and the paragraphs inside it.
     *
     * <p>Locates the normalized TEXT content in the document text from the current search
     * position, delegates to {@link #annotateParagraphs(Node, String)} and adds a
     * {@link Section} of type "Text" if no identical section is indexed yet.
     *
     * @param node the DOC node containing the TEXT element
     * @param str  the normalized document text
     */
    private void annotateText(Node node, String str) {
        Node textNode = getChildrenNodes(node, ELEMENT_TEXT, new ArrayList<>()).get(0);
        int[] span = getBeginEndOfSequence(normalizeString(textNode.getTextContent()), str, startPosition);
        // The cursor is set to the BEGIN of the section (not its end) so that the paragraph
        // search performed next starts inside the TEXT span.
        startPosition = span[0];
        annotateParagraphs(textNode, str);

        // Nothing to add when the very same section already exists.
        FSIterator sections = this.jcas.getAnnotationIndex(Section.type).iterator();
        while (sections.hasNext()) {
            Section existing = (Section) sections.next();
            if (existing.getBegin() == span[0] && existing.getEnd() == span[1] && existing.getSectionType().equals("Text")) {
                return;
            }
        }

        Section textSection = new Section(this.jcas);
        textSection.setSectionType("Text");
        textSection.setBegin(span[0]);
        textSection.setEnd(span[1]);
        textSection.addToIndexes(this.jcas);
    }

    /**
     * Annotates the first TRAILER element of the given DOC node.
     *
     * <p>Locates the normalized TRAILER text in the document text from the current search
     * position, moves the search position to the end of the match, processes the nested
     * COREF/ENAMEX/TIMEX/NUMEX elements and adds a {@link Section} of type "Trailer" if no
     * identical section is indexed yet.
     *
     * @param node the DOC node containing the TRAILER element
     * @param str  the normalized document text
     */
    private void annotateTrailer(Node node, String str) {
        Node trailerNode = getChildrenNodes(node, ELEMENT_TRAILER, new ArrayList<>()).get(0);
        int[] span = getBeginEndOfSequence(normalizeString(trailerNode.getTextContent()), str, startPosition);
        startPosition = span[1];
        buildCorefHashMap(trailerNode, span);
        annotateENAMEX(trailerNode, span);
        annotateTIMEX(trailerNode, span);
        annotateNUMEX(trailerNode, span);

        // Nothing to add when the very same section already exists.
        FSIterator sections = this.jcas.getAnnotationIndex(Section.type).iterator();
        while (sections.hasNext()) {
            Section existing = (Section) sections.next();
            if (existing.getBegin() == span[0] && existing.getEnd() == span[1] && existing.getSectionType().equals("Trailer")) {
                return;
            }
        }

        Section trailerSection = new Section(this.jcas);
        trailerSection.setSectionType("Trailer");
        trailerSection.setBegin(span[0]);
        trailerSection.setEnd(span[1]);
        trailerSection.addToIndexes(this.jcas);
    }

    /**
     * Annotates every {@code p} element below the given node.
     *
     * <p>For each paragraph, in document order: locates its normalized text in the document
     * text from the current search position, advances the search position to the end of the
     * match, processes the nested COREF/ENAMEX/TIMEX/NUMEX elements and adds a
     * {@link Paragraph} with these offsets unless one with the same offsets is already indexed.
     *
     * @param node the node whose paragraphs are annotated
     * @param str  the normalized document text
     */
    private void annotateParagraphs(Node node, String str) {
        ArrayList<Node> paragraphNodes = getChildrenNodes(node, ELEMENT_PARAGRAPH, new ArrayList<>());
        for (Node paragraphNode : paragraphNodes) {
            int[] span = getBeginEndOfSequence(normalizeString(paragraphNode.getTextContent()), str, startPosition);
            startPosition = span[1];
            buildCorefHashMap(paragraphNode, span);
            annotateENAMEX(paragraphNode, span);
            annotateTIMEX(paragraphNode, span);
            annotateNUMEX(paragraphNode, span);

            // Look for an already indexed paragraph covering exactly the same span.
            boolean alreadyIndexed = false;
            FSIterator paragraphs = this.jcas.getAnnotationIndex(Paragraph.type).iterator();
            while (!alreadyIndexed && paragraphs.hasNext()) {
                Paragraph existing = (Paragraph) paragraphs.next();
                alreadyIndexed = existing.getBegin() == span[0] && existing.getEnd() == span[1];
            }
            if (alreadyIndexed) {
                continue;
            }

            Paragraph paragraph = new Paragraph(this.jcas);
            paragraph.setBegin(span[0]);
            paragraph.setEnd(span[1]);
            paragraph.addToIndexes(this.jcas);
        }
    }

    /**
     * Adds a {@link TIMEX} annotation for every TIMEX element below the given node.
     *
     * <p>The offsets of a mention are first determined relative to the normalized text of
     * {@code node} (searching behind the text that precedes the mention) and then shifted by
     * {@code iArr[0]}. The TYPE attribute and the optional MIN attribute (empty string when
     * absent) are stored normalized. A mention is skipped when a TIMEX with identical offsets,
     * type and MIN is already indexed.
     *
     * @param node the element whose TIMEX descendants are annotated
     * @param iArr begin/end offsets of {@code node} in the document text; only the begin is used
     */
    private void annotateTIMEX(Node node, int[] iArr) {
        ArrayList<Node> timexNodes = getChildrenNodes(node, ELEMENT_TIMEX, new ArrayList<>());
        for (Node timexNode : timexNodes) {
            Node minAttribute = timexNode.getAttributes().getNamedItem(ELEMENT_NE_MIN);
            String min = "";
            if (minAttribute != null) {
                min = normalizeString(minAttribute.getNodeValue());
            }
            String specificType = normalizeString(timexNode.getAttributes().getNamedItem(ELEMENT_NE_TYPE).getNodeValue());

            // Position of the mention inside the enclosing element's text.
            String leftContext = normalizeString(getLeftTextContext(timexNode, node));
            String enclosingText = normalizeString(node.getTextContent());
            int[] leftSpan = getBeginEndOfSequence(leftContext, enclosingText, 0);
            int[] localSpan = getBeginEndOfSequence(normalizeString(timexNode.getTextContent()), enclosingText, leftSpan[1]);
            int begin = localSpan[0] + iArr[0];
            int end = localSpan[1] + iArr[0];

            boolean alreadyIndexed = false;
            FSIterator timexes = this.jcas.getAnnotationIndex(TIMEX.type).iterator();
            while (!alreadyIndexed && timexes.hasNext()) {
                TIMEX existing = (TIMEX) timexes.next();
                alreadyIndexed = existing.getBegin() == begin && existing.getEnd() == end && existing.getSpecificType().equals(specificType) && existing.getMin().equals(min);
            }
            if (alreadyIndexed) {
                continue;
            }

            TIMEX timex = new TIMEX(this.jcas);
            timex.setBegin(begin);
            timex.setEnd(end);
            timex.setSpecificType(specificType);
            timex.setMin(min);
            timex.addToIndexes(this.jcas);
        }
    }

    /**
     * Adds an {@link ENAMEX} annotation for every ENAMEX element below the given node.
     *
     * <p>The offsets of a mention are first determined relative to the normalized text of
     * {@code node} (searching behind the text that precedes the mention) and then shifted by
     * {@code iArr[0]}. The TYPE attribute is stored normalized; the optional MIN attribute is
     * stored verbatim, without normalization (empty string when absent). A mention is skipped
     * when an ENAMEX with identical offsets, type and MIN is already indexed.
     *
     * @param node the element whose ENAMEX descendants are annotated
     * @param iArr begin/end offsets of {@code node} in the document text; only the begin is used
     */
    private void annotateENAMEX(Node node, int[] iArr) {
        ArrayList<Node> enamexNodes = getChildrenNodes(node, ELEMENT_ENAMEX, new ArrayList<>());
        for (Node enamexNode : enamexNodes) {
            Node minAttribute = enamexNode.getAttributes().getNamedItem(ELEMENT_NE_MIN);
            String min = "";
            if (minAttribute != null) {
                min = minAttribute.getNodeValue();
            }
            String specificType = normalizeString(enamexNode.getAttributes().getNamedItem(ELEMENT_NE_TYPE).getNodeValue());

            // Position of the mention inside the enclosing element's text.
            String leftContext = normalizeString(getLeftTextContext(enamexNode, node));
            String enclosingText = normalizeString(node.getTextContent());
            int[] leftSpan = getBeginEndOfSequence(leftContext, enclosingText, 0);
            int[] localSpan = getBeginEndOfSequence(normalizeString(enamexNode.getTextContent()), enclosingText, leftSpan[1]);
            int begin = localSpan[0] + iArr[0];
            int end = localSpan[1] + iArr[0];

            boolean alreadyIndexed = false;
            FSIterator enamexes = this.jcas.getAnnotationIndex(ENAMEX.type).iterator();
            while (!alreadyIndexed && enamexes.hasNext()) {
                ENAMEX existing = (ENAMEX) enamexes.next();
                alreadyIndexed = existing.getBegin() == begin && existing.getEnd() == end && existing.getSpecificType().equals(specificType) && existing.getMin().equals(min);
            }
            if (alreadyIndexed) {
                continue;
            }

            ENAMEX enamex = new ENAMEX(this.jcas);
            enamex.setBegin(begin);
            enamex.setEnd(end);
            enamex.setSpecificType(specificType);
            enamex.setMin(min);
            enamex.addToIndexes(this.jcas);
        }
    }

    /**
     * Adds a {@link NUMEX} annotation for every NUMEX element below the given node.
     *
     * <p>The offsets of a mention are first determined relative to the normalized text of
     * {@code node} (searching behind the text that precedes the mention) and then shifted by
     * {@code iArr[0]}. The TYPE attribute is stored normalized; the optional MIN attribute is
     * normalized and stripped of leading spaces (empty string when absent). A mention is
     * skipped when a NUMEX with identical offsets, type and MIN is already indexed.
     *
     * @param node the element whose NUMEX descendants are annotated
     * @param iArr begin/end offsets of {@code node} in the document text; only the begin is used
     */
    private void annotateNUMEX(Node node, int[] iArr) {
        ArrayList<Node> numexNodes = getChildrenNodes(node, ELEMENT_NUMEX, new ArrayList<>());
        for (Node numexNode : numexNodes) {
            Node minAttribute = numexNode.getAttributes().getNamedItem(ELEMENT_NE_MIN);
            String min = "";
            if (minAttribute != null) {
                min = normalizeString(minAttribute.getNodeValue()).replaceAll("^ +", "");
            }
            String specificType = normalizeString(numexNode.getAttributes().getNamedItem(ELEMENT_NE_TYPE).getNodeValue());

            // Position of the mention inside the enclosing element's text.
            String leftContext = normalizeString(getLeftTextContext(numexNode, node));
            String enclosingText = normalizeString(node.getTextContent());
            int[] leftSpan = getBeginEndOfSequence(leftContext, enclosingText, 0);
            int[] localSpan = getBeginEndOfSequence(normalizeString(numexNode.getTextContent()), enclosingText, leftSpan[1]);
            int begin = localSpan[0] + iArr[0];
            int end = localSpan[1] + iArr[0];

            boolean alreadyIndexed = false;
            FSIterator numexes = this.jcas.getAnnotationIndex(NUMEX.type).iterator();
            while (!alreadyIndexed && numexes.hasNext()) {
                NUMEX existing = (NUMEX) numexes.next();
                alreadyIndexed = existing.getBegin() == begin && existing.getEnd() == end && existing.getSpecificType().equals(specificType) && existing.getMin().equals(min);
            }
            if (alreadyIndexed) {
                continue;
            }

            NUMEX numex = new NUMEX(this.jcas);
            numex.setBegin(begin);
            numex.setEnd(end);
            numex.setSpecificType(specificType);
            numex.setMin(min);
            numex.addToIndexes(this.jcas);
        }
    }

    /**
     * Collects the data of every COREF element below the given node into {@code corefHashMap}.
     *
     * <p>For each mention the offsets are determined relative to the normalized text of
     * {@code node} (searching behind the text that precedes the mention) and shifted by
     * {@code iArr[0]}. The ID attribute is mandatory; REF defaults to {@code -1} when absent;
     * TYPE and MIN are stored normalized when present. A mention whose id is already in the
     * map does not replace the existing entry.
     *
     * @param node the element whose COREF descendants are collected
     * @param iArr begin/end offsets of {@code node} in the document text; only the begin is used
     * @throws NumberFormatException if an ID or REF attribute is not a valid integer
     */
    private void buildCorefHashMap(Node node, int[] iArr) {
        ArrayList<Node> corefNodes = getChildrenNodes(node, ELEMENT_COREF, new ArrayList<>());
        if (corefNodes.isEmpty()) {
            return;
        }
        // Loop-invariant: the enclosing element's text is the same for every mention.
        String enclosingText = normalizeString(node.getTextContent());
        for (Node corefNode : corefNodes) {
            String leftContext = normalizeString(getLeftTextContext(corefNode, node));
            int[] leftSpan = getBeginEndOfSequence(leftContext, enclosingText, 0);
            int[] localSpan = getBeginEndOfSequence(normalizeString(corefNode.getTextContent()), enclosingText, leftSpan[1]);

            MUC7Coreference coreference = new MUC7Coreference();
            coreference.setBegin(localSpan[0] + iArr[0]);
            coreference.setEnd(localSpan[1] + iArr[0]);

            // Integer.parseInt replaces the deprecated Integer(String) constructor.
            int id = Integer.parseInt(corefNode.getAttributes().getNamedItem("ID").getNodeValue());
            coreference.setId(id);

            Node refAttribute = corefNode.getAttributes().getNamedItem("REF");
            if (refAttribute != null) {
                coreference.setRefID(Integer.parseInt(refAttribute.getNodeValue()));
            } else {
                // -1 marks a mention that does not refer to another one.
                coreference.setRefID(-1);
            }

            Node typeAttribute = corefNode.getAttributes().getNamedItem(ELEMENT_NE_TYPE);
            if (typeAttribute != null) {
                coreference.setTypeOfCoref(normalizeString(typeAttribute.getNodeValue()));
            }

            Node minAttribute = corefNode.getAttributes().getNamedItem(ELEMENT_NE_MIN);
            if (minAttribute != null) {
                coreference.setMinHead(normalizeString(minAttribute.getNodeValue()));
            }

            // The first mention seen for an id wins.
            if (!corefHashMap.containsKey(Integer.valueOf(id))) {
                corefHashMap.put(Integer.valueOf(id), coreference);
            }
        }
    }

    /**
     * Creates the {@link Coref} annotations for all entries of {@code corefHashMap} and, when
     * the map is not empty, links them to their referenced mentions afterwards.
     */
    private void annotateCorefs() {
        for (Integer corefId : corefHashMap.keySet()) {
            buildCorefFromCorefHashMap(corefId.intValue());
        }
        if (!corefHashMap.isEmpty()) {
            buildCorefReferences();
        }
    }

    /**
     * Makes sure the CAS contains a {@link Coref} annotation for the collected mention with
     * the given id.
     *
     * <p>If a Coref with the same begin and end offsets is already indexed, that annotation is
     * returned unchanged. Otherwise a new Coref is created from the collected data, added to
     * the indexes and returned.
     *
     * @param i id of the mention; must be a key of {@code corefHashMap}
     * @return the indexed Coref covering the mention's span
     */
    private Coref buildCorefFromCorefHashMap(int i) {
        MUC7Coreference coreference = corefHashMap.get(Integer.valueOf(i));
        Coref existing = getCorefFromCAS(coreference.getBegin(), coreference.getEnd());
        if (existing != null) {
            // Return the annotation that is already there instead of allocating a blank,
            // unindexed feature structure in the CAS.
            return existing;
        }
        // No Coref with this span exists, so a second, stricter duplicate scan is unnecessary.
        Coref coref = new Coref(this.jcas);
        coref.setBegin(coreference.getBegin());
        coref.setEnd(coreference.getEnd());
        coref.setCorefType(coreference.getTypeOfCoref());
        coref.setMin(coreference.getMinHead());
        coref.setId(coreference.getId());
        coref.addToIndexes(this.jcas);
        return coref;
    }

    /**
     * Sets the {@code ref} feature of the indexed {@link Coref} annotations.
     *
     * <p>For every Coref whose id has an entry in {@code corefHashMap} with a referenced id
     * greater than {@code -1} that is itself a key of the map, the reference is set to the
     * Coref found at the referenced mention's span (or {@code null} if none is indexed there).
     * Corefs whose id is not in the map are left untouched; the map is rebuilt for every DOC
     * node, while the CAS may still hold Corefs created for an earlier node.
     */
    private void buildCorefReferences() {
        FSIterator corefs = this.jcas.getJFSIndexRepository().getAnnotationIndex(Coref.type).iterator();
        while (corefs.hasNext()) {
            Coref coref = (Coref) corefs.next();
            MUC7Coreference coreference = corefHashMap.get(Integer.valueOf(coref.getId()));
            if (coreference == null) {
                // Unknown id for the current map: skip instead of failing with an NPE.
                continue;
            }
            int refID = coreference.getRefID();
            if (refID > -1) {
                MUC7Coreference referenced = corefHashMap.get(Integer.valueOf(refID));
                if (referenced != null) {
                    coref.setRef(getCorefFromCAS(referenced.getBegin(), referenced.getEnd()));
                }
            }
        }
    }

    /**
     * Looks up the first indexed {@link Coref} annotation with exactly the given offsets.
     *
     * @param i  begin offset to match
     * @param i2 end offset to match
     * @return the matching Coref, or {@code null} if none is indexed
     */
    private Coref getCorefFromCAS(int i, int i2) {
        Coref match = null;
        FSIterator corefs = this.jcas.getJFSIndexRepository().getAnnotationIndex(Coref.type).iterator();
        while (match == null && corefs.hasNext()) {
            Coref candidate = (Coref) corefs.next();
            if (candidate.getBegin() == i && candidate.getEnd() == i2) {
                match = candidate;
            }
        }
        return match;
    }

    /**
     * Returns the text of {@code node2} that precedes {@code node}.
     *
     * <p>Concatenates the content of the non-empty {@code #text} nodes below {@code node2},
     * in traversal order, stopping at the first {@code #text} node of {@code node}.
     *
     * @param node  the element whose left context is wanted; must contain a non-empty text node
     * @param node2 the enclosing element
     * @return the concatenated, un-normalized text left of {@code node}
     */
    private String getLeftTextContext(Node node, Node node2) {
        ArrayList<Node> enclosingTextNodes = getChildrenNodes(node2, "#text", new ArrayList<>());
        Node firstOwnTextNode = getChildrenNodes(node, "#text", new ArrayList<>()).get(0);
        StringBuilder leftContext = new StringBuilder();
        for (Node textNode : enclosingTextNodes) {
            if (textNode.equals(firstOwnTextNode)) {
                break;
            }
            leftContext.append(textNode.getTextContent());
        }
        return leftContext.toString();
    }

    /**
     * Lists the plain files (no sub-directories) of the directory configured through the
     * {@link #PARAM_INPUTDIR} parameter.
     *
     * <p>An empty list is returned, and a warning is logged, when the parameter is not set,
     * when the path does not denote an existing directory, or when the directory cannot be
     * listed.
     *
     * @return the files found; never {@code null}
     */
    private List<File> getFilesFromInputDirectory() {
        List<File> inputFiles = new ArrayList<>();
        Object configuredDir = getConfigParameterValue(PARAM_INPUTDIR);
        if (configuredDir == null) {
            logger.log(Level.WARNING, "getFilesFromInputDirectory() Client has to set configuration parameter 'InputDirectory'.");
            return inputFiles;
        }
        File inputDir = new File(((String) configuredDir).trim());
        if (!inputDir.exists() || !inputDir.isDirectory()) {
            logger.log(Level.WARNING, "getFilesFromInputDirectory() " + inputDir + " does not exist. Client has to set configuration parameter 'InputDirectory'.");
            return inputFiles;
        }
        // File.listFiles() returns null when an I/O error occurs while listing.
        File[] entries = inputDir.listFiles();
        if (entries == null) {
            logger.log(Level.WARNING, "getFilesFromInputDirectory() " + inputDir + " could not be listed.");
            return inputFiles;
        }
        for (File entry : entries) {
            if (!entry.isDirectory()) {
                inputFiles.add(entry);
            }
        }
        logger.log(Level.INFO, "MUC7 Reader found " + inputFiles.size() + " files in folder " + inputDir + ".");
        return inputFiles;
    }

    /**
     * Collects, in depth-first document order, the given node and all of its descendants
     * whose node name equals {@code str} and whose text content is not empty.
     *
     * @param node      root of the subtree to search (it is itself a candidate)
     * @param str       node name to match, e.g. an element name or {@code "#text"}
     * @param arrayList list the matches are appended to
     * @return {@code arrayList}, for convenience
     */
    private ArrayList<Node> getChildrenNodes(Node node, String str, ArrayList<Node> arrayList) {
        boolean nameMatches = node.getNodeName().equals(str);
        if (nameMatches && !node.getTextContent().isEmpty()) {
            arrayList.add(node);
        }
        // Walk the children through the sibling links instead of an indexed NodeList.
        for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
            getChildrenNodes(child, str, arrayList);
        }
        return arrayList;
    }

    /**
     * Collects the matching nodes for several node names: all matches for the first name come
     * first, followed by all matches for the second name, and so on.
     *
     * @param node      root of the subtree to search
     * @param strArr    node names to match, processed in array order
     * @param arrayList list the matches are appended to
     * @return the list holding all matches
     */
    private ArrayList<Node> getChildrenNodes(Node node, String[] strArr, ArrayList<Node> arrayList) {
        ArrayList<Node> collected = arrayList;
        for (int nameIndex = 0; nameIndex < strArr.length; nameIndex++) {
            collected = getChildrenNodes(node, strArr[nameIndex], collected);
        }
        return collected;
    }

    /**
     * Determines the offsets of the first occurrence of a token in a text at or after a given
     * position.
     *
     * <p>NOTE: when the token does not occur, the begin offset is {@code i - 1}, because the
     * {@code -1} returned by {@code indexOf} is added to {@code i}. Callers get no explicit
     * "not found" signal.
     *
     * @param str  the token to find
     * @param str2 the text to search in
     * @param i    the position in {@code str2} at which the search starts
     * @return array of begin (inclusive) and end (exclusive) offset in {@code str2}
     * @throws StringIndexOutOfBoundsException if {@code i} is negative or beyond the text length
     */
    public int[] getBeginEndOfToken(String str, String str2, int i) {
        int offsetInRemainder = str2.substring(i).indexOf(str);
        int begin = offsetInRemainder + i;
        int end = begin + str.length();
        return new int[] {begin, end};
    }

    /**
     * Determines the span of a sequence of space-separated tokens in a text.
     *
     * <p>The sequence is split at single spaces. The first token is searched from position
     * {@code i}; every further token is searched from the end of the previous token's match.
     * The result spans from the begin of the first match to the end of the last one. If
     * splitting yields no token at all, the whole sequence is searched as a single token.
     *
     * @param str  the token sequence to find
     * @param str2 the text to search in
     * @param i    the position in {@code str2} at which the search starts
     * @return array of begin (inclusive) and end (exclusive) offset in {@code str2}
     */
    public int[] getBeginEndOfSequence(String str, String str2, int i) {
        String[] tokens = str.split(" ");
        int[] lastMatch;
        if (tokens.length == 0) {
            lastMatch = getBeginEndOfToken(str, str2, i);
        } else {
            lastMatch = getBeginEndOfToken(tokens[0], str2, i);
        }
        int sequenceBegin = lastMatch[0];
        int sequenceEnd = lastMatch[1];
        for (int t = 1; t < tokens.length; t++) {
            lastMatch = getBeginEndOfToken(tokens[t], str2, lastMatch[1]);
            sequenceEnd = lastMatch[1];
        }
        return new int[] {sequenceBegin, sequenceEnd};
    }

    /**
     * Normalizes text taken from the MUC7 markup.
     *
     * <p>In this order: removes every run of upper-case ASCII letters followed by a semicolon,
     * replaces line feeds by spaces, collapses each whitespace run into a single space and
     * removes leading whitespace.
     *
     * @param str the text to normalize
     * @return the normalized text
     */
    public String normalizeString(String str) {
        String withoutEntityRemnants = str.replaceAll("[A-Z]+;", "");
        String singleLine = withoutEntityRemnants.replaceAll("\n", " ");
        String collapsed = singleLine.replaceAll("\\s+", " ");
        return collapsed.replaceFirst("^[\\s]+", "");
    }

    /**
     * Initializes the reader: obtains the UIMA logger, creates the XML parser, lists the input
     * files and groups all DOC elements found in them by DOCID.
     *
     * <p>When no input files are available the reader is initialized with an empty document
     * set, so that {@link #hasNext()} simply reports {@code false}.
     *
     * @throws ResourceInitializationException if the XML parser cannot be created
     */
    @Override
    public void initialize() throws ResourceInitializationException {
        logger = getUimaContext().getLogger();
        logger.log(Level.INFO, "initialize() - Initializing MUC7 Reader...");
        try {
            this.builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            logger.log(Level.SEVERE, "initialize() " + e.getMessage());
            // Without a parser nothing can be read; fail here instead of with an NPE later.
            throw new ResourceInitializationException(e);
        }
        this.files = getFilesFromInputDirectory();
        if (this.files == null || this.files.isEmpty()) {
            // Keep the iteration state non-null even when there is nothing to read.
            this.docIDDocNodeHash = new HashMap<>();
        } else {
            this.docIDDocNodeHash = buildDocIDDocNodeHash(this.files);
        }
        this.keyIter = this.docIDDocNodeHash.keySet().iterator();
    }

    /**
     * Fills the given CAS with the next document.
     *
     * <p>The document text is built from the first DOC node registered for the next DOCID.
     * Then, for every DOC node with that DOCID, the search position and the coreference map
     * are reset and the header, the sections (slug, date, number of words, preamble, text,
     * trailer) and the coreferences are annotated.
     *
     * @param cas the CAS to fill; it is reset first
     * @throws CollectionException if the JCas cannot be obtained from the CAS
     */
    public void getNext(CAS cas) throws IOException, CollectionException {
        String docId = this.keyIter.next();
        ArrayList<Node> docNodes = this.docIDDocNodeHash.get(docId);
        try {
            this.jcas = cas.getJCas();
            this.jcas.reset();
            String documentText = annotateTextToBeProcessed(docNodes.get(0));
            for (Node docNode : docNodes) {
                // Every DOC node is matched against the text from scratch.
                corefHashMap = new HashMap<>();
                startPosition = 0;
                annotateHeader(docNode);
                annotateSlug(docNode, documentText);
                annotateDate(docNode, documentText);
                annotateNumOfWords(docNode, documentText);
                annotatePreamble(docNode, documentText);
                annotateText(docNode, documentText);
                annotateTrailer(docNode, documentText);
                annotateCorefs();
            }
        } catch (CASException e) {
            throw new CollectionException(e);
        }
    }

    /**
     * Does nothing; the method body is empty.
     */
    public void close() throws IOException {
    }

    /**
     * Progress is not tracked by this reader.
     *
     * @return always {@code null}
     */
    // NOTE(review): callers that iterate over the returned array would fail on null -- confirm
    // that no consumer of this reader does so.
    public Progress[] getProgress() {
        return null;
    }

    /**
     * Tells whether another document is available.
     *
     * @return {@code true} if a further DOCID is left to be read; {@code false} otherwise,
     *         including the case that no document iterator has been set up (for example
     *         because no input files were found during initialization)
     */
    public boolean hasNext() throws IOException, CollectionException {
        // keyIter is still null when initialization found nothing to read.
        return this.keyIter != null && this.keyIter.hasNext();
    }
}
