package de.julielab.jcore.cr.mmax2;

import de.julielab.jcore.types.ConceptMention;
import de.julielab.jcore.types.Gene;
import de.julielab.jcore.types.Header;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.types.Token;
import de.julielab.jcore.utility.JCoReAnnotationTools;
import de.julielab.jules.mmax.MarkableContainer;
import de.julielab.jules.mmax.Statistics;
import de.julielab.jules.mmax.WordInformation;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.apache.uima.UimaContext;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.eml.MMAX2.annotation.markables.Markable;
import org.eml.MMAX2.discourse.MMAX2Discourse;
import org.eml.MMAX2.discourse.MMAX2DiscourseElement;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ResourceMetaData(name = "JCoRe MMAX2 reader", description = "Collection reader for MMAX2 annotation projects.", vendor = "JULIE Lab Jena, Germany")
/* loaded from: input_file:de/julielab/jcore/cr/mmax2/MMAX2Reader.class */
public class MMAX2Reader extends JCasCollectionReader_ImplBase {
    /** Name of the configuration parameter holding the directory that contains the MMAX2 project directories. */
    public static final String PARAM_INPUT_DIR = "InputDir";
    /** Name of the configuration parameter listing the MMAX2 annotation levels to read. */
    public static final String PARAM_ANNOTATION_LEVELS = "AnnotationLevels";
    /** Name of the configuration parameter holding the directory with the original (untokenized) text files. */
    public static final String PARAM_ORIGINAL_TEXT_FILES = "OriginalTextFiles";
    /** Name of the configuration parameter listing the UIMA type names, index-aligned with the annotation levels. */
    public static final String PARAM_UIMA_ANNOTATION_TYPES = "UimaAnnotationTypes";
    /** Name of the configuration parameter that switches the removal of overlapping shorter annotations on. */
    public static final String PARAM_REMOVE_OVERLAPPING_SHORTER_ANNOTATIONS = "RemoveOverlappingShorterAnnotations";
    private static final Logger log = LoggerFactory.getLogger(MMAX2Reader.class);

    // When true, getIgnoredMarkables(...) computes the markables to skip; otherwise nothing is skipped.
    @ConfigurationParameter(name = PARAM_REMOVE_OVERLAPPING_SHORTER_ANNOTATIONS, mandatory = false, defaultValue = {"false"}, description = "If set to true, for all overlapping annotations only the longest is kept.")
    boolean removeOverlappingShorterAnnotations;

    // Base directory; setUpFolderList() appends File.separator to it if missing.
    @ConfigurationParameter(name = PARAM_INPUT_DIR, description = "Should point to the directory of which the MMAX2 projects are sub directories of.")
    private String inputDir;

    // MMAX2 level names; must have the same length as uimaTypeNames (checked in initialize).
    @ConfigurationParameter(name = PARAM_ANNOTATION_LEVELS, description = "The names of the MMAX2 annotation levels to create annotations for.")
    private String[] annotationLevels;

    // The i-th entry names the UIMA type instantiated for markables of the i-th annotation level.
    @ConfigurationParameter(name = PARAM_UIMA_ANNOTATION_TYPES, description = "The fully qualified names of the UIMA annotation types to be used for the representation of the input annotation level. Must match the indices of AnnotationLevels, i.e. the ith level will be added to the CAS as the ith type.")
    private String[] uimaTypeNames;

    // Optional; when null or empty, the document text is built from the MMAX2 tokens alone.
    @ConfigurationParameter(name = PARAM_ORIGINAL_TEXT_FILES, mandatory = false, description = "The MMAX2 base data consists of tokenized text and does not keep track of the original text. This parameter should point to a directory containing the original text files. The file names should match the MMAX2 project IDs.")
    private String originalTextFilesDir;
    // Queue of project directories still to be read; filled by setUpFolderList(), drained by getNext(...).
    private LinkedList<File> folderList;
    // Absolute path (with trailing "/") of the project currently being read; set in getNext(...).
    private String actualPath;
    // Maps each annotation level name to the UIMA type name at the same index; built in initialize(...).
    private HashMap<String, String> levels2uimaNames;
    // The classes loaded for uimaTypeNames in initialize(...); loading them validates the configured names.
    private List<Class<?>> uimaAnnotationClasses;
    // Number of project directories found by setUpFolderList(); used as the total in getProgress().
    private int numDocuments;

    /**
     * Reads and validates the configuration parameters, loads the configured UIMA annotation
     * classes and collects the MMAX2 project directories to read.
     *
     * @param uimaContext the context the configuration parameter values are read from
     * @throws ResourceInitializationException if a mandatory parameter is missing, a configured
     *         UIMA type cannot be loaded or the project directories cannot be collected
     * @throws IllegalArgumentException if the number of annotation levels differs from the
     *         number of UIMA type names
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        this.inputDir = (String) uimaContext.getConfigParameterValue(PARAM_INPUT_DIR);
        this.annotationLevels = (String[]) uimaContext.getConfigParameterValue(PARAM_ANNOTATION_LEVELS);
        this.uimaTypeNames = (String[]) uimaContext.getConfigParameterValue(PARAM_UIMA_ANNOTATION_TYPES);
        this.originalTextFilesDir = (String) uimaContext.getConfigParameterValue(PARAM_ORIGINAL_TEXT_FILES);
        Boolean removeShorter = (Boolean) uimaContext.getConfigParameterValue(PARAM_REMOVE_OVERLAPPING_SHORTER_ANNOTATIONS);
        this.removeOverlappingShorterAnnotations = removeShorter != null && removeShorter.booleanValue();
        this.actualPath = null;

        // Fail with a descriptive error instead of a bare NullPointerException when a mandatory value is absent.
        if (this.inputDir == null || this.annotationLevels == null || this.uimaTypeNames == null) {
            throw new ResourceInitializationException(new IllegalArgumentException("The parameters " + PARAM_INPUT_DIR + ", " + PARAM_ANNOTATION_LEVELS + " and " + PARAM_UIMA_ANNOTATION_TYPES + " are mandatory."));
        }
        if (this.annotationLevels.length != this.uimaTypeNames.length) {
            throw new IllegalArgumentException("The number of annotation levels and the number of UIMA type names must match. But the given annotation levels are '" + Arrays.toString(this.annotationLevels) + "' and the UIMA types names are '" + Arrays.toString(this.uimaTypeNames) + "'.");
        }

        // Loading the classes up front surfaces misspelled type names at initialization time.
        List<Class<?>> annotationClasses = new java.util.ArrayList<>(this.uimaTypeNames.length);
        try {
            for (String typeName : this.uimaTypeNames) {
                annotationClasses.add(Class.forName(typeName));
            }
        } catch (ClassNotFoundException e) {
            log.error("Could not initialize UIMA annotation classes from parameter values {}", Arrays.toString(this.uimaTypeNames), e);
            throw new ResourceInitializationException(e);
        }
        this.uimaAnnotationClasses = annotationClasses;

        HashMap<String, String> levelToType = new HashMap<>();
        for (int i = 0; i < this.annotationLevels.length; i++) {
            levelToType.put(this.annotationLevels[i], this.uimaTypeNames[i]);
        }
        this.levels2uimaNames = levelToType;

        try {
            // A ResourceInitializationException from here propagates unchanged (no second wrapping).
            setUpFolderList();
        } catch (RuntimeException e) {
            log.error("Could not collect the MMAX2 project directories below {}", this.inputDir, e);
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Fills {@link #folderList} with all sub directories of the input directory and records
     * their number in {@link #numDocuments}.
     * <p>
     * If the input directory does not exist as given, the canonical path of the working
     * directory is prepended as a fallback before giving up.
     *
     * @throws ResourceInitializationException if the input directory cannot be resolved, does
     *         not exist or cannot be listed
     */
    private void setUpFolderList() throws ResourceInitializationException {
        this.folderList = new LinkedList<>();
        if (!this.inputDir.endsWith(File.separator)) {
            this.inputDir += File.separator;
        }
        File baseDir = new File(this.inputDir);
        if (!baseDir.exists()) {
            try {
                baseDir = new File(new File(".").getCanonicalPath() + this.inputDir);
            } catch (IOException e) {
                // A reader component must not terminate the JVM; report the problem to the framework instead.
                log.error("Could not resolve the working directory while looking for {}", this.inputDir, e);
                throw new ResourceInitializationException(e);
            }
            if (!baseDir.exists()) {
                log.error("{} does not exist", this.inputDir);
                throw new ResourceInitializationException(new IllegalArgumentException(this.inputDir + " does not exist"));
            }
        }
        // listFiles returns null when the path is not a directory or an I/O error occurs.
        File[] projectDirs = baseDir.listFiles(File::isDirectory);
        if (projectDirs == null) {
            log.error("{} is not a readable directory", baseDir);
            throw new ResourceInitializationException(new IllegalArgumentException(baseDir + " is not a readable directory"));
        }
        // The children are taken from the directory that actually exists, which may be the fallback location.
        for (File projectDir : projectDirs) {
            this.folderList.add(projectDir);
        }
        this.numDocuments = this.folderList.size();
    }

    /**
     * Reads the document ID from the {@code Basedata.uri} file of the current project.
     *
     * @return the single line of the file, or the empty string if the file is empty
     * @throws CollectionException if the file cannot be read or contains more than one line
     */
    private String getPMID() throws CollectionException {
        String uriFile = this.actualPath + "Basedata.uri";
        // try-with-resources closes the reader on every path, including the error paths.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(uriFile)))) {
            int lineCount = 0;
            String lastLine = "";
            String line;
            while ((line = reader.readLine()) != null) {
                lineCount++;
                lastLine = line;
            }
            if (lineCount > 1) {
                log.error("unknown data in {}Basedata.uri", this.actualPath);
                // Reported as an exception instead of terminating the JVM from within a reader.
                throw new CollectionException(new IllegalStateException("unknown data in " + uriFile + ": expected a single line but found " + lineCount));
            }
            return lastLine;
        } catch (IOException e) {
            log.error("Error while parsing {}Basedata.uri", this.actualPath);
            throw new CollectionException(e);
        }
    }

    /**
     * Reads the next MMAX2 project from {@link #folderList} into the given CAS.
     * <p>
     * While the project is loaded, {@code Styles/default_style.xsl} is renamed to
     * {@code Styles/generic_nongui_style.xsl}; the rename is reverted afterwards even if
     * reading the project fails.
     *
     * @param jCas the CAS to populate with text, tokens and annotations
     * @throws CollectionException if there is no project left or the project cannot be read
     */
    @Override
    public void getNext(JCas jCas) throws CollectionException {
        File projectDir = this.folderList.poll();
        if (projectDir == null) {
            throw new CollectionException(new IllegalStateException("getNext was called although no MMAX2 project is left to read."));
        }
        Statistics.projects++;
        this.actualPath = projectDir.getAbsolutePath() + "/";
        File defaultStyle = new File(this.actualPath + "Styles/default_style.xsl");
        File nonGuiStyle = new File(this.actualPath + "Styles/generic_nongui_style.xsl");
        if (!defaultStyle.renameTo(nonGuiStyle)) {
            log.debug("Could not rename {} to {}", defaultStyle, nonGuiStyle);
        }
        try {
            MMAX2Discourse discourse = MMAX2Discourse.buildDiscourse(new File(this.actualPath + "project.mmax").getAbsolutePath());
            String documentChunk = discourse.getNextDocumentChunk();
            WordInformation[] words = new WordInformation[discourse.getDiscourseElementCount()];
            for (MMAX2DiscourseElement element : discourse.getDiscourseElements()) {
                WordInformation word = new WordInformation();
                word.setId(element.getID());
                int position = element.getDiscoursePosition();
                word.setPosition(position);
                // Display positions are inclusive on both ends, hence end + 1 for substring.
                int displayEnd = discourse.getDisplayEndPositionFromDiscoursePosition(position);
                int displayStart = discourse.getDisplayStartPositionFromDiscoursePosition(position);
                word.setText(displayStart <= displayEnd ? documentChunk.substring(displayStart, displayEnd + 1) : "");
                words[position] = word;
            }
            produceOutput(discourse, words, jCas);
        } finally {
            // Restore the project's style file even when reading failed, so the project is left unchanged on disk.
            if (!nonGuiStyle.renameTo(defaultStyle)) {
                log.debug("Could not rename {} back to {}", nonGuiStyle, defaultStyle);
            }
        }
        // NOTE(review): the counter is incremented a second time per project, as before — confirm this is intended.
        Statistics.projects++;
    }

    /**
     * Populates the CAS with the document text, one {@link Token} per word, one annotation per
     * continuous, non-ignored markable of each configured annotation level, and a {@link Header}
     * carrying the document ID.
     *
     * @param mMAX2Discourse the loaded MMAX2 project
     * @param wordInformationArr the words of the project, indexed by discourse position
     * @param jCas the CAS to populate
     * @throws CollectionException if the document ID or original text cannot be read or an
     *         annotation cannot be created
     */
    private void produceOutput(MMAX2Discourse mMAX2Discourse, WordInformation[] wordInformationArr, JCas jCas) throws CollectionException {
        String pmid = getPMID();
        if (this.originalTextFilesDir != null && this.originalTextFilesDir.length() > 0) {
            // Determines for each word whether it is followed by a space in the original text.
            handleOriginalTextInformation(pmid, wordInformationArr);
        }

        // Build the document text and remember the token created for each discourse position.
        StringBuilder documentText = new StringBuilder();
        HashMap<Integer, Token> tokensByPosition = new HashMap<>();
        for (WordInformation word : wordInformationArr) {
            int tokenBegin = documentText.length();
            Token token = new Token(jCas, tokenBegin, tokenBegin + word.getText().length());
            token.setComponentId(getClass().getCanonicalName());
            token.addToIndexes();
            tokensByPosition.put(Integer.valueOf(word.getPosition()), token);
            documentText.append(word.getText());
            if (word.isFollowedBySpace()) {
                documentText.append(" ");
            }
        }

        Set<Markable> ignoredMarkables = getIgnoredMarkables(mMAX2Discourse);
        for (int i = 0; i < this.annotationLevels.length; i++) {
            // Counts the annotations created for this level; used as the ID of Sentence annotations.
            int annotationCount = 0;
            for (Object candidate : mMAX2Discourse.getMarkableLevelByName(this.annotationLevels[i], false).getMarkables()) {
                Markable markable = (Markable) candidate;
                if (ignoredMarkables.contains(markable) || markable.isDiscontinuous()) {
                    continue;
                }
                int begin = tokensByPosition.get(Integer.valueOf(markable.getLeftmostDiscoursePosition())).getBegin();
                int end = tokensByPosition.get(Integer.valueOf(markable.getRightmostDiscoursePosition())).getEnd();
                try {
                    // Typed as the general annotation class: the configured type need not be a ConceptMention.
                    org.apache.uima.jcas.tcas.Annotation annotation = JCoReAnnotationTools.getAnnotationByClassName(jCas, this.uimaTypeNames[i]);
                    annotation.setBegin(begin);
                    annotation.setEnd(end);
                    if (annotation instanceof ConceptMention) {
                        ((ConceptMention) annotation).setSpecificType(markable.getAttributeValue(markable.getMarkableLevelName()));
                    } else if (annotation instanceof Sentence) {
                        ((Sentence) annotation).setId(String.valueOf(annotationCount));
                    }
                    annotation.addToIndexes();
                    annotationCount++;
                } catch (Exception e) {
                    throw new CollectionException(e);
                }
            }
        }

        // Markable containers attached directly to the words become Gene annotations, created once at their first word.
        for (WordInformation word : wordInformationArr) {
            for (MarkableContainer container : word.getMarkables()) {
                int containerBegin = container.getBegin();
                if (containerBegin == word.getPosition()) {
                    int begin = tokensByPosition.get(Integer.valueOf(containerBegin)).getBegin();
                    int end = tokensByPosition.get(Integer.valueOf(container.getEnd())).getEnd();
                    new Gene(jCas, begin, end).addToIndexes();
                }
            }
        }

        jCas.setDocumentText(documentText.toString());
        Header header = new Header(jCas);
        header.setDocId(pmid);
        header.addToIndexes();
    }

    /**
     * Determines the markables that must not be converted to annotations because a longer
     * markable of the same annotation level overlaps them.
     * <p>
     * For every discourse position covered by more than one continuous markable of a level, all
     * markables except the longest one at that position are ignored. Once ignored, a markable
     * stays ignored even if it is the longest one at some other position, so no two retained
     * markables of a level share a position.
     *
     * @param mMAX2Discourse the loaded MMAX2 project
     * @return the markables to skip; empty if the removal of overlapping annotations is disabled
     */
    private Set<Markable> getIgnoredMarkables(MMAX2Discourse mMAX2Discourse) {
        if (!this.removeOverlappingShorterAnnotations) {
            return Collections.emptySet();
        }
        Set<Markable> ignored = new HashSet<>();
        for (String level : this.annotationLevels) {
            // Collect, for each discourse position, the continuous markables covering it.
            HashMap<Integer, Set<Markable>> markablesByPosition = new HashMap<>();
            for (Object candidate : mMAX2Discourse.getMarkableLevelByName(level, false).getMarkables()) {
                Markable markable = (Markable) candidate;
                if (markable.isDiscontinuous()) {
                    continue;
                }
                int last = markable.getRightmostDiscoursePosition();
                for (int position = markable.getLeftmostDiscoursePosition(); position <= last; position++) {
                    markablesByPosition.computeIfAbsent(Integer.valueOf(position), key -> new HashSet<>()).add(markable);
                }
            }
            for (Set<Markable> covering : markablesByPosition.values()) {
                if (covering.size() < 2) {
                    continue;
                }
                Markable longest = null;
                int longestLength = 0;
                for (Markable markable : covering) {
                    int length = (markable.getRightmostDiscoursePosition() - markable.getLeftmostDiscoursePosition()) + 1;
                    if (length > longestLength) {
                        longestLength = length;
                        longest = markable;
                    }
                }
                // Only add; never remove: removing the local winner could un-ignore a markable
                // that lost against a longer one at another position.
                for (Markable markable : covering) {
                    if (!markable.equals(longest)) {
                        ignored.add(markable);
                    }
                }
            }
        }
        return ignored;
    }

    /**
     * Aligns the MMAX2 words with the original text file of the document to find out which
     * words are followed by whitespace.
     * <p>
     * The original text is read character by character and matched (case-insensitively) against
     * the word texts in order. Whitespace that does not match the expected character marks the
     * preceding word as followed by a space. If no original file exists, a warning is logged and
     * the words are left unchanged.
     *
     * @param str the document ID, used as the file name inside the original text directory
     * @param wordInformationArr the words of the document in text order; updated in place
     * @throws CollectionException if the original text file cannot be read
     */
    private void handleOriginalTextInformation(String str, WordInformation[] wordInformationArr) throws CollectionException {
        File originalFile = this.originalTextFilesDir.isEmpty() ? new File(str) : new File(this.originalTextFilesDir, str);
        if (!originalFile.exists()) {
            log.warn("no original File found for {} using only mmax text.", str);
            return;
        }
        if (wordInformationArr.length == 0) {
            // Nothing to align.
            return;
        }
        // NOTE(review): the file is decoded with the platform default charset, as before — confirm the files' encoding.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(originalFile)))) {
            int wordIndex = 0;
            WordInformation currentWord = wordInformationArr[0];
            String wordText = currentWord.getText();
            currentWord.setFollowedBySpace(false);
            int charIndex = 0;
            int read;
            while ((read = reader.read()) >= 0) {
                // Advance to the next word once the current one is fully matched; the loop also skips empty words.
                while (charIndex >= wordText.length()) {
                    wordIndex++;
                    if (wordIndex >= wordInformationArr.length) {
                        if (!Character.isWhitespace(read)) {
                            log.warn("original Text contains more words than mmax information");
                        }
                        return;
                    }
                    currentWord = wordInformationArr[wordIndex];
                    wordText = currentWord.getText();
                    currentWord.setFollowedBySpace(false);
                    charIndex = 0;
                }
                char expected = wordText.charAt(charIndex);
                if (expected == read || Character.toLowerCase(expected) == Character.toLowerCase(read)) {
                    charIndex++;
                } else if (Character.isWhitespace(read)) {
                    // Whitespace before the first word has no preceding word to mark.
                    if (wordIndex > 0) {
                        wordInformationArr[wordIndex - 1].setFollowedBySpace(true);
                    }
                } else {
                    log.warn("there is a non whitespace character different in original text at document {} critical character is '{}' near word '{}' (MMAX2 word ID {})", new Object[]{str, String.valueOf((char) read), wordText, currentWord.getId()});
                }
            }
        } catch (IOException | RuntimeException e) {
            log.error("Error attempting to read original text file", e);
            throw new CollectionException(e);
        }
    }

    /**
     * Releases the resources held by this reader. The reader keeps no open resources between
     * calls, so there is nothing to do.
     */
    public void close() {
        // Intentionally empty.
    }

    /**
     * Reports how many of the project directories found at initialization have been taken
     * from the queue so far.
     *
     * @return a single-element array holding the progress measured in documents
     */
    public Progress[] getProgress() {
        int remaining = this.folderList.size();
        int completed = this.numDocuments - remaining;
        Progress documentProgress = new ProgressImpl(completed, this.numDocuments, "document");
        return new Progress[]{documentProgress};
    }

    /**
     * Tells whether there is another MMAX2 project directory left to read.
     *
     * @return {@code true} if the queue of project directories is not empty
     */
    public boolean hasNext() {
        boolean exhausted = this.folderList.isEmpty();
        return !exhausted;
    }
}
