package de.julielab.geneexpbase.data;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Multimap;
import de.julielab.geneexpbase.genemodel.Acronym;
import de.julielab.geneexpbase.genemodel.AcronymLongform;
import de.julielab.geneexpbase.genemodel.Apposition;
import de.julielab.geneexpbase.genemodel.CoreferenceExpression;
import de.julielab.geneexpbase.genemodel.CoreferenceSet;
import de.julielab.geneexpbase.genemodel.DictionaryGeneIdRecord;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.MeshHeading;
import de.julielab.geneexpbase.genemodel.PosTag;
import de.julielab.geneexpbase.genemodel.SpeciesMention;
import de.julielab.java.utilities.FileUtilities;
import de.julielab.java.utilities.spanutils.OffsetMap;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.lang3.Range;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/julielab/geneexpbase/data/CorpusReader.class */
public class CorpusReader {
    /** Class-wide SLF4J logger; used to report malformed input lines before the error is rethrown. */
    private static final Logger log = LoggerFactory.getLogger(CorpusReader.class);

    /**
     * Reads gold gene IDs from a tab-separated file into a multimap keyed by document ID.
     * <p>
     * Column 0 is the document ID, column 1 the gene ID and the optional column 2 the mention text.
     * Records whose gold mention ID is contained in {@code GeneInformation.DISCONTINUED} are dropped;
     * for all others the ID is replaced by its entry in {@code GeneInformation.REPLACED}, if any.
     *
     * @param str path to the gold ID file
     * @return the gold mentions grouped by document ID
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, GeneMention> readGoldIds(String str) throws IOException {
        Multimap<String, GeneMention> goldMentions = HashMultimap.create();
        // The iterator owns the underlying stream; closing it releases the file handle.
        try (LineIterator lineIterator = IOUtils.lineIterator(FileUtilities.getInputStreamFromFile(new File(str)), "UTF-8")) {
            while (lineIterator.hasNext()) {
                String[] columns = lineIterator.next().split("\t");
                GeneMention mention = new GeneMention();
                mention.setDocId(columns[0]);
                mention.setId(columns[1]);
                mention.setIds(List.of(columns[1]));
                if (columns.length > 2) {
                    mention.setText(columns[2]);
                }
                if (GeneInformation.DISCONTINUED.contains(mention.getGoldMentionId())) {
                    continue;
                }
                mention.setId(GeneInformation.REPLACED.getOrDefault(mention.getGoldMentionId(), mention.getGoldMentionId()));
                goldMentions.put(columns[0], mention);
            }
        }
        return goldMentions;
    }

    /**
     * Reads predicted mentions from a directory that holds one file per document.
     * <p>
     * The document ID is the file name up to (excluding) its first dot, or the whole file name when
     * it contains no dot. For every line of a file, the first tab-separated column is stored as a
     * predicted mention of that document. A file named {@code .DS_Store} is ignored.
     *
     * @param str path to the directory with the prediction files
     * @return the first column of each line, grouped by document ID
     * @throws IllegalArgumentException if {@code str} is not a directory
     * @throws IOException              if the directory cannot be listed or a file cannot be read
     */
    public static Multimap<String, String> readPredictedMentions(String str) throws IOException {
        File directory = new File(str);
        if (!directory.isDirectory()) {
            throw new IllegalArgumentException("The path \"" + str + "\" does not point to a directory. A directory holding separate files for each gene mention evaluation document is expected.");
        }
        File[] documentFiles = directory.listFiles((dir, name) -> !name.equals(".DS_Store"));
        if (documentFiles == null) {
            // listFiles() signals an I/O error with null rather than with an exception.
            throw new IOException("Could not list the contents of directory \"" + str + "\".");
        }
        Multimap<String, String> predictions = HashMultimap.create();
        for (File documentFile : documentFiles) {
            String fileName = documentFile.getName();
            int dotIndex = fileName.indexOf('.');
            // Without a dot there is no extension to strip; use the complete name as document ID.
            String docId = dotIndex >= 0 ? fileName.substring(0, dotIndex) : fileName;
            try (LineIterator lineIterator = IOUtils.lineIterator(FileUtilities.getInputStreamFromFile(documentFile), "UTF-8")) {
                while (lineIterator.hasNext()) {
                    predictions.put(docId, lineIterator.next().split("\\t")[0]);
                }
            }
        }
        return predictions;
    }

    /**
     * Convenience overload that restricts the entity type to {@code "Gene"} and passes a
     * {@code null} tagger to the single-tagger overload.
     *
     * @param str path to the mixed annotation file
     * @return the result of the delegate overload
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, GeneMention> readMixedFileForGenesWithOffsets(String str) throws IOException {
        final Collection<String> geneTypeOnly = Collections.singletonList("Gene");
        final GeneMention.GeneTagger noTagger = null;
        return readMixedFileForGenesWithOffsets(str, geneTypeOnly, noTagger);
    }

    /**
     * Reads gene mentions of the given entity types, optionally restricted to a single tagger.
     *
     * @param str        path to the mixed annotation file
     * @param collection accepted entity type suffixes; {@code null} or empty accepts every type
     * @param geneTagger the only tagger whose mentions are kept, or {@code null} for no tagger restriction
     * @return the accepted mentions grouped by document ID
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, GeneMention> readMixedFileForGenesWithOffsets(String str, Collection<String> collection, GeneMention.GeneTagger geneTagger) throws IOException {
        // Wrapping a null tagger in a singleton would yield the set {null}, which rejects every
        // mention that carries tagger information. A null set disables the tagger filter instead.
        Set<GeneMention.GeneTagger> taggers = geneTagger == null ? null : Collections.singleton(geneTagger);
        return readMixedFileForGenesWithOffsets(str, collection, taggers);
    }

    /**
     * Reads gene mentions from a mixed annotation file, filtered by entity type and tagger.
     * <p>
     * Blank lines are skipped. When {@code collection} is non-empty, a line is accepted only if its
     * seventh tab-separated column ends with one of the given strings. Accepted lines are parsed by
     * {@link #createGeneMention(String)} and kept if {@code set} is {@code null} or contains the
     * mention's tagger.
     *
     * @param str        path to the mixed annotation file
     * @param collection accepted entity type suffixes; {@code null} or empty accepts every line
     * @param set        accepted taggers; {@code null} accepts every tagger
     * @return the accepted mentions grouped by document ID, in file order
     * @throws IllegalArgumentException       if {@code str} is not a file
     * @throws ArrayIndexOutOfBoundsException if a type filter is given and a line has fewer than seven columns
     * @throws IOException                    if the file cannot be read
     */
    public static Multimap<String, GeneMention> readMixedFileForGenesWithOffsets(String str, Collection<String> collection, Set<GeneMention.GeneTagger> set) throws IOException {
        File mentionFile = new File(str);
        if (!mentionFile.isFile()) {
            throw new IllegalArgumentException("The path \"" + str + "\" does not point to a file. A file holding one documentId, gene id, begin, end and gene mention record per line is required.");
        }
        Multimap<String, GeneMention> mentions = LinkedHashMultimap.create();
        boolean acceptAllTypes = collection == null || collection.isEmpty();
        try (LineIterator lineIterator = IOUtils.lineIterator(FileUtilities.getInputStreamFromFile(mentionFile), "UTF-8")) {
            while (lineIterator.hasNext()) {
                String line = lineIterator.nextLine();
                if (line.isBlank()) {
                    continue;
                }
                boolean typeAccepted = acceptAllTypes;
                if (!typeAccepted) {
                    // Split once per line instead of once per accepted type.
                    String entityType;
                    try {
                        entityType = line.split("\t")[6];
                    } catch (ArrayIndexOutOfBoundsException e) {
                        log.error("Format error 'Not enough columns' in line '{}'", line, e);
                        throw e;
                    }
                    for (String acceptedSuffix : collection) {
                        if (entityType.endsWith(acceptedSuffix)) {
                            typeAccepted = true;
                            break;
                        }
                    }
                }
                if (!typeAccepted) {
                    continue;
                }
                GeneMention mention = createGeneMention(line);
                if (set == null || set.contains(mention.getTagger())) {
                    mentions.put(mention.getDocId(), mention);
                }
            }
        }
        return mentions;
    }

    /**
     * Convenience overload that reads mentions of the given specific types without restricting
     * the tagger (a {@code null} tagger set is handed to the three-argument overload).
     *
     * @param str path to the mixed annotation file
     * @param set the specific types to keep
     * @return the accepted mentions grouped by document ID
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, GeneMention> readMixedFileForMentionTypesWithOffsets(String str, Set<GeneMention.SpecificType> set) throws IOException {
        final Set<GeneMention.GeneTagger> noTaggerRestriction = null;
        return readMixedFileForMentionTypesWithOffsets(str, set, noTaggerRestriction);
    }

    /**
     * Reads mentions from a mixed annotation file, filtered by specific type and tagger.
     * <p>
     * The seventh tab-separated column determines the specific type of a line:
     * {@code protein_familiy_or_group} or {@code familyname} map to {@code FAMILYNAME},
     * {@code domainmotif} maps to {@code DOMAINMOTIF} (all case-insensitive), anything else to
     * {@code GENE}. Blank lines are skipped, as in the sibling reader for gene mentions.
     *
     * @param str  path to the mixed annotation file
     * @param set  the specific types to keep
     * @param set2 accepted taggers; {@code null} or empty accepts every tagger
     * @return the accepted mentions grouped by document ID, in file order
     * @throws IllegalArgumentException       if {@code str} is not a file
     * @throws ArrayIndexOutOfBoundsException if a non-blank line has fewer than seven columns
     * @throws IOException                    if the file cannot be read
     */
    public static Multimap<String, GeneMention> readMixedFileForMentionTypesWithOffsets(String str, Set<GeneMention.SpecificType> set, Set<GeneMention.GeneTagger> set2) throws IOException {
        File mentionFile = new File(str);
        if (!mentionFile.isFile()) {
            throw new IllegalArgumentException("The path \"" + str + "\" does not point to a file. A file holding one documentId, gene id, begin, end and gene mention record per line is required.");
        }
        Multimap<String, GeneMention> mentions = LinkedHashMultimap.create();
        boolean acceptAllTaggers = set2 == null || set2.isEmpty();
        try (LineIterator lineIterator = IOUtils.lineIterator(FileUtilities.getInputStreamFromFile(mentionFile), "UTF-8")) {
            while (lineIterator.hasNext()) {
                String line = lineIterator.nextLine();
                if (line.isBlank()) {
                    // A blank line has no type column; skip it instead of failing on split[6].
                    continue;
                }
                String typeColumn = line.split("\\t")[6];
                GeneMention.SpecificType lineType;
                if (typeColumn.equalsIgnoreCase("protein_familiy_or_group") || typeColumn.equalsIgnoreCase("familyname")) {
                    lineType = GeneMention.SpecificType.FAMILYNAME;
                } else if (typeColumn.equalsIgnoreCase("domainmotif")) {
                    lineType = GeneMention.SpecificType.DOMAINMOTIF;
                } else {
                    lineType = GeneMention.SpecificType.GENE;
                }
                if (!set.contains(lineType)) {
                    continue;
                }
                GeneMention mention = createGeneMention(line);
                if (acceptAllTaggers || set2.contains(mention.getTagger())) {
                    mentions.put(mention.getDocId(), mention);
                }
            }
        }
        return mentions;
    }

    /**
     * Reads every line of a mixed annotation file that ends with {@code "Gene"} and parses it
     * with {@link #createGeneMention(String)}.
     *
     * @param str path to the mixed annotation file
     * @return the parsed mentions grouped by document ID, in file order
     * @throws IllegalArgumentException if {@code str} is not a file
     * @throws IOException              if the file cannot be read
     */
    public static Multimap<String, GeneMention> readMixedFileForChunks(String str) throws IOException {
        File mentionFile = new File(str);
        if (!mentionFile.isFile()) {
            throw new IllegalArgumentException("The path \"" + str + "\" does not point to a file. A file holding one documentId, gene id, begin, end and gene mention record per line is required.");
        }
        Multimap<String, GeneMention> mentions = LinkedHashMultimap.create();
        // Close the iterator (and with it the file stream) even if parsing a line fails.
        try (LineIterator lineIterator = IOUtils.lineIterator(FileUtilities.getInputStreamFromFile(mentionFile), "UTF-8")) {
            while (lineIterator.hasNext()) {
                String line = lineIterator.nextLine();
                if (!line.endsWith("Gene")) {
                    continue;
                }
                GeneMention mention = createGeneMention(line);
                mentions.put(mention.getDocId(), mention);
            }
        }
        return mentions;
    }

    /**
     * Reads gene mentions with offsets from a tab-separated file.
     * <p>
     * Lines starting with {@code #} are treated as comments. Mentions whose gold mention ID is in
     * {@code GeneInformation.DISCONTINUED} are dropped; for the others the ID is replaced by its
     * entry in {@code GeneInformation.REPLACED}, if any.
     *
     * @param str path to the mention file
     * @return the mentions grouped by document ID, in file order
     * @throws IllegalArgumentException if {@code str} is not a file
     * @throws IOException              if the file cannot be read
     */
    public static Multimap<String, GeneMention> readMentionsWithOffsets(String str) throws IOException {
        File mentionFile = new File(str);
        if (!mentionFile.isFile()) {
            throw new IllegalArgumentException("The path \"" + str + "\" does not point to a file. A file holding one documentId, gene id, begin, end and gene mention record per line is required.");
        }
        Multimap<String, GeneMention> mentions = LinkedHashMultimap.create();
        // Close the iterator (and with it the file stream) even if parsing a line fails.
        try (LineIterator lineIterator = IOUtils.lineIterator(FileUtilities.getInputStreamFromFile(mentionFile), "UTF-8")) {
            while (lineIterator.hasNext()) {
                String line = lineIterator.next();
                if (line.startsWith("#")) {
                    continue;
                }
                GeneMention mention = createGeneMention(line);
                if (GeneInformation.DISCONTINUED.contains(mention.getGoldMentionId())) {
                    continue;
                }
                mention.setId(GeneInformation.REPLACED.getOrDefault(mention.getGoldMentionId(), mention.getGoldMentionId()));
                mentions.put(mention.getDocId(), mention);
            }
        }
        return mentions;
    }

    /**
     * Parses one tab-separated mention record into a {@link GeneMention}.
     * <p>
     * Columns: 0 document ID, 1 gene ID, 2 begin offset, 3 end offset, 4 mention text (optional),
     * 5 tagger / component name (optional), 6 entity type (optional), 7 specific type confidence
     * (optional). A blank gene ID or the literal {@code "null"} leaves the mention without ID.
     *
     * @param str the mention record line
     * @return the populated gene mention
     * @throws ArrayIndexOutOfBoundsException if the line has fewer than four columns
     * @throws NumberFormatException          if the begin or end offset is not an integer
     * @throws IllegalArgumentException       if the tagger column names an unknown recognition system
     */
    public static GeneMention createGeneMention(String str) {
        String[] columns = str.split("\\t");
        String docId = columns[0];
        String geneId = columns[1];
        int begin = Integer.parseInt(columns[2]);
        int end = Integer.parseInt(columns[3]);
        String text = columns.length > 4 ? columns[4] : null;

        GeneMention.GeneTagger tagger = null;
        if (columns.length > 5) {
            String taggerName = columns[5];
            // The order of the checks matters: longer, more specific names must be tested before
            // names that are a substring of them (e.g. ...CollapsedVarCompEnum before ...CollapsedVar).
            if (taggerName.contains("ProteinConsistencyTagger")) {
                tagger = GeneMention.GeneTagger.CONSISTENCY_TAGGER;
            } else if (taggerName.contains("ExtendedProteinsMerger")) {
                tagger = GeneMention.GeneTagger.EXPANSION_TAGGER;
            } else if (taggerName.contains("GazetteerAnnotator")) {
                tagger = GeneMention.GeneTagger.GAZETTEER;
            } else if (taggerName.contains("EntityAnnotator") || taggerName.contains("JNET ConsistencyPreservation")) {
                tagger = GeneMention.GeneTagger.JNET;
            } else if (taggerName.contains("BANNER")) {
                tagger = GeneMention.GeneTagger.BANNER;
            } else if (taggerName.contains("Reader") || taggerName.equalsIgnoreCase("gold")) {
                tagger = GeneMention.GeneTagger.GOLD;
            } else if (taggerName.contains("FlairNerAnnotator") || taggerName.contains("FlairBC2GMTrain1024")) {
                tagger = GeneMention.GeneTagger.FLAIR;
            } else if (taggerName.contains("FlairGNormPlusNLMIAT")) {
                tagger = GeneMention.GeneTagger.FLAIR_GNORMPLUSNLMIAT;
            } else if (taggerName.contains("FlairBC2GMTrainTest")) {
                tagger = GeneMention.GeneTagger.FLAIR_BC2TRAINTEST;
            } else if (taggerName.contains("FlairJPGCollapsedVarCompEnum")) {
                tagger = GeneMention.GeneTagger.FLAIR_JPG_COLLAPSED_VARCOMPENUM;
            } else if (taggerName.contains("FlairJPGCollapsedVar")) {
                tagger = GeneMention.GeneTagger.FLAIR_JPG_COLLAPSED_VAR;
            } else if (taggerName.contains("FlairJPGNoBC2TestNoTest")) {
                tagger = GeneMention.GeneTagger.FLAIR_JPG_NOBC2TEST_NOTEST;
            } else if (taggerName.contains("FlairJPGNoBc2TestNoTestCollapsedVar")) {
                tagger = GeneMention.GeneTagger.FLAIR_JPG_NOBC2TEST_NOTEST_COLLAPSED_VAR;
            } else if (taggerName.contains("FlairProGeneBC2TrainIsDevGNormPlusEntities")) {
                tagger = GeneMention.GeneTagger.FLAIR_JPG_GNP_ENTITIES;
            } else if (taggerName.equals("GNormPlusTagger") || taggerName.equals("GNormPlus")) {
                tagger = GeneMention.GeneTagger.GNORM_PLUS;
            } else {
                throw new IllegalArgumentException("The gene recognition system " + taggerName + " is unknown. Mention record: " + str);
            }
        }

        GeneMention.SpecificType specificType = GeneMention.SpecificType.UNKNOWN;
        if (columns.length > 6) {
            String entityType = columns[6];
            if (tagger != GeneMention.GeneTagger.GAZETTEER) {
                boolean isFamily = entityType.equalsIgnoreCase("protein_familiy_or_group")
                        || entityType.equalsIgnoreCase("familyname")
                        || entityType.endsWith("-222")
                        || entityType.endsWith("-333")
                        || entityType.endsWith("-444")
                        || entityType.endsWith("-555");
                if (isFamily) {
                    specificType = GeneMention.SpecificType.FAMILYNAME;
                } else if (entityType.equalsIgnoreCase("domainmotif")) {
                    specificType = GeneMention.SpecificType.DOMAINMOTIF;
                } else {
                    specificType = GeneMention.SpecificType.GENE;
                }
            } else if (entityType.contains(":")) {
                // NOTE(review): the record is constructed but never used; presumably this only
                // validates the dictionary ID format - confirm. The specific type stays UNKNOWN here.
                new DictionaryGeneIdRecord(entityType);
            } else {
                specificType = GeneMention.SpecificType.GENE;
            }
        }

        double specificTypeConfidence = 0.0d;
        if (columns.length > 7) {
            try {
                specificTypeConfidence = Double.parseDouble(columns[7]);
            } catch (NumberFormatException ignored) {
                // Best effort: a non-numeric confidence keeps the default of 0.0, but is no longer hidden.
                log.debug("Could not parse specific type confidence '{}'; using 0.0. Mention record: {}", columns[7], str);
            }
        }

        GeneMention mention = new GeneMention();
        mention.setDocId(docId);
        if (!StringUtils.isBlank(geneId) && !geneId.equals("null")) {
            mention.setId(geneId);
            mention.setIds(Collections.singletonList(geneId));
        }
        mention.setOffsets(Range.between(Integer.valueOf(begin), Integer.valueOf(end)));
        mention.setText(text);
        mention.setTagger(tagger);
        mention.setSpecificType(specificType);
        mention.setSpecificTypeConfidence(specificTypeConfidence);
        return mention;
    }

    /**
     * Reads gene mentions that carry a taxonomy ID from a tab-separated file.
     * <p>
     * Columns: 0 document ID, 1 gene ID, 2 begin offset, 3 end offset, 4 mention text (optional),
     * 5 taxonomy ID (optional), 6 tagger / component name (optional), 7 specific type confidence
     * (optional; the literal {@code "null"} keeps the default of 0.0).
     *
     * @param str path to the mention file
     * @return the mentions grouped by document ID, in file order
     * @throws IllegalArgumentException if {@code str} is not a file or a line names an unknown recognition system
     * @throws NumberFormatException    if an offset or a non-"null" confidence value is not numeric
     * @throws IOException              if the file cannot be read
     */
    public static Multimap<String, GeneMention> readMentionsWithOffsetsAndSpecies(String str) throws IOException {
        File mentionFile = new File(str);
        if (!mentionFile.isFile()) {
            throw new IllegalArgumentException("The path \"" + str + "\" does not point to a file. A file holding one documentId, gene id, begin, end and gene mention record per line is required.");
        }
        Multimap<String, GeneMention> mentions = LinkedHashMultimap.create();
        try (LineIterator lineIterator = IOUtils.lineIterator(FileUtilities.getInputStreamFromFile(mentionFile), "UTF-8")) {
            while (lineIterator.hasNext()) {
                String[] columns = lineIterator.next().split("\\t");
                String docId = columns[0];
                String geneId = columns[1];
                int begin = Integer.parseInt(columns[2]);
                int end = Integer.parseInt(columns[3]);
                String text = columns.length > 4 ? columns[4] : null;
                String taxonomyId = columns.length > 5 ? columns[5] : null;

                GeneMention.GeneTagger tagger = null;
                if (columns.length > 6) {
                    String taggerName = columns[6];
                    if (taggerName.contains("GazetteerAnnotator")) {
                        tagger = GeneMention.GeneTagger.GAZETTEER;
                    } else if (taggerName.endsWith("EntityAnnotator") || taggerName.contains("JNET ConsistencyPreservation")) {
                        tagger = GeneMention.GeneTagger.JNET;
                    } else if (taggerName.contains("BANNER")) {
                        tagger = GeneMention.GeneTagger.BANNER;
                    } else if (taggerName.contains("FlairNerAnnotator")) {
                        tagger = GeneMention.GeneTagger.FLAIR;
                    } else if (taggerName.contains("FlairJPGCollapsedVarCompEnum")) {
                        // Must be tested before "FlairJPGCollapsedVar", which is a prefix of this name
                        // and would otherwise make this branch unreachable.
                        tagger = GeneMention.GeneTagger.FLAIR_JPG_COLLAPSED_VARCOMPENUM;
                    } else if (taggerName.contains("FlairJPGCollapsedVar")) {
                        tagger = GeneMention.GeneTagger.FLAIR_JPG_COLLAPSED_VAR;
                    } else {
                        throw new IllegalArgumentException("The gene recognition system " + taggerName + " is unknown.");
                    }
                }

                double specificTypeConfidence = 0.0d;
                // Compare by value: split() produces fresh String objects, so '!=' was always true.
                if (columns.length > 7 && !"null".equals(columns[7])) {
                    specificTypeConfidence = Double.parseDouble(columns[7]);
                }

                GeneMention mention = new GeneMention();
                mention.setDocId(docId);
                if (!StringUtils.isBlank(geneId)) {
                    mention.setId(geneId);
                }
                mention.setOffsets(Range.between(Integer.valueOf(begin), Integer.valueOf(end)));
                mention.setText(text);
                mention.setTaxonomyId(taxonomyId);
                mention.setTagger(tagger);
                mention.setSpecificTypeConfidence(specificTypeConfidence);
                mentions.put(docId, mention);
            }
        }
        return mentions;
    }

    /**
     * Reads the complete, trimmed UTF-8 content of every file in a directory.
     * <p>
     * The map key is the file name with a trailing {@code .txt}, {@code .txt.gz} or {@code .gz}
     * removed. A file named {@code .DS_Store} is ignored.
     *
     * @param str path to the directory holding one context file per document
     * @return file contents keyed by the stripped file name
     * @throws IllegalArgumentException if {@code str} is not a directory
     * @throws IOException              if the directory cannot be listed or a file cannot be read
     */
    public static Map<String, String> readGeneContexts(String str) throws IOException {
        File directory = new File(str);
        if (!directory.isDirectory()) {
            throw new IllegalArgumentException("The path \"" + str + "\" does not point to a directory. A directory holding separate files for each gene mention evaluation document is expected.");
        }
        File[] contextFiles = directory.listFiles((dir, name) -> !name.equals(".DS_Store"));
        if (contextFiles == null) {
            // listFiles() signals an I/O error with null rather than with an exception.
            throw new IOException("Could not list the contents of directory \"" + str + "\".");
        }
        Map<String, String> contexts = new HashMap<>();
        for (File contextFile : contextFiles) {
            String docId = contextFile.getName().replaceAll("\\.txt$", "").replaceAll("\\.txt\\.gz$", "").replaceAll("\\.gz$", "");
            try (BufferedInputStream in = FileUtilities.getInputStreamFromFile(contextFile)) {
                contexts.put(docId, IOUtils.toString(in, StandardCharsets.UTF_8).trim());
            }
        }
        return contexts;
    }

    /**
     * Projects a document-to-mention multimap onto the gold mention IDs of the mentions.
     *
     * @param multimap gene mentions grouped by document ID
     * @return the gold mention IDs grouped by document ID (duplicates per document collapse)
     */
    public static Multimap<String, String> convertGoldMentionsToIdsPerDocument(Multimap<String, GeneMention> multimap) {
        Multimap<String, String> idsByDocument = HashMultimap.create();
        for (Map.Entry<String, GeneMention> entry : multimap.entries()) {
            idsByDocument.put(entry.getKey(), entry.getValue().getGoldMentionId());
        }
        return idsByDocument;
    }

    /**
     * Projects a document-to-mention multimap onto the text of the mentions.
     *
     * @param multimap gene mentions grouped by document ID
     * @return the mention texts grouped by document ID (duplicates per document collapse)
     */
    public static Multimap<String, String> convertGoldMentionsToMentionTextPerDocument(Multimap<String, GeneMention> multimap) {
        Multimap<String, String> textsByDocument = HashMultimap.create();
        for (Map.Entry<String, GeneMention> entry : multimap.entries()) {
            textsByDocument.put(entry.getKey(), entry.getValue().getText());
        }
        return textsByDocument;
    }

    /**
     * Collects the gold mention IDs of the given mentions.
     *
     * @param collection the mentions to read the IDs from
     * @return a new mutable set with the gold mention ID of every mention
     */
    public static Set<String> getIdsOfMentions(Collection<GeneMention> collection) {
        Set<String> goldIds = new HashSet<>(collection.size());
        for (GeneMention mention : collection) {
            goldIds.add(mention.getGoldMentionId());
        }
        return goldIds;
    }

    /**
     * Selects the mentions whose begin/end range is overlapped by the range between {@code i} and {@code i2}.
     *
     * @param collection the candidate mentions
     * @param i          one boundary of the query range
     * @param i2         the other boundary of the query range
     * @return the overlapping mentions in the iteration order of {@code collection}
     */
    public static List<GeneMention> getGeneMentionsInRange(Collection<GeneMention> collection, int i, int i2) {
        final Range<Integer> queryRange = Range.between(Integer.valueOf(i), Integer.valueOf(i2));
        final List<GeneMention> overlapping = new ArrayList<>();
        for (GeneMention candidate : collection) {
            Range<Integer> candidateRange = Range.between(Integer.valueOf(candidate.getBegin()), Integer.valueOf(candidate.getEnd()));
            if (!candidateRange.isOverlappedBy(queryRange)) {
                continue;
            }
            overlapping.add(candidate);
        }
        return overlapping;
    }

    /**
     * Selects the mentions whose begin/end range is overlapped by the range of the given mention.
     *
     * @param geneMention the mention that defines the query range
     * @param collection  the candidate mentions
     * @return a new set with every overlapping candidate
     */
    public static Set<GeneMention> getGeneMentionsAtPosition(GeneMention geneMention, Collection<GeneMention> collection) {
        // Same overlap test as getGeneMentionsInRange; only the result container differs.
        List<GeneMention> overlapping = getGeneMentionsInRange(collection, geneMention.getBegin(), geneMention.getEnd());
        return new HashSet<>(overlapping);
    }

    /**
     * Reads acronym annotations from a tab-separated file.
     * <p>
     * Columns: 0 document ID, 1 annotation ID, 2 begin offset, 3 end offset, and for acronyms
     * column 4 with the annotation ID of the long form. Annotation IDs starting with {@code F}
     * define a long form; IDs starting with {@code A} define an acronym that is linked to a long
     * form defined on an earlier line.
     *
     * @param str path to the acronym annotation file
     * @return the acronyms grouped by document ID
     * @throws IllegalArgumentException if an acronym refers to a long form that has not been defined before
     * @throws IOException              if the file cannot be read
     */
    public static Multimap<String, Acronym> readAcronymAnnotations(String str) throws IOException {
        File file = new File(str);
        Multimap<String, Acronym> acronyms = HashMultimap.create();
        Map<String, AcronymLongform> longformsById = new HashMap<>();
        // Closing the Stream returned by BufferedReader.lines() does not close the reader,
        // so the reader itself is the managed resource.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(file)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\\t");
                String docId = columns[0];
                String annotationId = columns[1];
                Range<Integer> offsets = Range.between(Integer.valueOf(Integer.parseInt(columns[2])), Integer.valueOf(Integer.parseInt(columns[3])));
                if (annotationId.startsWith("A")) {
                    AcronymLongform longform = longformsById.get(columns[4]);
                    if (longform == null) {
                        throw new IllegalArgumentException("The acronym record '" + line + "' in " + str + " refers to the long form '" + columns[4] + "' which has not been defined on a preceding line.");
                    }
                    Acronym acronym = new Acronym();
                    acronym.setOffsets(offsets);
                    acronym.setLongform(longform);
                    longform.addAcronym(acronym);
                    acronyms.put(docId, acronym);
                }
                if (annotationId.startsWith("F")) {
                    AcronymLongform longform = new AcronymLongform();
                    longform.setOffsets(offsets);
                    longformsById.put(annotationId, longform);
                }
            }
        }
        return acronyms;
    }

    /**
     * Reads a two-column, tab-separated file into a map from the first to the second column.
     *
     * @param str path to the titles file
     * @return second column keyed by first column; later lines overwrite earlier ones with the same key
     * @throws IllegalArgumentException if a line does not have exactly two columns
     * @throws IOException              if the file cannot be read
     */
    public static Map<String, String> readTitles(String str) throws IOException {
        File file = new File(str);
        Map<String, String> titles = new HashMap<>();
        // Closing the Stream returned by BufferedReader.lines() does not close the reader,
        // so the reader itself is the managed resource.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(file)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\\t");
                if (columns.length != 2) {
                    throw new IllegalArgumentException(str + " should have exactly two columns.");
                }
                titles.put(columns[0], columns[1]);
            }
        }
        return titles;
    }

    /**
     * Reads MeSH terms from a tab-separated file: the first column is the key, every further
     * column is stored as a value for that key.
     *
     * @param str path to the MeSH term file
     * @return the values of columns 1..n grouped by the first column
     * @throws IllegalArgumentException if a line has fewer than two columns
     * @throws IOException              if the file cannot be read
     */
    public static Multimap<String, String> readMeshterms(String str) throws IOException {
        File file = new File(str);
        Multimap<String, String> meshTerms = HashMultimap.create();
        // Closing the Stream returned by BufferedReader.lines() does not close the reader,
        // so the reader itself is the managed resource.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(file)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\\t");
                if (columns.length < 2) {
                    throw new IllegalArgumentException(str + " should have at least two columns in each line.");
                }
                String key = columns[0];
                for (int i = 1; i < columns.length; i++) {
                    // Use the i-th column; the previous code stored column 1 on every pass and
                    // silently lost all further terms.
                    meshTerms.put(key, columns[i]);
                }
            }
        }
        return meshTerms;
    }

    /**
     * Reads a tab-separated file whose second column is a semicolon-separated list; each list
     * element is stored as a value for the first column. Columns beyond the second are ignored.
     *
     * @param str path to the title species file
     * @return the elements of the second column grouped by the first column
     * @throws IllegalArgumentException if a line has fewer than two columns
     * @throws IOException              if the file cannot be read
     */
    public static Multimap<String, String> readTitleSpecies(String str) throws IOException {
        File file = new File(str);
        Multimap<String, String> titleSpecies = HashMultimap.create();
        // Closing the Stream returned by BufferedReader.lines() does not close the reader,
        // so the reader itself is the managed resource.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(file)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\\t");
                if (columns.length < 2) {
                    // The message now matches the check, which tolerates additional columns.
                    throw new IllegalArgumentException(str + " should have at least two columns.");
                }
                String key = columns[0];
                for (String element : columns[1].split(";")) {
                    titleSpecies.put(key, element);
                }
            }
        }
        return titleSpecies;
    }

    /**
     * Reads a tab-separated file in which every column after the first holds a
     * semicolon-separated list; each list element is stored as a value for the first column.
     * Empty columns are skipped.
     *
     * @param str path to the MeSH species file
     * @return the list elements of columns 1..n grouped by the first column
     * @throws IllegalArgumentException if a line has fewer than two columns
     * @throws IOException              if the file cannot be read
     */
    public static Multimap<String, String> readMeshSpecies(String str) throws IOException {
        File file = new File(str);
        Multimap<String, String> meshSpecies = HashMultimap.create();
        // Closing the Stream returned by BufferedReader.lines() does not close the reader,
        // so the reader itself is the managed resource.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(file)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\\t");
                if (columns.length < 2) {
                    throw new IllegalArgumentException(str + " should have at least two columns in each line.");
                }
                String key = columns[0];
                for (int i = 1; i < columns.length; i++) {
                    if (columns[i].equals("")) {
                        continue;
                    }
                    for (String element : columns[i].split(";")) {
                        meshSpecies.put(key, element);
                    }
                }
            }
        }
        return meshSpecies;
    }

    /**
     * Reads species mentions from a mixed annotation file.
     * <p>
     * Only lines ending with {@code "Organism"} or {@code "Species"} are considered. Such a line
     * must have exactly seven tab-separated columns: 0 document ID, 1 and 4 are handed to the
     * {@link SpeciesMention} constructor, 2 begin offset, 3 end offset.
     *
     * @param str path to the mixed annotation file
     * @return per document ID, the species mentions keyed by their offset range
     * @throws IllegalArgumentException if a species line does not have exactly seven columns
     * @throws IOException              if the file cannot be read
     */
    public static Map<String, OffsetMap<SpeciesMention>> readMixedFileForTextSpecies(String str) throws IOException {
        File file = new File(str);
        Map<String, OffsetMap<SpeciesMention>> speciesByDocId = new HashMap<>();
        int lineNumber = 0;
        // Closing the Stream returned by BufferedReader.lines() does not close the reader,
        // so the reader itself is the managed resource.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(file)) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Count every line so that the number in the error message is the real file line.
                lineNumber++;
                if (!line.endsWith("Organism") && !line.endsWith("Species")) {
                    continue;
                }
                String[] columns = line.split("\\t");
                if (columns.length != 7) {
                    throw new IllegalArgumentException("Line " + lineNumber + ": " + str + " should have exactly seven columns in each line.");
                }
                String docId = columns[0];
                int begin = Integer.parseInt(columns[2]);
                int end = Integer.parseInt(columns[3]);
                Range<Integer> offsets = Range.between(Integer.valueOf(begin), Integer.valueOf(end));
                speciesByDocId.computeIfAbsent(docId, key -> new OffsetMap<>())
                        .put(offsets, new SpeciesMention(columns[1], columns[4], begin, end));
            }
        }
        return speciesByDocId;
    }

    /**
     * Reads part-of-speech tags from a mixed annotation file.
     * <p>
     * Only lines ending with {@code "PennBioIEPOSTag"} are considered. Such a line must have
     * exactly seven tab-separated columns: 0 document ID, 1 is handed to the {@link PosTag}
     * constructor together with the range between the offsets in columns 2 and 3.
     *
     * @param str path to the mixed annotation file
     * @return the POS tags grouped by document ID, in file order
     * @throws IllegalArgumentException if a POS tag line does not have exactly seven columns
     * @throws IOException              if the file cannot be read
     */
    public static Multimap<String, PosTag> readMixedFileForPosTags(String str) throws IOException {
        File file = new File(str);
        Multimap<String, PosTag> posTags = LinkedHashMultimap.create();
        int lineNumber = 0;
        // Closing the Stream returned by BufferedReader.lines() does not close the reader,
        // so the reader itself is the managed resource.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(file)) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Count every line so that the number in the error message is the real file line.
                lineNumber++;
                if (!line.endsWith("PennBioIEPOSTag")) {
                    continue;
                }
                String[] columns = line.split("\\t");
                if (columns.length != 7) {
                    throw new IllegalArgumentException("Line " + lineNumber + ": " + str + " should have exactly seven columns in each line.");
                }
                Range<Integer> offsets = Range.between(Integer.valueOf(Integer.parseInt(columns[2])), Integer.valueOf(Integer.parseInt(columns[3])));
                posTags.put(columns[0], new PosTag(columns[1], offsets));
            }
        }
        return posTags;
    }

    /**
     * Reads sentence offsets from a mixed annotation file.
     * <p>
     * A line is considered when its last tab-separated column contains {@code "Sentence"}. The
     * document ID is column 0, the offsets are columns 2 and 3.
     *
     * @param str path to the mixed annotation file
     * @return the sentence offset ranges grouped by document ID
     * @throws IllegalArgumentException       if a sentence line has fewer than four columns
     * @throws ArrayIndexOutOfBoundsException if a line yields no columns at all (e.g. consists of tabs only)
     * @throws IOException                    if the file cannot be read
     */
    public static Multimap<String, Range<Integer>> readMixedFileForSentenceOffsets(String str) throws IOException {
        File file = new File(str);
        Multimap<String, Range<Integer>> sentenceOffsets = HashMultimap.create();
        int lineNumber = 0;
        // Closing the Stream returned by BufferedReader.lines() does not close the reader,
        // so the reader itself is the managed resource.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(file)) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Count every line so that the number in the error message is the real file line.
                lineNumber++;
                String[] columns = line.split("\\t");
                try {
                    if (!columns[columns.length - 1].contains("Sentence")) {
                        continue;
                    }
                    if (columns.length < 4) {
                        throw new IllegalArgumentException("Line " + lineNumber + ": " + str + " should have at least four columns in each line.");
                    }
                    sentenceOffsets.put(columns[0], Range.between(Integer.valueOf(Integer.parseInt(columns[2])), Integer.valueOf(Integer.parseInt(columns[3]))));
                } catch (ArrayIndexOutOfBoundsException e) {
                    log.error("Illegal line for sentence reading: '{}'", line, e);
                    throw e;
                }
            }
        }
        return sentenceOffsets;
    }

    /**
     * Reads the offsets of non-gene phrase annotations from a tab-separated annotation file.
     * <p>
     * A line counts as a non-gene phrase annotation when its last column contains
     * {@code "NonGenePhrase"}. For those lines the first column becomes the key (presumably the
     * document ID - confirm against the file format) and the third and fourth columns are parsed
     * as the integer bounds of the range. All other lines are skipped.
     *
     * @param str path of the annotation file
     * @return the non-gene phrase ranges, keyed by the value of the first column
     * @throws IOException                    if obtaining or closing the reader fails
     * @throws IllegalArgumentException       if a matching line has fewer than four columns
     * @throws NumberFormatException          if an offset column is not an integer
     * @throws ArrayIndexOutOfBoundsException if a line splits into no columns at all (e.g. it
     *                                        consists only of tabs); the line is logged first
     */
    public static Multimap<String, Range<Integer>> readMixedFileForNonGenePhraseOffsets(String str) throws IOException {
        Multimap<String, Range<Integer>> phraseOffsets = HashMultimap.create();
        // Manage the reader itself: closing only the Stream returned by BufferedReader.lines()
        // does not close the underlying reader and would leak the file handle.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(new File(str))) {
            int lineNumber = 0;
            Iterator<String> lineIt = reader.lines().iterator();
            while (lineIt.hasNext()) {
                String line = lineIt.next();
                // Count every line, not only matching ones, so the error message below
                // reports the real position in the file.
                lineNumber++;
                String[] columns = line.split("\\t");
                try {
                    if (!columns[columns.length - 1].contains("NonGenePhrase")) {
                        continue;
                    }
                    if (columns.length < 4) {
                        throw new IllegalArgumentException("Line " + lineNumber + ": " + str + " should have at least four columns in each line.");
                    }
                    phraseOffsets.put(columns[0], Range.between(Integer.valueOf(columns[2]), Integer.valueOf(columns[3])));
                } catch (ArrayIndexOutOfBoundsException e) {
                    // The message previously said "sentence reading", copied from the sentence reader.
                    log.error("Illegal line for non-gene phrase reading: '{}'", line, e);
                    throw e;
                }
            }
        }
        return phraseOffsets;
    }

    /**
     * Reads chunk annotations from a tab-separated annotation file.
     * <p>
     * A line counts as a chunk annotation when its last column contains {@code "Chunk"}; such a
     * line must have exactly seven columns. The first column becomes the outer map key
     * (presumably the document ID - confirm against the file format), the third and fourth
     * columns are parsed as the integer bounds of the range, and the stored value is the part of
     * the seventh column after its last {@code '.'}. All other lines are skipped.
     *
     * @param str path of the annotation file
     * @return for each first-column value, a map from range to chunk label
     * @throws IOException                    if obtaining or closing the reader fails
     * @throws IllegalArgumentException       if a chunk line does not have exactly seven columns
     * @throws NumberFormatException          if an offset column is not an integer
     * @throws ArrayIndexOutOfBoundsException if a line splits into no columns at all
     */
    public static Map<String, OffsetMap<String>> readMixedFileForChunkOffsets(String str) throws IOException {
        Map<String, OffsetMap<String>> chunksByKey = new HashMap<>();
        // Manage the reader itself: closing only the Stream returned by BufferedReader.lines()
        // does not close the underlying reader and would leak the file handle.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(new File(str))) {
            int lineNumber = 0;
            Iterator<String> lineIt = reader.lines().iterator();
            while (lineIt.hasNext()) {
                String[] columns = lineIt.next().split("\\t");
                // Count every line, not only matching ones, so the error message below
                // reports the real position in the file.
                lineNumber++;
                if (!columns[columns.length - 1].contains("Chunk")) {
                    continue;
                }
                if (columns.length != 7) {
                    throw new IllegalArgumentException("Line " + lineNumber + ": " + str + " should have exactly seven columns in each line.");
                }
                // Keep only the last dot-separated segment of the type column as the label.
                String[] typeParts = columns[6].split("\\.");
                String chunkLabel = typeParts[typeParts.length - 1];
                Range<Integer> span = Range.between(Integer.valueOf(columns[2]), Integer.valueOf(columns[3]));
                OffsetMap<String> chunks = chunksByKey.computeIfAbsent(columns[0], key -> new OffsetMap<>());
                chunks.put(span, chunkLabel);
            }
        }
        return chunksByKey;
    }

    /**
     * Reads ontology class mention annotations from a tab-separated annotation file.
     * <p>
     * A line counts as an ontology class mention when its last column contains
     * {@code "OntClassMention"}; such a line must have exactly seven columns. The first column
     * becomes the outer map key (presumably the document ID - confirm against the file format),
     * the third and fourth columns are parsed as the integer bounds of the range, and the second
     * column is stored as the value. All other lines are skipped.
     *
     * @param str path of the annotation file; may be {@code null}
     * @return for each first-column value, a map from range to the second-column value; an
     *         immutable empty map if {@code str} is {@code null} or the file does not exist
     * @throws IOException                    if obtaining or closing the reader fails
     * @throws IllegalArgumentException       if a matching line does not have exactly seven columns
     * @throws NumberFormatException          if an offset column is not an integer
     * @throws ArrayIndexOutOfBoundsException if a line splits into no columns at all
     */
    public static Map<String, OffsetMap<String>> readMixedFileForOntologyClassMentions(String str) throws IOException {
        if (str == null) {
            return Collections.emptyMap();
        }
        File mentionFile = new File(str);
        if (!mentionFile.exists()) {
            return Collections.emptyMap();
        }
        Map<String, OffsetMap<String>> mentionsByKey = new HashMap<>();
        // Manage the reader itself: closing only the Stream returned by BufferedReader.lines()
        // does not close the underlying reader and would leak the file handle.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(mentionFile)) {
            int lineNumber = 0;
            Iterator<String> lineIt = reader.lines().iterator();
            while (lineIt.hasNext()) {
                String[] columns = lineIt.next().split("\\t");
                // Count every line, not only matching ones, so the error message below
                // reports the real position in the file.
                lineNumber++;
                if (!columns[columns.length - 1].contains("OntClassMention")) {
                    continue;
                }
                if (columns.length != 7) {
                    throw new IllegalArgumentException("Line " + lineNumber + ": " + str + " should have exactly seven columns in each line.");
                }
                Range<Integer> span = Range.between(Integer.valueOf(columns[2]), Integer.valueOf(columns[3]));
                OffsetMap<String> mentions = mentionsByKey.computeIfAbsent(columns[0], key -> new OffsetMap<>());
                mentions.put(span, columns[1]);
            }
        }
        return mentionsByKey;
    }

    /**
     * Reads MeSH headings from a tab-separated file.
     * <p>
     * Every line is split on tabs; the first column is used as the key and a
     * {@link MeshHeading} built from the second column is stored under it.
     *
     * @param str path of the file to read
     * @return the headings, keyed by the value of the first column
     * @throws IOException if obtaining or closing the reader fails
     */
    public static Multimap<String, MeshHeading> readMeshHeadings(String str) throws IOException {
        HashMultimap headingsByKey = HashMultimap.create();
        try (BufferedReader reader = FileUtilities.getReaderFromFile(new File(str))) {
            // Still iterate through lines() so that read failures surface exactly as before.
            Iterator<String> lineIt = reader.lines().iterator();
            while (lineIt.hasNext()) {
                String[] columns = lineIt.next().split("\t");
                headingsByKey.put(columns[0], new MeshHeading(columns[1]));
            }
        }
        return headingsByKey;
    }

    /**
     * Reads coreference annotations from a tab-separated file.
     * <p>
     * Each line supplies a key in the first column (compared against
     * {@link CoreferenceSet#getDocId()}), a label in the second column and integer offsets in the
     * third and fourth columns. Only labels starting with {@code "Ana"} or {@code "Ant"} are
     * used; the rest of the label after those three characters is the set ID. Consecutive lines
     * with the same key and set ID are added to the same {@link CoreferenceSet}; any change of key
     * or set ID starts a new set.
     *
     * @param str path of the annotation file; may be {@code null}
     * @return the coreference sets keyed by the value of the first column; empty if {@code str}
     *         is {@code null} or the file does not exist
     * @throws IOException                    if obtaining or closing the reader fails
     * @throws NumberFormatException          if an offset column is not an integer (checked for
     *                                        every line, not only coreference lines)
     * @throws ArrayIndexOutOfBoundsException if a line has fewer than four columns
     */
    public static Multimap<String, CoreferenceSet> readCoreferenceAnnotations(String str) throws IOException {
        Multimap<String, CoreferenceSet> corefSets = HashMultimap.create();
        if (str == null) {
            return corefSets;
        }
        File corefFile = new File(str);
        if (!corefFile.exists()) {
            return corefSets;
        }
        // Manage the reader itself: closing only the Stream returned by BufferedReader.lines()
        // does not close the underlying reader and would leak the file handle.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(corefFile)) {
            CoreferenceSet currentSet = null;
            Iterator<String> lineIt = reader.lines().iterator();
            while (lineIt.hasNext()) {
                String[] columns = lineIt.next().split("\\t");
                String docId = columns[0];
                String label = columns[1];
                // Offsets are parsed before the label check, so malformed offsets fail on any line.
                int begin = Integer.parseInt(columns[2]);
                int end = Integer.parseInt(columns[3]);
                if (!label.startsWith("Ana") && !label.startsWith("Ant")) {
                    continue;
                }
                String setId = label.substring(3);
                boolean sameSet = currentSet != null && currentSet.getId().equals(setId) && currentSet.getDocId().equals(docId);
                if (!sameSet) {
                    currentSet = new CoreferenceSet(docId, setId);
                    // NOTE(review): the set is stored in a hash-based multimap and mutated
                    // afterwards; confirm CoreferenceSet's hashCode does not depend on its members.
                    corefSets.put(docId, currentSet);
                }
                currentSet.add(new CoreferenceExpression(Range.between(Integer.valueOf(begin), Integer.valueOf(end))));
            }
        }
        return corefSets;
    }

    /**
     * Reads apposition annotations from a tab-separated file.
     * <p>
     * Each line supplies a key in the first column followed by four integer offsets. The second
     * and third columns form the range of an {@link Apposition} of type
     * {@code InApposition}; the fourth and fifth columns form the range of one of type
     * {@code Appositive}. The two objects are linked to each other via {@code setOther} and both
     * are stored under the key.
     *
     * @param str path of the annotation file; may be {@code null}
     * @return the appositions keyed by the value of the first column; empty if {@code str} is
     *         {@code null} or the file does not exist
     * @throws IOException                    if obtaining or closing the reader fails
     * @throws NumberFormatException          if an offset column is not an integer
     * @throws ArrayIndexOutOfBoundsException if a line has fewer than five columns
     */
    public static Multimap<String, Apposition> readAppositionAnnotations(String str) throws IOException {
        Multimap<String, Apposition> appositions = HashMultimap.create();
        if (str == null) {
            return appositions;
        }
        File appositionFile = new File(str);
        if (!appositionFile.exists()) {
            return appositions;
        }
        // Manage the reader itself: closing only the Stream returned by BufferedReader.lines()
        // does not close the underlying reader and would leak the file handle.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(appositionFile)) {
            Iterator<String> lineIt = reader.lines().iterator();
            while (lineIt.hasNext()) {
                String[] columns = lineIt.next().split("\\t");
                String key = columns[0];
                int firstBegin = Integer.parseInt(columns[1]);
                int firstEnd = Integer.parseInt(columns[2]);
                int secondBegin = Integer.parseInt(columns[3]);
                int secondEnd = Integer.parseInt(columns[4]);
                Apposition inApposition = new Apposition(Range.between(Integer.valueOf(firstBegin), Integer.valueOf(firstEnd)), Apposition.AppositionType.InApposition);
                Apposition appositive = new Apposition(Range.between(Integer.valueOf(secondBegin), Integer.valueOf(secondEnd)), Apposition.AppositionType.Appositive);
                // Link both halves before storing them so each can reach its counterpart.
                inApposition.setOther(appositive);
                appositive.setOther(inApposition);
                appositions.put(key, inApposition);
                appositions.put(key, appositive);
            }
        }
        return appositions;
    }
}
