package banner.eval;

import banner.eval.dataset.Dataset;
import banner.eval.uima.Util;
import banner.postprocessing.FlattenPostProcessor;
import banner.postprocessing.LocalAbbreviationPostProcessor;
import banner.postprocessing.ParenthesisPostProcessor;
import banner.postprocessing.PostProcessor;
import banner.postprocessing.SequentialPostProcessor;
import banner.tagging.CRFTagger;
import banner.tagging.FeatureSet;
import banner.tagging.TagFormat;
import banner.tagging.dictionary.DictionaryTagger;
import banner.tokenization.Tokenizer;
import banner.types.EntityType;
import banner.types.Mention;
import banner.types.Sentence;
import banner.types.Token;
import banner.util.CollectionsRand;
import banner.util.RankedList;
import dragon.nlp.tool.HeppleTagger;
import dragon.nlp.tool.MedPostTagger;
import dragon.nlp.tool.Tagger;
import dragon.nlp.tool.lemmatiser.EngLemmatiser;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.HierarchicalConfiguration;
import org.apache.commons.configuration.SubnodeConfiguration;
import org.apache.commons.configuration.XMLConfiguration;

/* loaded from: input_file:banner/eval/BANNER.class */
public class BANNER {
    static final /* synthetic */ boolean $assertionsDisabled;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:banner/eval/BANNER$Count.class */
    public static class Count {
        /** Current tally; a fresh instance starts at zero. */
        private int count;

        /** Returns the current tally. */
        public int getCount() {
            return count;
        }

        /** Replaces the tally with {@code i}. */
        public void setCount(int i) {
            count = i;
        }

        /** Adds one to the tally. */
        public void incr() {
            count += 1;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:banner/eval/BANNER$DatasetCombiner.class */
    public static class DatasetCombiner extends Dataset {
        /**
         * Builds a dataset holding the sentences of every dataset in
         * {@code collection}, added in the collection's iteration order.
         */
        public DatasetCombiner(Collection<Dataset> collection) {
            for (Dataset part : collection) {
                this.sentences.addAll(part.getSentences());
            }
        }

        /** Not supported for a combined dataset; always throws. */
        @Override // banner.eval.dataset.Dataset
        public List<Dataset> split(int i) {
            throw new UnsupportedOperationException();
        }

        /** Not supported for a combined dataset; always throws. */
        @Override // banner.eval.dataset.Dataset
        public void load(HierarchicalConfiguration hierarchicalConfiguration) {
            throw new UnsupportedOperationException();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:banner/eval/BANNER$FontColor.class */
    public enum FontColor {
        Black,
        Blue,
        Green,
        Red,
        Purple;

        @Override // java.lang.Enum
        public String toString() {
            return name().toLowerCase();
        }

        public String changeColor(FontColor fontColor) {
            StringBuffer stringBuffer = new StringBuffer();
            if (!equals(fontColor) && !equals(Black)) {
                stringBuffer.append("</font>");
            }
            stringBuffer.append(" ");
            if (!equals(fontColor) && !fontColor.equals(Black)) {
                stringBuffer.append("<font color=\"" + fontColor.toString() + "\">");
            }
            return stringBuffer.toString();
        }
    }

    /* loaded from: input_file:banner/eval/BANNER$Function.class */
    /**
     * Command names accepted as the first command-line argument. {@code main}
     * resolves the argument with {@code Function.valueOf} and switches on the result.
     */
    private enum Function {
        /** Prints the command overview. */
        help,
        /** Tags sentences from an input file with a trained model. */
        tag,
        /** Evaluates a previously trained model against the configured test data. */
        test,
        /** Trains a new model from the configured training data. */
        train,
        /** Runs a 5 by 2 cross-validation on the configured data. */
        eval5by2,
        /** Runs a 10 fold cross-validation on the configured data. */
        eval10Fold,
        /** Dispatched to {@code describeDataset}; not listed in the help text. */
        describeDataset,
        /** Dispatched to {@code testDict}; not listed in the help text. */
        testDict,
        /** Has no case in the switch in {@code main}, so it falls through to the default branch. */
        coordEllipsis
    }

    /* loaded from: input_file:banner/eval/BANNER$MatchCriteria.class */
    /**
     * Criteria for deciding whether a found mention matches an expected one.
     * The {@code Performance} constructor accepts only {@link #Strict}; every
     * other value makes it throw {@code IllegalArgumentException("Not implemented")}.
     */
    public enum MatchCriteria {
        Strict,
        Left,
        Right,
        LeftOrRight,
        Approximate,
        Partial
    }

    /* loaded from: input_file:banner/eval/BANNER$Performance.class */
    public static class Performance {
        /** Counts accumulated over every mention seen. */
        private PerformanceData overall;
        /** Counts keyed by the entity type of the mention. */
        private Map<EntityType, PerformanceData> perMention;
        /** Counts keyed by the mention text. */
        private Map<String, PerformanceData> perText;

        /**
         * Creates an empty tally.
         *
         * @throws IllegalArgumentException if {@code matchCriteria} is anything but {@code Strict}
         */
        public Performance(MatchCriteria matchCriteria) {
            if (matchCriteria != MatchCriteria.Strict) {
                throw new IllegalArgumentException("Not implemented");
            }
            overall = new PerformanceData();
            perMention = new HashMap<EntityType, PerformanceData>();
            perText = new HashMap<String, PerformanceData>();
        }

        /** Returns the counts for {@code entityType}, creating the entry on first use. */
        private PerformanceData getMentionPerformanceData(EntityType entityType) {
            PerformanceData data = perMention.get(entityType);
            if (data != null) {
                return data;
            }
            data = new PerformanceData();
            perMention.put(entityType, data);
            return data;
        }

        /** Returns the counts for the mention text {@code str}, creating the entry on first use. */
        private PerformanceData getTextPerformanceData(String str) {
            PerformanceData data = perText.get(str);
            if (data != null) {
                return data;
            }
            data = new PerformanceData();
            perText.put(str, data);
            return data;
        }

        /** Records one true positive for {@code mention} in all three tallies. */
        private void countTruePositive(Mention mention) {
            overall.tp++;
            getMentionPerformanceData(mention.getEntityType()).tp++;
            getTextPerformanceData(mention.getText()).tp++;
        }

        /**
         * Compares the {@code Found} mentions of {@code sentence2} with the
         * {@code Required} and {@code Allowed} mentions of {@code sentence} and
         * updates the true-positive, false-positive and false-negative counts.
         */
        public void update(Sentence sentence, Sentence sentence2) {
            Set<Mention> unmatched = new HashSet<Mention>(sentence.getMentions(Mention.MentionType.Required));
            List<Mention> allowed = sentence.getMentions(Mention.MentionType.Allowed);
            for (Mention found : sentence2.getMentions(Mention.MentionType.Found)) {
                if (unmatched.remove(found)) {
                    // Exact match with a required mention.
                    countTruePositive(found);
                } else if (allowed.contains(found)) {
                    // An allowed alternative: credit every still-unmatched required
                    // mention it overlaps. Iterate over a copy because we remove as we go.
                    for (Mention required : new HashSet<Mention>(unmatched)) {
                        if (found.overlaps(required)) {
                            unmatched.remove(required);
                            countTruePositive(required);
                        }
                    }
                } else {
                    overall.fp++;
                    getMentionPerformanceData(found.getEntityType()).fp++;
                    getTextPerformanceData(found.getText()).fp++;
                }
            }
            // Whatever is still unmatched was missed by the tagger.
            for (Mention missed : unmatched) {
                overall.fn++;
                getMentionPerformanceData(missed.getEntityType()).fn++;
                getTextPerformanceData(missed.getText()).fn++;
            }
        }

        /** Returns the counts accumulated over all mentions. */
        public PerformanceData getOverall() {
            return overall;
        }

        /** Returns a read-only view of the counts per entity type. */
        public Map<EntityType, PerformanceData> getPerMention() {
            return Collections.unmodifiableMap(perMention);
        }

        /** Returns a read-only view of the counts per mention text. */
        public Map<String, PerformanceData> getPerText() {
            return Collections.unmodifiableMap(perText);
        }

        /** Prints the overall counts followed by one section per entity type to standard output. */
        public void print() {
            System.out.println("OVERALL: ");
            overall.print();
            for (Map.Entry<EntityType, PerformanceData> entry : perMention.entrySet()) {
                System.out.println();
                System.out.println("TYPE: \"" + entry.getKey().getText() + "\"");
                entry.getValue().print();
            }
        }
    }

    /* loaded from: input_file:banner/eval/BANNER$PerformanceData.class */
    public static class PerformanceData {
        /** Number of true positives. */
        int tp = 0;
        /** Number of false positives. */
        int fp = 0;
        /** Number of false negatives. */
        int fn = 0;

        /**
         * Returns tp / (tp + fp) as a floating-point ratio.
         * The result is {@code NaN} when no positives have been recorded (tp + fp == 0).
         */
        public double getPrecision() {
            // Cast first: dividing two ints would truncate to 0 or 1 and throw on a zero denominator.
            return (double) this.tp / (this.tp + this.fp);
        }

        /**
         * Returns tp / (tp + fn) as a floating-point ratio.
         * The result is {@code NaN} when tp + fn == 0.
         */
        public double getRecall() {
            return (double) this.tp / (this.tp + this.fn);
        }

        /**
         * Returns the harmonic mean of precision and recall.
         * The result is {@code NaN} when either is undefined or both are zero.
         */
        public double getFMeasure() {
            double precision = getPrecision();
            double recall = getRecall();
            return ((2.0d * precision) * recall) / (precision + recall);
        }

        /** Prints the raw counts and the derived measures to standard output. */
        public void print() {
            System.out.println("TP: " + this.tp);
            System.out.println("FP: " + this.fp);
            System.out.println("FN: " + this.fn);
            System.out.println("precision: " + getPrecision());
            System.out.println("   recall: " + getRecall());
            System.out.println("f-measure: " + getFMeasure());
        }
    }

    /**
     * Command-line entry point. The first argument names the command (see
     * {@code Function}); the second is the XML configuration file; further
     * arguments depend on the command.
     *
     * <p>An unknown command name or a missing argument is reported on standard
     * output instead of surfacing as an exception.
     *
     * @param strArr the command-line arguments
     * @throws ConfigurationException if the configuration file cannot be loaded
     * @throws IOException if a command fails while reading or writing files
     */
    public static void main(String[] strArr) throws ConfigurationException, IOException {
        if (strArr.length == 0) {
            System.out.println("Usage: banner.sh <command> <configuration> <parameters>");
            System.out.println("Exceute \"banner.sh help\" for details");
            return;
        }
        Function function;
        try {
            function = Function.valueOf(strArr[0]);
        } catch (IllegalArgumentException e) {
            // valueOf throws for names that are not enum constants; report it like any other bad command.
            printUnrecognizedCommand(strArr[0]);
            return;
        }
        switch (function) {
            case help:
                System.out.println("Commands:");
                System.out.println("help: Prints this help text");
                System.out.println("tag: Uses a trained model to tag sentences from an input file");
                System.out.println("\tUsage: banner.sh tag config.xml sentences.txt");
                System.out.println("\t   Or: banner.sh tag config.xml sentences.txt 0.2");
                System.out.println("test: Evaluates a previously trained model against the test data set up in the config file");
                System.out.println("\tUsage: banner.sh test config.xml");
                System.out.println("\t   Or: banner.sh test config.xml 0.2");
                System.out.println("train: Uses the training data and configuration from the config file to create a new model");
                System.out.println("\tUsage: banner.sh train config.xml");
                System.out.println("\t   Or: banner.sh train config.xml 0.2");
                System.out.println("eval5by2: Performs a 5 by 2 cross-validation on the data set up in the config file");
                System.out.println("\tUsage: banner.sh eval5by2 config.xml");
                System.out.println("\t   Or: banner.sh eval5by2 config.xml 0.2");
                System.out.println("eval10Fold: Performs a 10 fold cross-validation on the data set up in the config file");
                System.out.println("\tUsage: banner.sh eval10Fold config.xml");
                System.out.println("\t   Or: banner.sh eval10Fold config.xml 0.2");
                return;
            case tag:
                if (hasRequiredArguments(strArr, 3)) {
                    tag(new XMLConfiguration(strArr[1]), strArr[2], strArr.length > 3 ? Double.valueOf(strArr[3]) : null);
                }
                return;
            case test:
                if (hasRequiredArguments(strArr, 2)) {
                    test(new XMLConfiguration(strArr[1]));
                }
                return;
            case train:
                if (hasRequiredArguments(strArr, 2)) {
                    train(new XMLConfiguration(strArr[1]), strArr.length > 2 ? Double.valueOf(strArr[2]) : null);
                }
                return;
            case eval5by2:
                if (hasRequiredArguments(strArr, 2)) {
                    eval5by2(new XMLConfiguration(strArr[1]), strArr.length > 2 ? Double.valueOf(strArr[2]) : null);
                }
                return;
            case eval10Fold:
                if (hasRequiredArguments(strArr, 2)) {
                    eval10Fold(new XMLConfiguration(strArr[1]), strArr.length > 2 ? Double.valueOf(strArr[2]) : null);
                }
                return;
            case describeDataset:
                if (hasRequiredArguments(strArr, 2)) {
                    describeDataset(new XMLConfiguration(strArr[1]));
                }
                return;
            case testDict:
                if (hasRequiredArguments(strArr, 2)) {
                    testDict(new XMLConfiguration(strArr[1]));
                }
                return;
            default:
                // Reached for enum constants that have no command wired up.
                printUnrecognizedCommand(strArr[0]);
                return;
        }
    }

    /** Prints the message shown for a command name that cannot be dispatched. */
    private static void printUnrecognizedCommand(String command) {
        System.out.println("Unrecognized command \"" + command + "\"; use \"help\" for a list of valid commands");
    }

    /**
     * Checks that at least {@code required} arguments (command name included)
     * were supplied, printing a hint and returning {@code false} otherwise.
     */
    private static boolean hasRequiredArguments(String[] args, int required) {
        if (args.length >= required) {
            return true;
        }
        System.out.println("Command \"" + args[0] + "\" requires at least " + (required - 1)
                + " argument(s) after the command name; use \"help\" for usage");
        return false;
    }

    /**
     * Tags every non-blank line of the file {@code str} with the model named by
     * the {@code modelFilename} configuration entry and prints one
     * tab-separated line per mention to standard output.
     *
     * <p>Each input line is trimmed and split on tabs; the first field is used
     * as the sentence identifier and the second as the sentence text.
     *
     * @param hierarchicalConfiguration the loaded configuration
     * @param str path of the input file
     * @param d accepted for symmetry with the other commands; not used by this method
     * @throws IOException if the input file cannot be read
     */
    private static void tag(HierarchicalConfiguration hierarchicalConfiguration, String str, Double d) throws IOException {
        long start = System.currentTimeMillis();
        Tokenizer tokenizer = getTokenizer(hierarchicalConfiguration);
        DictionaryTagger dictionary = getDictionary(hierarchicalConfiguration);
        EngLemmatiser lemmatiser = getLemmatiser(hierarchicalConfiguration);
        Tagger posTagger = getPosTagger(hierarchicalConfiguration);
        PostProcessor postProcessor = getPostProcessor(hierarchicalConfiguration);
        String modelFilename = hierarchicalConfiguration.configurationAt(BANNER.class.getPackage().getName()).getString("modelFilename");
        System.out.println("Model: " + modelFilename);
        // NOTE(review): BANNER.class.getClass() is java.lang.Class itself, so the resource is
        // resolved relative to java.lang.Class rather than this class - confirm this is intended.
        CRFTagger tagger = CRFTagger.load(BANNER.class.getClass().getResourceAsStream(modelFilename), lemmatiser, posTagger, dictionary);
        System.out.println("Completed input: " + (System.currentTimeMillis() - start) + "ms");
        BufferedReader reader = new BufferedReader(new FileReader(str));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.length() > 0) {
                    // NOTE: a line without a tab has no second field and fails with
                    // ArrayIndexOutOfBoundsException below.
                    String[] fields = trimmed.split("\\t");
                    Sentence tagged = process(tagger, tokenizer, postProcessor, new Sentence(fields[0], "", fields[1]));
                    for (Mention mention : tagged.getMentions()) {
                        System.out.println(fields[0] + "\t" + mention.getEntityType() + "\t" + mention.getStartChar() + "\t" + mention.getEndChar() + "\t" + mention.getText());
                    }
                }
            }
        } finally {
            // Release the file handle even when tagging a line throws.
            reader.close();
        }
    }

    /**
     * Trains a CRF tagger on the configured dataset and writes the model to the
     * file named by the {@code modelFilename} configuration entry.
     *
     * @param hierarchicalConfiguration the loaded configuration
     * @param d if non-null, passed to {@code CollectionsRand.randomSubset} to
     *          train on a random subset of the sentences; {@code null} uses all of them
     */
    public static void train(HierarchicalConfiguration hierarchicalConfiguration, Double d) throws ConfigurationException, IOException {
        long start = System.currentTimeMillis();
        Dataset dataset = getDataset(hierarchicalConfiguration);
        TagFormat tagFormat = getTagFormat(hierarchicalConfiguration);
        DictionaryTagger dictionary = getDictionary(hierarchicalConfiguration);
        int crfOrder = getCRFOrder(hierarchicalConfiguration);
        System.out.println("tagformat=" + tagFormat);
        System.out.println("crfOrder=" + crfOrder);
        EngLemmatiser lemmatiser = getLemmatiser(hierarchicalConfiguration);
        Tagger posTagger = getPosTagger(hierarchicalConfiguration);
        Set<Mention.MentionType> mentionTypes = getMentionTypes(hierarchicalConfiguration);
        Sentence.OverlapOption sameTypeOverlap = getSameTypeOverlapOption(hierarchicalConfiguration);
        Sentence.OverlapOption differentTypeOverlap = getDifferentTypeOverlapOption(hierarchicalConfiguration);
        String simFindFilename = getSimFindFilename(hierarchicalConfiguration);
        String modelFilename = hierarchicalConfiguration.configurationAt(BANNER.class.getPackage().getName()).getString("modelFilename");

        Set<Sentence> trainingSentences = dataset.getSentences();
        if (d != null) {
            trainingSentences = CollectionsRand.randomSubset(trainingSentences, d.doubleValue());
        }
        logInput(trainingSentences, hierarchicalConfiguration);
        System.out.println("Completed input: " + (System.currentTimeMillis() - start) + "ms");

        System.out.println("Training data loaded, starting training");
        FeatureSet featureSet = new FeatureSet(tagFormat, lemmatiser, posTagger, dictionary, simFindFilename, mentionTypes, sameTypeOverlap, differentTypeOverlap);
        CRFTagger tagger = CRFTagger.train(trainingSentences, crfOrder, tagFormat, featureSet);
        System.out.println("Training complete, saving model");
        tagger.write(new File(modelFilename));
    }

    /**
     * Loads the model named by the {@code modelFilename} configuration entry,
     * evaluates it against the configured dataset and prints the resulting
     * performance figures together with the elapsed time.
     *
     * @param hierarchicalConfiguration the loaded configuration
     * @throws IOException if the model file cannot be opened or read
     */
    public static void test(HierarchicalConfiguration hierarchicalConfiguration) throws ConfigurationException, IOException {
        long currentTimeMillis = System.currentTimeMillis();
        Dataset dataset = getDataset(hierarchicalConfiguration);
        DictionaryTagger dictionary = getDictionary(hierarchicalConfiguration);
        EngLemmatiser lemmatiser = getLemmatiser(hierarchicalConfiguration);
        Tagger posTagger = getPosTagger(hierarchicalConfiguration);
        String string = hierarchicalConfiguration.configurationAt(BANNER.class.getPackage().getName()).getString("modelFilename");
        System.out.println("Model: " + string);
        logInput(dataset.getSentences(), hierarchicalConfiguration);
        System.out.println("Completed input: " + (System.currentTimeMillis() - currentTimeMillis));
        FileInputStream modelStream = new FileInputStream(new File(string));
        CRFTagger tagger;
        try {
            tagger = CRFTagger.load(modelStream, lemmatiser, posTagger, dictionary);
        } finally {
            // Closing is idempotent, so this is safe even if load() already closed the stream.
            modelStream.close();
        }
        test(dataset, tagger, hierarchicalConfiguration).print();
        System.out.println("Elapsed time: " + (System.currentTimeMillis() - currentTimeMillis) + "ms");
    }

    /**
     * Runs five rounds of two-fold cross-validation: in each round the dataset
     * is split in two, and each half in turn is used for training while the
     * other is used for testing. Each trained model is written to a file whose
     * name carries the round and fold digits.
     *
     * @param hierarchicalConfiguration the loaded configuration
     * @param d if non-null, passed to {@code CollectionsRand.randomSubset} to
     *          train on a random subset of each training half
     */
    public static void eval5by2(HierarchicalConfiguration hierarchicalConfiguration, Double d) throws ConfigurationException, IOException {
        Dataset dataset = getDataset(hierarchicalConfiguration);
        Map<EntityType, Integer> typeCounts = dataset.getTypeCounts();
        for (Map.Entry<EntityType, Integer> typeCount : typeCounts.entrySet()) {
            System.out.println(typeCount.getKey().toString() + ", count=" + typeCount.getValue());
        }
        TagFormat tagFormat = getTagFormat(hierarchicalConfiguration);
        DictionaryTagger dictionary = getDictionary(hierarchicalConfiguration);
        int crfOrder = getCRFOrder(hierarchicalConfiguration);
        EngLemmatiser lemmatiser = getLemmatiser(hierarchicalConfiguration);
        Tagger posTagger = getPosTagger(hierarchicalConfiguration);
        Set<Mention.MentionType> mentionTypes = getMentionTypes(hierarchicalConfiguration);
        Sentence.OverlapOption sameTypeOverlap = getSameTypeOverlapOption(hierarchicalConfiguration);
        Sentence.OverlapOption differentTypeOverlap = getDifferentTypeOverlapOption(hierarchicalConfiguration);
        String simFindFilename = getSimFindFilename(hierarchicalConfiguration);
        String modelFilename = hierarchicalConfiguration.configurationAt(BANNER.class.getPackage().getName()).getString("modelFilename");

        for (int run = 0; run < 5; run++) {
            long splitStart = System.currentTimeMillis();
            List<Dataset> halves = dataset.split(2);
            System.out.println("Created folds for run " + run + ": " + (System.currentTimeMillis() - splitStart));

            for (int cross = 0; cross < 2; cross++) {
                Dataset trainingHalf = halves.get(cross);
                // The other of the two halves is held out for testing.
                Dataset testingHalf = halves.get(1 - cross);
                Set<Sentence> trainingSentences = trainingHalf.getSentences();
                if (d != null) {
                    trainingSentences = CollectionsRand.randomSubset(trainingSentences, d.doubleValue());
                }
                String suffix = Integer.toString(run) + Integer.toString(cross);
                logInput(trainingSentences, hierarchicalConfiguration, suffix);

                long trainStart = System.currentTimeMillis();
                System.out.println("\tTraining data loaded, starting training");
                FeatureSet featureSet = new FeatureSet(tagFormat, lemmatiser, posTagger, dictionary, simFindFilename, mentionTypes, sameTypeOverlap, differentTypeOverlap);
                CRFTagger tagger = CRFTagger.train(trainingSentences, crfOrder, tagFormat, featureSet);
                System.out.println("Completed training for run " + run + " cross " + cross + ": " + (System.currentTimeMillis() - trainStart));
                tagger.write(new File(getFilename(modelFilename, suffix)));
                System.gc();

                long testStart = System.currentTimeMillis();
                test(testingHalf, tagger, hierarchicalConfiguration, suffix).print();
                System.out.println("Completed testing for run " + run + " cross " + cross + ": " + (System.currentTimeMillis() - testStart) + "ms");
                System.gc();
            }
        }
    }

    /**
     * Runs a ten-fold cross-validation: the dataset is split into ten folds
     * once, and each fold in turn is held out for testing while the other nine
     * are combined for training. Each trained model is written to a file whose
     * name carries the fold number.
     *
     * @param hierarchicalConfiguration the loaded configuration
     * @param d if non-null, passed to {@code CollectionsRand.randomSubset} to
     *          train on a random subset of the combined training folds
     */
    public static void eval10Fold(HierarchicalConfiguration hierarchicalConfiguration, Double d) throws ConfigurationException, IOException {
        Dataset dataset = getDataset(hierarchicalConfiguration);
        Map<EntityType, Integer> typeCounts = dataset.getTypeCounts();
        for (Map.Entry<EntityType, Integer> typeCount : typeCounts.entrySet()) {
            System.out.println(typeCount.getKey().toString() + ", count=" + typeCount.getValue());
        }
        TagFormat tagFormat = getTagFormat(hierarchicalConfiguration);
        DictionaryTagger dictionary = getDictionary(hierarchicalConfiguration);
        int crfOrder = getCRFOrder(hierarchicalConfiguration);
        EngLemmatiser lemmatiser = getLemmatiser(hierarchicalConfiguration);
        Tagger posTagger = getPosTagger(hierarchicalConfiguration);
        Set<Mention.MentionType> mentionTypes = getMentionTypes(hierarchicalConfiguration);
        Sentence.OverlapOption sameTypeOverlap = getSameTypeOverlapOption(hierarchicalConfiguration);
        Sentence.OverlapOption differentTypeOverlap = getDifferentTypeOverlapOption(hierarchicalConfiguration);
        String simFindFilename = getSimFindFilename(hierarchicalConfiguration);
        String modelFilename = hierarchicalConfiguration.configurationAt(BANNER.class.getPackage().getName()).getString("modelFilename");

        long splitStart = System.currentTimeMillis();
        List<Dataset> folds = dataset.split(10);
        System.out.println("Created folds: " + (System.currentTimeMillis() - splitStart));

        for (int heldOut = 0; heldOut < 10; heldOut++) {
            List<Dataset> trainingFolds = new ArrayList<Dataset>();
            for (int fold = 0; fold < 10; fold++) {
                if (fold != heldOut) {
                    trainingFolds.add(folds.get(fold));
                }
                // The size of every fold is printed, including the held-out one.
                System.out.println(folds.get(fold).getSentences().size());
            }
            DatasetCombiner trainingData = new DatasetCombiner(trainingFolds);
            Dataset testingData = folds.get(heldOut);
            Set<Sentence> trainingSentences = trainingData.getSentences();
            if (d != null) {
                trainingSentences = CollectionsRand.randomSubset(trainingSentences, d.doubleValue());
            }
            String suffix = Integer.toString(heldOut);
            logInput(trainingSentences, hierarchicalConfiguration, suffix);

            long trainStart = System.currentTimeMillis();
            System.out.println("\tTraining data loaded, starting training");
            FeatureSet featureSet = new FeatureSet(tagFormat, lemmatiser, posTagger, dictionary, simFindFilename, mentionTypes, sameTypeOverlap, differentTypeOverlap);
            CRFTagger tagger = CRFTagger.train(trainingSentences, crfOrder, tagFormat, featureSet);
            System.out.println("Completed training for cross " + heldOut + ": " + (System.currentTimeMillis() - trainStart));
            tagger.write(new File(getFilename(modelFilename, suffix)));
            System.gc();

            long testStart = System.currentTimeMillis();
            test(testingData, tagger, hierarchicalConfiguration, suffix).print();
            System.out.println("Completed testing for cross " + heldOut + ": " + (System.currentTimeMillis() - testStart));
            System.gc();
        }
    }

    /**
     * Prints descriptive statistics for the configured dataset: sentence, token
     * and mention totals, histograms of sentence length, mentions per sentence
     * and mention length, the number of distinct mention texts, and histograms
     * of how often each token text occurs inside and outside mentions.
     *
     * <p>Histogram buckets are fixed-size arrays; values beyond the last bucket
     * are left out of the histogram (they still count towards the totals).
     *
     * @param hierarchicalConfiguration the loaded configuration
     */
    private static void describeDataset(HierarchicalConfiguration hierarchicalConfiguration) throws ConfigurationException, IOException {
        Set<Sentence> sentences = getDataset(hierarchicalConfiguration).getSentences();
        logInput(sentences, hierarchicalConfiguration);
        int totalTokens = 0;
        int totalMentions = 0;
        int[] sentencesByLength = new int[100];
        int[] sentencesByMentionCount = new int[15];
        int[] mentionsByLength = new int[50];
        Set<String> uniqueMentionTexts = new HashSet<String>();
        DictionaryTagger mentionDictionary = new DictionaryTagger();
        Map<String, Count> tokenCounts = new HashMap<String, Count>();
        Map<String, Count> mentionTokenCounts = new HashMap<String, Count>();
        for (Sentence sentence : sentences) {
            List<Token> tokens = sentence.getTokens();
            int tokenCount = tokens.size();
            if (tokenCount < sentencesByLength.length) {
                sentencesByLength[tokenCount]++;
            }
            totalTokens += tokenCount;
            for (Token token : tokens) {
                incrementCount(tokenCounts, token.getText());
            }
            List<Mention> mentions = sentence.getMentions();
            int mentionCount = mentions.size();
            totalMentions += mentionCount;
            if (mentionCount < sentencesByMentionCount.length) {
                sentencesByMentionCount[mentionCount]++;
            }
            for (Mention mention : mentions) {
                int mentionLength = mention.length();
                if (mentionLength < mentionsByLength.length) {
                    mentionsByLength[mentionLength]++;
                }
                uniqueMentionTexts.add(mention.getText());
                List<String> mentionTokenTexts = new ArrayList<String>();
                for (Token token : mention.getTokens()) {
                    mentionTokenTexts.add(token.getText());
                }
                mentionDictionary.add(mentionTokenTexts, Collections.singleton(mention.getEntityType()));
                for (String text : mentionTokenTexts) {
                    incrementCount(mentionTokenCounts, text);
                }
            }
        }
        System.out.println();
        System.out.println("Number of sentences: " + sentences.size());
        System.out.println("Number of tokens: " + totalTokens);
        System.out.println("Number of mentions: " + totalMentions);
        System.out.println("Number of sentences per sentence length: " + Arrays.toString(sentencesByLength));
        System.out.println("Number of sentences per mention frequency: " + Arrays.toString(sentencesByMentionCount));
        System.out.println("Number of mentions per mention length: " + Arrays.toString(mentionsByLength));
        System.out.println("Number of unique mention texts: " + uniqueMentionTexts.size());
        // The tagging result is not inspected here; the call is kept as in the original flow.
        for (Sentence sentence : sentences) {
            mentionDictionary.tag(sentence.copy(true, false));
        }
        System.out.println("Token frequencies:");
        // Raw type kept: RankedList's type parameters are not visible from this file.
        RankedList rankedList = new RankedList(100);
        int[] tokensByNonMentionFrequency = new int[100];
        int[] tokensByMentionFrequency = new int[100];
        for (Map.Entry<String, Count> entry : tokenCounts.entrySet()) {
            String text = entry.getKey();
            int total = entry.getValue().getCount();
            Count inMentionsCount = mentionTokenCounts.get(text);
            int inMentions = inMentionsCount == null ? 0 : inMentionsCount.getCount();
            int outsideMentions = Math.max(0, total - inMentions);
            if (outsideMentions < tokensByNonMentionFrequency.length) {
                tokensByNonMentionFrequency[outsideMentions]++;
            }
            if (inMentions < tokensByMentionFrequency.length) {
                tokensByMentionFrequency[inMentions]++;
            }
            if (total > 5) {
                // Floating-point division: an int/int quotient would collapse every score to 0 or 1.
                rankedList.add(1.0d - ((double) inMentions / total), text);
            }
        }
        System.out.println("Number of tokens which appear in mentions with a specific frequency:" + Arrays.toString(tokensByMentionFrequency));
        System.out.println("Number of tokens which appear non mention with a specific frequency:" + Arrays.toString(tokensByNonMentionFrequency));
    }

    /** Adds one to the counter stored under {@code key}, creating the counter on first use. */
    private static void incrementCount(Map<String, Count> counts, String key) {
        Count counter = counts.get(key);
        if (counter == null) {
            counter = new Count();
            counts.put(key, counter);
        }
        counter.incr();
    }

    /**
     * Evaluates the configured dictionary tagger on its own against the
     * configured dataset and prints the resulting performance figures.
     *
     * @param hierarchicalConfiguration the loaded configuration
     */
    public static void testDict(HierarchicalConfiguration hierarchicalConfiguration) throws ConfigurationException, IOException {
        long loadStart = System.currentTimeMillis();
        Dataset dataset = getDataset(hierarchicalConfiguration);
        DictionaryTagger dictionaryTagger = getDictionary(hierarchicalConfiguration);
        logInput(dataset.getSentences(), hierarchicalConfiguration);
        long loadMillis = System.currentTimeMillis() - loadStart;
        System.out.println("Completed input: " + loadMillis + "ms");

        long tagStart = System.currentTimeMillis();
        Performance performance = test(dataset, dictionaryTagger, hierarchicalConfiguration);
        performance.print();
        System.out.println("Completed tagging: " + (System.currentTimeMillis() - tagStart));
    }

    /**
     * Writes the given sentences to the configured input log files without a
     * filename suffix; delegates to the three-argument overload with a
     * {@code null} suffix.
     *
     * @param set the sentences to log
     * @param hierarchicalConfiguration the loaded configuration
     * @throws IOException if a log file cannot be opened
     */
    public static void logInput(Set<Sentence> set, HierarchicalConfiguration hierarchicalConfiguration) throws IOException {
        logInput(set, hierarchicalConfiguration, null);
    }

    /**
     * Writes three parallel log files for the given sentences: the sentence
     * identifiers, the raw sentence texts, and the token/label training text.
     * File names come from the {@code idInputFilename}, {@code rawInputFilename}
     * and {@code trainingInputFilename} configuration entries, each passed
     * through {@code getFilename} with the suffix {@code str}.
     *
     * @param set the sentences to log
     * @param hierarchicalConfiguration the loaded configuration
     * @param str suffix handed to {@code getFilename}; may be {@code null}
     * @throws IOException if a log file cannot be opened
     */
    private static void logInput(Set<Sentence> set, HierarchicalConfiguration hierarchicalConfiguration, String str) throws IOException {
        TagFormat tagFormat = getTagFormat(hierarchicalConfiguration);
        SubnodeConfiguration configurationAt = hierarchicalConfiguration.configurationAt(BANNER.class.getPackage().getName());
        String idFilename = getFilename(configurationAt.getString("idInputFilename"), str);
        String rawFilename = getFilename(configurationAt.getString("rawInputFilename"), str);
        String trainingFilename = getFilename(configurationAt.getString("trainingInputFilename"), str);
        PrintWriter idWriter = null;
        PrintWriter rawWriter = null;
        PrintWriter trainingWriter = null;
        try {
            idWriter = new PrintWriter(new BufferedWriter(new FileWriter(idFilename)));
            rawWriter = new PrintWriter(new BufferedWriter(new FileWriter(rawFilename)));
            trainingWriter = new PrintWriter(new BufferedWriter(new FileWriter(trainingFilename)));
            for (Sentence sentence : set) {
                idWriter.println(sentence.getSentenceId());
                rawWriter.println(sentence.getText());
                trainingWriter.println(getTrainingText(sentence, tagFormat, EnumSet.of(Mention.MentionType.Required), Sentence.OverlapOption.Raw, Sentence.OverlapOption.Raw));
            }
        } finally {
            // Close whichever writers were opened, even if a later open or a write step failed.
            if (idWriter != null) {
                idWriter.close();
            }
            if (rawWriter != null) {
                rawWriter.close();
            }
            if (trainingWriter != null) {
                trainingWriter.close();
            }
        }
    }

    /**
     * Renders a sentence as space-separated {@code token|label} pairs, using
     * the labels returned by {@code Sentence.getTokenLabels} for the given tag
     * format, mention types and overlap options. The result is trimmed.
     */
    public static String getTrainingText(Sentence sentence, TagFormat tagFormat, Set<Mention.MentionType> set, Sentence.OverlapOption overlapOption, Sentence.OverlapOption overlapOption2) {
        List<String> labels = sentence.getTokenLabels(tagFormat, set, overlapOption, overlapOption2);
        List<Token> tokens = sentence.getTokens();
        StringBuilder text = new StringBuilder();
        int position = 0;
        while (position < tokens.size()) {
            text.append(tokens.get(position).getText())
                .append("|")
                .append(labels.get(position))
                .append(" ");
            position++;
        }
        return text.toString().trim();
    }

    /**
     * Inserts a suffix into a file name just before its extension, e.g.
     * {@code ("model.bin", "01")} gives {@code "model01.bin"}. A name without
     * an extension gets the suffix appended.
     *
     * <p>Only a dot in the last path component counts as the start of the
     * extension, so a dot in a directory name (as in {@code "out.d/model"}) is
     * left alone.
     *
     * @param str the base file name; {@code null} yields {@code null}
     * @param str2 the suffix to insert; {@code null} returns {@code str} unchanged
     * @return the file name with the suffix inserted
     */
    private static String getFilename(String str, String str2) {
        if (str == null) {
            return null;
        }
        if (str2 == null) {
            return str;
        }
        int lastSeparator = Math.max(str.lastIndexOf('/'), str.lastIndexOf('\\'));
        int lastDot = str.lastIndexOf('.');
        if (lastDot <= lastSeparator) {
            // No dot at all, or the last dot belongs to a directory name: no extension to preserve.
            return str + str2;
        }
        return str.substring(0, lastDot) + str2 + str.substring(lastDot);
    }

    /**
     * Writes the mentions of a sentence as {@code id|start end|text} lines.
     *
     * <p>When {@code z} is false, the sentence's {@code Found} mentions are
     * written with the offsets returned by {@code getStartChar(z2)} and
     * {@code getEndChar(z2)}. When {@code z} is true, the {@code Required}
     * mentions are written instead, with offsets computed by summing token
     * lengths, so characters between tokens are not counted; {@code z2} is not
     * used in that mode.
     */
    public static void outputMentions(Sentence sentence, PrintWriter printWriter, boolean z, boolean z2) {
        if (z) {
            List<Token> tokens = sentence.getTokens();
            int offset = 0;
            for (int index = 0; index < tokens.size(); index++) {
                Token token = tokens.get(index);
                List<Mention> covering = sentence.getMentions(token, EnumSet.of(Mention.MentionType.Required));
                // Decompiled form of an assert: a token may belong to at most one required mention.
                if (!$assertionsDisabled && covering.size() != 0 && covering.size() != 1) {
                    throw new AssertionError();
                }
                Mention current = covering.isEmpty() ? null : covering.get(0);
                if (current != null && index == current.getStart()) {
                    // First token of the mention: emit the id and the start offset.
                    printWriter.print(sentence.getSentenceId());
                    printWriter.print("|");
                    printWriter.print(offset);
                    printWriter.print(" ");
                }
                offset += token.length();
                if (current != null && index == current.getEnd() - 1) {
                    // Last token of the mention: emit the inclusive end offset and the text.
                    printWriter.print(offset - 1);
                    printWriter.print("|");
                    printWriter.println(current.getText());
                }
            }
            return;
        }
        for (Mention found : sentence.getMentions(Mention.MentionType.Found)) {
            printWriter.print(sentence.getSentenceId());
            printWriter.print("|");
            printWriter.print(found.getStartChar(z2));
            printWriter.print(" ");
            printWriter.print(found.getEndChar(z2));
            printWriter.print("|");
            printWriter.println(found.getText());
        }
    }

    /**
     * Convenience overload of the four-argument {@code test} that passes {@code null} as the
     * file-name suffix.
     *
     * @param dataset dataset whose sentences are tagged
     * @param tagger tagger under evaluation
     * @param hierarchicalConfiguration configuration forwarded unchanged
     * @return the performance returned by the four-argument overload
     * @throws IOException if the four-argument overload throws it
     */
    public static Performance test(Dataset dataset, banner.tagging.Tagger tagger, HierarchicalConfiguration hierarchicalConfiguration) throws IOException {
        final String noSuffix = null;
        return test(dataset, tagger, hierarchicalConfiguration, noSuffix);
    }

    /**
     * Tags every sentence of the dataset, writes the results to the configured files and returns
     * the accumulated strict-match performance.
     *
     * <p>File names are read from the {@code banner.eval} configuration section
     * ({@code outputFilename}, {@code mentionFilename} and the optional
     * {@code inContextAnalysisFilename}) and combined with {@code str} through
     * {@code getFilename}. Per sentence, the training-format text goes to the output file, the
     * found mentions go to the mention file and, if configured, an HTML analysis line goes to
     * the analysis file. A progress counter is printed to standard output every 1000 sentences.
     *
     * <p>All writers are opened inside the guarded region, so a failure while opening a later
     * file no longer leaks the writers that were already open.
     *
     * @param dataset dataset whose sentences are tagged
     * @param tagger tagger under evaluation
     * @param hierarchicalConfiguration configuration providing tag format, tokenizer,
     *     post-processing and file names
     * @param str suffix inserted into each configured file name; may be {@code null}
     * @return the performance accumulated over all sentences
     * @throws IOException if one of the output files cannot be opened
     * @throws NullPointerException if {@code outputFilename} or {@code mentionFilename} is not
     *     configured
     */
    public static Performance test(Dataset dataset, banner.tagging.Tagger tagger, HierarchicalConfiguration hierarchicalConfiguration, String str) throws IOException {
        TagFormat tagFormat = getTagFormat(hierarchicalConfiguration);
        Tokenizer tokenizer = getTokenizer(hierarchicalConfiguration);
        PostProcessor postProcessor = getPostProcessor(hierarchicalConfiguration);
        SubnodeConfiguration configurationAt = hierarchicalConfiguration.configurationAt(BANNER.class.getPackage().getName());
        String outputFilename = getFilename(configurationAt.getString("outputFilename"), str);
        String mentionFilename = getFilename(configurationAt.getString("mentionFilename"), str);
        String analysisFilename = getFilename(configurationAt.getString("inContextAnalysisFilename"), str);
        // Fail before touching the file system. The exception type is the one FileWriter would
        // have raised for a null name, but now with a message and without a half-created output.
        if (outputFilename == null) {
            throw new NullPointerException("Configuration must contain parameter \"outputFilename\"");
        }
        if (mentionFilename == null) {
            throw new NullPointerException("Configuration must contain parameter \"mentionFilename\"");
        }
        Performance performance = new Performance(MatchCriteria.Strict);
        PrintWriter outputWriter = null;
        PrintWriter mentionWriter = null;
        PrintWriter analysisWriter = null;
        try {
            outputWriter = new PrintWriter(new BufferedWriter(new FileWriter(outputFilename)));
            mentionWriter = new PrintWriter(new BufferedWriter(new FileWriter(mentionFilename)));
            if (analysisFilename != null) {
                analysisWriter = new PrintWriter(new BufferedWriter(new FileWriter(analysisFilename)));
            }
            System.out.println("\tTagging sentences");
            if (analysisWriter != null) {
                analysisWriter.println("<html><body>");
            }
            int processed = 0;
            for (Sentence sentence : dataset.getSentences()) {
                if (processed % 1000 == 0) {
                    System.out.println(processed);
                }
                Sentence tagged = process(tagger, tokenizer, postProcessor, sentence);
                outputWriter.println(getTrainingText(tagged, tagFormat, EnumSet.of(Mention.MentionType.Required), Sentence.OverlapOption.Raw, Sentence.OverlapOption.Raw));
                outputMentions(tagged, mentionWriter, false, true);
                if (analysisWriter != null) {
                    outputAnalysis(sentence, tagged, analysisWriter, false);
                }
                performance.update(sentence, tagged);
                processed++;
            }
            return performance;
        } finally {
            // Each writer may still be null if opening it (or an earlier one) failed.
            if (outputWriter != null) {
                outputWriter.close();
            }
            if (mentionWriter != null) {
                mentionWriter.close();
            }
            if (analysisWriter != null) {
                // The HTML document is terminated even when tagging aborted with an exception.
                analysisWriter.println("</body></html>");
                analysisWriter.close();
            }
        }
    }

    /**
     * Runs the tokenizer, the tagger and the post-processor, in that order, on a copy of the
     * given sentence obtained with {@code sentence.copy(false, false)}.
     *
     * @param tagger tagger applied after tokenization
     * @param tokenizer tokenizer applied first
     * @param postProcessor post-processor applied last
     * @param sentence sentence to copy; only the copy is handed to the three components
     * @return the processed copy
     */
    public static Sentence process(banner.tagging.Tagger tagger, Tokenizer tokenizer, PostProcessor postProcessor, Sentence sentence) {
        final Sentence result = sentence.copy(false, false);
        tokenizer.tokenize(result);
        tagger.tag(result);
        postProcessor.postProcess(result);
        return result;
    }

    /**
     * Writes an HTML line for a tagged sentence in which each token is coloured according to how
     * the tagger's mentions compare with the reference mentions.
     *
     * <p>The reference sentence is copied with {@code copy(true, true)} and flattened with
     * {@code FlattenType.Union}. Each {@code Required} mention of the tagged sentence is then
     * classified:
     * <ul>
     *   <li>found correct: it equals a reference {@code Required} mention (which is then no
     *       longer pending), or it is contained in the reference {@code Allowed} mentions, in
     *       which case every pending reference mention it overlaps is dropped as well;</li>
     *   <li>found incorrect: anything else.</li>
     * </ul>
     * Reference {@code Required} mentions that remain pending are "not found".
     *
     * <p>Token colours: green = inside a found-correct mention, purple = inside both a
     * found-incorrect and a not-found mention, red = found-incorrect only, blue = not-found only,
     * black = none of these. If a token is green and also in one of the error sets, diagnostic
     * output is printed to standard output and, with assertions enabled, an
     * {@code AssertionError} is thrown.
     *
     * @param sentence reference sentence
     * @param sentence2 sentence produced by the tagger
     * @param printWriter destination of the HTML line
     * @param z when {@code true} the line is written even if no token belongs to an error set
     */
    private static void outputAnalysis(Sentence sentence, Sentence sentence2, PrintWriter printWriter, boolean z) {
        Sentence reference = sentence.copy(true, true);
        new FlattenPostProcessor(FlattenPostProcessor.FlattenType.Union).postProcess(reference);
        List<Mention> allowed = reference.getMentions(Mention.MentionType.Allowed);

        Set<Mention> foundCorrect = new HashSet<Mention>();
        Set<Mention> foundIncorrect = new HashSet<Mention>();
        Set<Mention> notFound = new HashSet<Mention>();
        notFound.addAll(reference.getMentions(Mention.MentionType.Required));

        for (Mention candidate : sentence2.getMentions(Mention.MentionType.Required)) {
            if (notFound.remove(candidate)) {
                // Exact match with a pending reference mention.
                foundCorrect.add(candidate);
            } else if (allowed.contains(candidate)) {
                // An allowed alternative: it also settles every pending mention it overlaps.
                foundCorrect.add(candidate);
                for (Iterator<Mention> pending = notFound.iterator(); pending.hasNext();) {
                    if (candidate.overlaps(pending.next())) {
                        pending.remove();
                    }
                }
            } else {
                foundIncorrect.add(candidate);
            }
        }

        boolean hasError = false;
        StringBuilder html = new StringBuilder(sentence2.getSentenceId());
        FontColor currentColor = FontColor.Black;
        List<Token> tokens = sentence2.getTokens();
        for (int index = 0; index < tokens.size(); index++) {
            boolean inCorrect = false;
            for (Mention m : foundCorrect) {
                inCorrect |= m.contains(index);
            }
            boolean inIncorrect = false;
            for (Mention m : foundIncorrect) {
                inIncorrect |= m.contains(index);
            }
            boolean inNotFound = false;
            for (Mention m : notFound) {
                inNotFound |= m.contains(index);
            }
            hasError |= inNotFound || inIncorrect;

            FontColor nextColor;
            if (inCorrect) {
                if (inIncorrect || inNotFound) {
                    // Inconsistent classification: dump the state before the assertion fires.
                    System.out.println("=============");
                    System.out.println("inFoundIncorrect: " + inIncorrect);
                    System.out.println("inNotFound: " + inNotFound);
                    System.out.println(sentence2.getSentenceId());
                    System.out.println(sentence2.getText());
                    Mention badMention = sentence2.getMentions(tokens.get(index), EnumSet.of(Mention.MentionType.Required)).get(0);
                    System.out.println("badMention: " + badMention);
                    System.out.println("sentenceFound.getMentions().contains(): " + sentence2.getMentions(Mention.MentionType.Required).contains(badMention));
                    System.out.println("mentionsRequired.contains(): " + sentence.getMentions(Mention.MentionType.Required).contains(badMention));
                    System.out.println("mentionsAllowed.contains(): " + allowed.contains(badMention));
                    System.out.println("mentionsFoundCorrect.contains(): " + foundCorrect.contains(badMention));
                    System.out.println("mentionsFoundIncorrect.contains(): " + foundIncorrect.contains(badMention));
                    System.out.println("mentionsNotFound.contains(): " + notFound.contains(badMention));
                    System.out.println("sentenceFound.getMentions(): " + sentence2.getMentions(Mention.MentionType.Required));
                    System.out.println("mentionsFoundCorrect: " + foundCorrect);
                    System.out.println("mentionsFoundIncorrect: " + foundIncorrect);
                    System.out.println("mentionsNotFound: " + notFound);
                    System.out.println("=============");
                }
                if (!$assertionsDisabled && (inIncorrect || inNotFound)) {
                    throw new AssertionError();
                }
                nextColor = FontColor.Green;
            } else if (inIncorrect && inNotFound) {
                nextColor = FontColor.Purple;
            } else if (inIncorrect) {
                nextColor = FontColor.Red;
            } else if (inNotFound) {
                nextColor = FontColor.Blue;
            } else {
                nextColor = FontColor.Black;
            }
            html.append(currentColor.changeColor(nextColor));
            currentColor = nextColor;
            html.append(tokens.get(index).getText());
        }
        // Switch back to black before ending the line.
        html.append(currentColor.changeColor(FontColor.Black));
        html.append("<br>");
        if (hasError || z) {
            printWriter.println(html);
        }
    }

    /**
     * Instantiates and loads the dataset named by the {@code datasetName} entry of the
     * {@code banner.eval} configuration section.
     *
     * <p>The class is created reflectively through its no-argument constructor, receives the
     * configured tokenizer and is then loaded from the full configuration.
     *
     * @param hierarchicalConfiguration configuration to read from
     * @return the loaded dataset
     * @throws RuntimeException wrapping any exception raised while resolving, instantiating or
     *     loading the dataset
     */
    public static Dataset getDataset(HierarchicalConfiguration hierarchicalConfiguration) {
        Tokenizer tokenizer = getTokenizer(hierarchicalConfiguration);
        Dataset loaded;
        try {
            HierarchicalConfiguration section = hierarchicalConfiguration.configurationAt(BANNER.class.getPackage().getName());
            String datasetClassName = section.getString("datasetName");
            loaded = (Dataset) Class.forName(datasetClassName).newInstance();
            loaded.setTokenizer(tokenizer);
            loaded.load(hierarchicalConfiguration);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return loaded;
    }

    /**
     * Reads the {@code tagFormat} entry of the {@code banner.eval} configuration section and
     * resolves it with {@code TagFormat.valueOf}.
     *
     * @param hierarchicalConfiguration configuration to read from
     * @return the configured tag format
     */
    private static TagFormat getTagFormat(HierarchicalConfiguration hierarchicalConfiguration) {
        String sectionKey = BANNER.class.getPackage().getName();
        String formatName = hierarchicalConfiguration.configurationAt(sectionKey).getString("tagFormat");
        return TagFormat.valueOf(formatName);
    }

    /**
     * Instantiates the tokenizer class named by the {@code tokenizer} entry of the
     * {@code banner.eval} configuration section, using its no-argument constructor.
     *
     * <p>The class name is echoed to standard output before instantiation.
     *
     * @param hierarchicalConfiguration configuration to read from
     * @return a new tokenizer instance
     * @throws RuntimeException wrapping any exception raised while reading the configuration or
     *     creating the instance
     */
    public static Tokenizer getTokenizer(HierarchicalConfiguration hierarchicalConfiguration) {
        try {
            String sectionKey = BANNER.class.getPackage().getName();
            String tokenizerClassName = hierarchicalConfiguration.configurationAt(sectionKey).getString("tokenizer");
            System.out.println("reading tokenizer: " + tokenizerClassName);
            Object instance = Class.forName(tokenizerClassName).newInstance();
            return (Tokenizer) instance;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Creates the dictionary tagger named by the {@code dictionaryTagger} entry of the
     * {@code banner.eval} configuration section.
     *
     * <p>The tokenizer is always created first, even when no dictionary tagger is configured.
     *
     * @param hierarchicalConfiguration configuration to read from
     * @return the configured and loaded tagger, or {@code null} when no
     *     {@code dictionaryTagger} entry is present
     * @throws RuntimeException wrapping any exception raised while instantiating, configuring
     *     or loading the tagger
     */
    public static DictionaryTagger getDictionary(HierarchicalConfiguration hierarchicalConfiguration) {
        Tokenizer tokenizer = getTokenizer(hierarchicalConfiguration);
        String sectionKey = BANNER.class.getPackage().getName();
        String taggerClassName = hierarchicalConfiguration.configurationAt(sectionKey).getString("dictionaryTagger");
        if (taggerClassName != null) {
            try {
                DictionaryTagger dictionary = (DictionaryTagger) Class.forName(taggerClassName).newInstance();
                dictionary.configure(hierarchicalConfiguration, tokenizer);
                dictionary.load(hierarchicalConfiguration);
                return dictionary;
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
        return null;
    }

    /**
     * Builds the post-processing chain selected by the {@code banner.eval} configuration section.
     *
     * <p>A {@code ParenthesisPostProcessor} is added when {@code useParenthesisPostProcessing}
     * is present and true, followed by a {@code LocalAbbreviationPostProcessor} when
     * {@code useLocalAbbreviationPostProcessing} is present and true. With neither flag set the
     * returned chain is an empty {@code SequentialPostProcessor}.
     *
     * @param hierarchicalConfiguration configuration to read from
     * @return the post-processor chain; never {@code null}
     */
    public static PostProcessor getPostProcessor(HierarchicalConfiguration hierarchicalConfiguration) {
        SubnodeConfiguration section = hierarchicalConfiguration.configurationAt(BANNER.class.getPackage().getName());
        SequentialPostProcessor chain = new SequentialPostProcessor();

        boolean parenthesisEnabled = section.containsKey("useParenthesisPostProcessing") && section.getBoolean("useParenthesisPostProcessing");
        if (parenthesisEnabled) {
            chain.addPostProcessor(new ParenthesisPostProcessor());
        }

        boolean abbreviationEnabled = section.containsKey("useLocalAbbreviationPostProcessing") && section.getBoolean("useLocalAbbreviationPostProcessing");
        if (abbreviationEnabled) {
            chain.addPostProcessor(new LocalAbbreviationPostProcessor());
        }
        return chain;
    }

    /**
     * Reads the {@code crfOrder} entry of the {@code banner.eval} configuration section as an int.
     *
     * @param hierarchicalConfiguration configuration to read from
     * @return the configured CRF order
     */
    private static int getCRFOrder(HierarchicalConfiguration hierarchicalConfiguration) {
        String sectionKey = BANNER.class.getPackage().getName();
        HierarchicalConfiguration section = hierarchicalConfiguration.configurationAt(sectionKey);
        return section.getInt("crfOrder");
    }

    /**
     * Creates the part-of-speech tagger selected by the {@code posTagger} entry of the
     * {@code banner.eval} configuration section.
     *
     * <p>The entry must equal the class name of {@code HeppleTagger} or {@code MedPostTagger}.
     * The data directory is taken from {@code posTaggerDataDirectory}; for the Hepple tagger it
     * is first resolved through {@code new Util().getFile(...)}.
     *
     * @param hierarchicalConfiguration configuration to read from
     * @return the tagger, or {@code null} when no {@code posTagger} entry is present
     * @throws IllegalArgumentException if the data directory is missing or the tagger class name
     *     is not one of the two supported ones
     */
    public static Tagger getPosTagger(HierarchicalConfiguration hierarchicalConfiguration) {
        SubnodeConfiguration section = hierarchicalConfiguration.configurationAt(BANNER.class.getPackage().getName());
        String taggerClassName = section.getString("posTagger");
        if (taggerClassName == null) {
            return null;
        }
        String dataDirectory = section.getString("posTaggerDataDirectory");
        if (dataDirectory == null) {
            throw new IllegalArgumentException("Must specify data directory for POS tagger");
        }
        if (HeppleTagger.class.getName().equals(taggerClassName)) {
            return new HeppleTagger(new Util().getFile(dataDirectory));
        } else if (MedPostTagger.class.getName().equals(taggerClassName)) {
            return new MedPostTagger(dataDirectory);
        } else {
            throw new IllegalArgumentException("Unknown POS tagger type: " + taggerClassName);
        }
    }

    /**
     * Creates the English lemmatiser from the {@code nlpdata/lemmatiser} resource found through
     * the current thread's context class loader.
     *
     * <p>The resolved location and whether {@code adj.exec} exists below it are printed to
     * standard output.
     *
     * @param hierarchicalConfiguration configuration; only the {@code banner.eval} section is
     *     looked up, its contents are not read
     * @return a new lemmatiser, or {@code null} when the resource cannot be found
     */
    public static EngLemmatiser getLemmatiser(HierarchicalConfiguration hierarchicalConfiguration) {
        // The result is unused, but the lookup is kept so that any failure it raises for a
        // configuration without this section (NOTE(review): confirm against Commons
        // Configuration) still surfaces here as it did before.
        hierarchicalConfiguration.configurationAt(BANNER.class.getPackage().getName());
        // getResource returns null for a missing resource; check it before dereferencing.
        // The previous null check sat on the getFile() result, after the dereference, and
        // therefore never protected against a missing resource.
        java.net.URL resource = Thread.currentThread().getContextClassLoader().getResource("nlpdata/lemmatiser");
        if (resource == null) {
            return null;
        }
        String file = resource.getFile();
        System.out.println(file + " Exists?: " + new File(file + "/adj.exec").exists());
        return new EngLemmatiser(file, false, true);
    }

    /**
     * Reads the {@code simFindFilename} entry of the {@code banner.eval} configuration section.
     *
     * @param hierarchicalConfiguration configuration to read from
     * @return the value returned by {@code getString("simFindFilename")} for that section
     */
    public static String getSimFindFilename(HierarchicalConfiguration hierarchicalConfiguration) {
        String sectionKey = BANNER.class.getPackage().getName();
        HierarchicalConfiguration section = hierarchicalConfiguration.configurationAt(sectionKey);
        return section.getString("simFindFilename");
    }

    /**
     * Parses the whitespace-separated {@code mentionTypes} entry of the {@code banner.eval}
     * configuration section into a set of mention types.
     *
     * <p>Each name is resolved with {@code Mention.MentionType.valueOf}; the result is built
     * with {@code EnumSet.copyOf}.
     *
     * @param hierarchicalConfiguration configuration to read from
     * @return the configured mention types as an {@code EnumSet}
     * @throws RuntimeException if the {@code mentionTypes} entry is missing
     */
    private static Set<Mention.MentionType> getMentionTypes(HierarchicalConfiguration hierarchicalConfiguration) {
        String sectionKey = BANNER.class.getPackage().getName();
        String configured = hierarchicalConfiguration.configurationAt(sectionKey).getString("mentionTypes");
        if (configured == null) {
            throw new RuntimeException("Configuration must contain parameter \"mentionTypes\"");
        }
        Set<Mention.MentionType> parsed = new HashSet<Mention.MentionType>();
        String[] names = configured.split("\\s+");
        for (int n = 0; n < names.length; n++) {
            parsed.add(Mention.MentionType.valueOf(names[n]));
        }
        return EnumSet.copyOf(parsed);
    }

    /**
     * Reads the mandatory {@code sameTypeOverlapOption} entry of the {@code banner.eval}
     * configuration section and resolves it with {@code Sentence.OverlapOption.valueOf}.
     *
     * @param hierarchicalConfiguration configuration to read from
     * @return the configured overlap option
     * @throws RuntimeException if the entry is missing
     */
    private static Sentence.OverlapOption getSameTypeOverlapOption(HierarchicalConfiguration hierarchicalConfiguration) {
        String sectionKey = BANNER.class.getPackage().getName();
        String optionName = hierarchicalConfiguration.configurationAt(sectionKey).getString("sameTypeOverlapOption");
        if (optionName != null) {
            return Sentence.OverlapOption.valueOf(optionName);
        }
        throw new RuntimeException("Configuration must contain parameter \"sameTypeOverlapOption\"");
    }

    /**
     * Reads the mandatory {@code differentTypeOverlapOption} entry of the {@code banner.eval}
     * configuration section and resolves it with {@code Sentence.OverlapOption.valueOf}.
     *
     * @param hierarchicalConfiguration configuration to read from
     * @return the configured overlap option
     * @throws RuntimeException if the entry is missing
     */
    private static Sentence.OverlapOption getDifferentTypeOverlapOption(HierarchicalConfiguration hierarchicalConfiguration) {
        String sectionKey = BANNER.class.getPackage().getName();
        String optionName = hierarchicalConfiguration.configurationAt(sectionKey).getString("differentTypeOverlapOption");
        if (optionName != null) {
            return Sentence.OverlapOption.valueOf(optionName);
        }
        throw new RuntimeException("Configuration must contain parameter \"differentTypeOverlapOption\"");
    }

    // Class initializer that backs the `$assertionsDisabled` checks used in this class:
    // it records once, at class-initialization time, whether assertions are off for BANNER.
    static {
        // desiredAssertionStatus() reports whether assertions would be enabled, hence the negation.
        $assertionsDisabled = !BANNER.class.desiredAssertionStatus();
    }
}
