package banner.eval.dataset;

import banner.eval.dataset.Dataset;
import banner.tokenization.Tokenizer;
import banner.types.EntityType;
import banner.types.Mention;
import banner.types.Sentence;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.commons.configuration.HierarchicalConfiguration;
import org.apache.commons.configuration.SubnodeConfiguration;

/* loaded from: input_file:banner/eval/dataset/AZDCDataset.class */
/**
 * {@link Dataset} implementation that reads sentences and their mention annotations from two
 * tab-separated text files.
 *
 * <p>The file locations are taken from the configuration keys {@code sentenceFilename} and
 * {@code mentionTestFilename}, looked up under the node named after this class's package.
 * Every mention is created with the {@code "DISE"} entity type.
 */
public class AZDCDataset extends Dataset {

    /** Minimum number of buckets printed for the "IDs per name" histogram. */
    private static final int MIN_ID_COUNT_BUCKETS = 20;

    /** Minimum number of buckets printed for the "names per ID" histogram. */
    private static final int MIN_NAME_COUNT_BUCKETS = 25;

    /** String form of an empty ID collection; used as the map key for mentions without IDs. */
    private static final String NO_IDS_KEY = "[]";

    /**
     * Creates a dataset that tokenizes loaded sentences with the given tokenizer.
     *
     * @param tokenizer tokenizer applied to every sentence in {@link #getSentence}
     */
    public AZDCDataset(Tokenizer tokenizer) {
        this.tokenizer = tokenizer;
    }

    /** Creates a dataset without assigning a tokenizer. */
    public AZDCDataset() {
    }

    /**
     * Loads the mention file and the sentence file named in the configuration, adds one
     * {@link Sentence} per non-blank sentence line to {@code sentences}, and prints name/ID
     * statistics to standard output.
     *
     * <p>The first line of the sentence file is skipped. Each following line must have at
     * least four tab-separated columns: columns 0-2 are joined with {@code '-'} to form the
     * sentence identifier, and columns 1 and 3 are passed to the {@link Sentence} constructor
     * (presumably the document id and the sentence text - confirm against {@code Sentence}).
     *
     * @param hierarchicalConfiguration configuration holding the two file names
     * @throws RuntimeException wrapping any {@link IOException} raised while reading
     */
    @Override
    public void load(HierarchicalConfiguration hierarchicalConfiguration) {
        SubnodeConfiguration localConfig =
                hierarchicalConfiguration.configurationAt(getClass().getPackage().getName());
        String sentenceFilename = localConfig.getString("sentenceFilename");
        Map<String, Set<String>> idsByName = new HashMap<>();
        Map<String, Set<String>> namesById = new HashMap<>();
        try {
            HashMap<String, LinkedList<Dataset.Tag>> tags;
            // try-with-resources guarantees the readers are closed even when parsing fails.
            try (BufferedReader mentionReader =
                    new BufferedReader(new FileReader(localConfig.getString("mentionTestFilename")))) {
                tags = getTags(mentionReader);
            }
            try (BufferedReader sentenceReader = new BufferedReader(new FileReader(sentenceFilename))) {
                sentenceReader.readLine(); // skip the first line
                for (String line = sentenceReader.readLine(); line != null; line = sentenceReader.readLine()) {
                    if (line.isEmpty()) {
                        // A blank (e.g. trailing) line carries no sentence; skip it instead of crashing.
                        continue;
                    }
                    String[] fields = line.split("\\t");
                    String sentenceId = fields[0] + "-" + fields[1] + "-" + fields[2];
                    this.sentences.add(
                            getSentence(sentenceId, fields[1], fields[3], this.tokenizer, tags, idsByName, namesById));
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        printStatistics(idsByName, namesById);
    }

    /** Prints the name-to-IDs and ID-to-names histograms and summary counts to standard output. */
    private static void printStatistics(Map<String, Set<String>> idsByName, Map<String, Set<String>> namesById) {
        int[] idCountForName = new int[MIN_ID_COUNT_BUCKETS];
        for (Map.Entry<String, Set<String>> entry : idsByName.entrySet()) {
            Set<String> ids = entry.getValue();
            idCountForName = countInBucket(idCountForName, ids.size());
            if (ids.size() > 2) {
                System.out.println("Name " + entry.getKey() + " has " + ids.size() + " IDs: " + ids);
            }
        }
        printHistogram("idCountForName: [", idCountForName);

        int[] nameCountForId = new int[MIN_NAME_COUNT_BUCKETS];
        for (Map.Entry<String, Set<String>> entry : namesById.entrySet()) {
            if (entry.getKey().equals(NO_IDS_KEY)) {
                continue; // mentions without IDs are reported separately below
            }
            Set<String> names = entry.getValue();
            nameCountForId = countInBucket(nameCountForId, names.size());
            if (names.size() > 5) {
                System.out.println("ID " + entry.getKey() + " has " + names.size() + " names: " + names);
            }
        }
        printHistogram("nameCountForId: [", nameCountForId);

        // The key is absent when every mention carries at least one ID; report 0 in that case.
        Set<String> namesWithoutIds = namesById.get(NO_IDS_KEY);
        int unannotated = namesWithoutIds == null ? 0 : namesWithoutIds.size();
        System.out.println("Number of names without annotations: " + unannotated);
        System.out.println("Number of ids represented: " + namesById.size());
    }

    /**
     * Increments {@code histogram[bucket]}, enlarging the array first when the bucket lies
     * beyond its current length. Returns the array that now holds the counts.
     */
    private static int[] countInBucket(int[] histogram, int bucket) {
        int[] counts = histogram;
        if (bucket >= counts.length) {
            counts = new int[bucket + 1];
            System.arraycopy(histogram, 0, counts, 0, histogram.length);
        }
        counts[bucket]++;
        return counts;
    }

    /** Prints {@code prefix}, every count followed by {@code ", "}, and a closing bracket. */
    private static void printHistogram(String prefix, int[] histogram) {
        System.out.print(prefix);
        for (int count : histogram) {
            System.out.print(count + ", ");
        }
        System.out.println("]");
    }

    /**
     * Reads mention annotations and groups them by sentence identifier.
     *
     * <p>The first line is skipped. A line is used only when it has at least six tab-separated
     * columns and columns 4 and 5 are digit strings with column 5 greater than column 4; those
     * two values become the tag's start and end. Within one sentence, nested tags are collapsed:
     * existing tags contained in the new tag are removed, and the new tag is dropped when an
     * existing tag contains it. When a tenth column exists, it is split on whitespace, hyphens,
     * commas and square brackets, and each non-empty piece is added to the tag as an ID.
     *
     * @param bufferedReader reader positioned at the start of the mention file; not closed here
     * @return map from {@code col0-col1-col2} sentence identifier to its retained tags
     * @throws IOException if reading from {@code bufferedReader} fails
     */
    protected HashMap<String, LinkedList<Dataset.Tag>> getTags(BufferedReader bufferedReader) throws IOException {
        HashMap<String, LinkedList<Dataset.Tag>> tagsBySentence = new HashMap<>();
        bufferedReader.readLine(); // skip the first line
        for (String line = bufferedReader.readLine(); line != null; line = bufferedReader.readLine()) {
            String[] fields = line.split("\\t");
            if (!hasValidSpan(fields)) {
                continue;
            }
            String sentenceId = fields[0] + "-" + fields[1] + "-" + fields[2];
            LinkedList<Dataset.Tag> sentenceTags = tagsBySentence.get(sentenceId);
            if (sentenceTags == null) {
                sentenceTags = new LinkedList<>();
            }
            Dataset.Tag candidate = new Dataset.Tag(
                    EntityType.getType("DISE"), Integer.parseInt(fields[4]), Integer.parseInt(fields[5]));

            boolean keepCandidate = true;
            Iterator<Dataset.Tag> existingTags = sentenceTags.iterator();
            while (existingTags.hasNext() && keepCandidate) {
                Dataset.Tag existing = existingTags.next();
                if (candidate.contains(existing)) {
                    existingTags.remove();
                } else if (existing.contains(candidate)) {
                    keepCandidate = false;
                }
            }
            if (!keepCandidate) {
                continue;
            }
            if (fields.length >= 10) {
                for (String id : fields[9].split("[\\s-,\\[\\]]+")) {
                    if (id.length() > 0) {
                        candidate.addId(id);
                    }
                }
            }
            sentenceTags.add(candidate);
            tagsBySentence.put(sentenceId, sentenceTags);
        }
        return tagsBySentence;
    }

    /** Returns true when columns 4 and 5 exist, are digit strings, and column 5 exceeds column 4. */
    private static boolean hasValidSpan(String[] fields) {
        // "\\d+" never matches an empty string, so no separate emptiness check is needed.
        return fields.length >= 6
                && fields[4].matches("\\d+")
                && fields[5].matches("\\d+")
                && Integer.parseInt(fields[5]) > Integer.parseInt(fields[4]);
    }

    /**
     * Builds and tokenizes one sentence, attaches a required {@link Mention} for each of its
     * tags, and records the lower-cased mention text and the tag's ID list in the two maps.
     *
     * @param str sentence identifier, also the lookup key into {@code hashMap}
     * @param str2 second {@link Sentence} constructor argument (presumably the document id - confirm)
     * @param str3 third {@link Sentence} constructor argument (presumably the text - confirm)
     * @param tokenizer tokenizer applied to the new sentence
     * @param hashMap tags grouped by sentence identifier, as produced by {@link #getTags}
     * @param map updated in place: lower-cased mention text to the ID-list strings seen for it
     * @param map2 updated in place: ID-list string to the lower-cased mention texts seen for it
     * @return the tokenized sentence with its mentions added
     */
    protected Sentence getSentence(String str, String str2, String str3, Tokenizer tokenizer, HashMap<String, LinkedList<Dataset.Tag>> hashMap, Map<String, Set<String>> map, Map<String, Set<String>> map2) {
        Sentence sentence = new Sentence(str, str2, str3);
        tokenizer.tokenize(sentence);
        LinkedList<Dataset.Tag> sentenceTags = hashMap.get(str);
        if (sentenceTags == null) {
            return sentence;
        }
        for (Dataset.Tag tag : sentenceTags) {
            // The tag start is shifted down by one before the token lookup; the end is used as-is.
            int startToken = sentence.getTokenIndex(tag.start - 1, true);
            assert startToken >= 0 : "no token found for mention start in sentence " + str;
            int endToken = sentence.getTokenIndex(tag.end, false);
            assert endToken > startToken : "mention end does not follow its start in sentence " + str;

            Mention mention = new Mention(sentence, startToken, endToken, tag.type, Mention.MentionType.Required);
            sentence.addMention(mention);

            String name = mention.getText().toLowerCase();
            String idsKey = tag.getIds().toString();

            Set<String> idsForName = map.get(name);
            if (idsForName == null) {
                idsForName = new HashSet<>();
                map.put(name, idsForName);
            }
            idsForName.add(idsKey);

            Set<String> namesForIds = map2.get(idsKey);
            if (namesForIds == null) {
                namesForIds = new HashSet<>();
                map2.put(idsKey, namesForIds);
            }
            namesForIds.add(name);
        }
        return sentence;
    }

    /**
     * Distributes the sentences over {@code i} new datasets that share this dataset's tokenizer.
     * All sentences with the same document id go to the same dataset; the dataset for a
     * previously unseen document id is chosen with an unseeded {@link Random}.
     *
     * @param i number of datasets to create
     * @return the {@code i} datasets, in creation order
     */
    @Override
    public List<Dataset> split(int i) {
        List<Dataset> folds = new ArrayList<>();
        for (int fold = 0; fold < i; fold++) {
            folds.add(new AZDCDataset(this.tokenizer));
        }
        // A document belongs to exactly one fold, so one map replaces a scan over per-fold sets.
        Map<String, Integer> foldByDocument = new HashMap<>();
        Random random = new Random();
        for (Sentence sentence : this.sentences) {
            String documentId = sentence.getDocumentId();
            Integer assignedFold = foldByDocument.get(documentId);
            if (assignedFold == null) {
                assignedFold = Integer.valueOf(random.nextInt(i));
                foldByDocument.put(documentId, assignedFold);
            }
            folds.get(assignedFold.intValue()).sentences.add(sentence);
        }
        return folds;
    }
}
