package banner.tagging.dictionary;

import banner.tagging.Tagger;
import banner.tokenization.Tokenizer;
import banner.types.EntityType;
import banner.types.Mention;
import banner.types.Sentence;
import banner.types.Token;
import banner.util.Trie;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Scanner;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.configuration.HierarchicalConfiguration;
import org.apache.commons.configuration.SubnodeConfiguration;

/* loaded from: input_file:banner/tagging/dictionary/DictionaryTagger.class */
/**
 * A {@link Tagger} that marks mentions by looking up token sequences in a dictionary.
 *
 * <p>Dictionary entries are tokenized with the configured {@link Tokenizer}, each token is
 * normalized by {@link #transform(String)}, and the resulting token sequence is stored in a
 * {@link Trie} together with the set of entity types registered for it. {@link #tag(Sentence)}
 * walks the same trie over the (normalized) tokens of a sentence and adds a mention for every
 * stored sequence it encounters.
 *
 * <p>Token sequences registered through {@link #suppress(String)} are refused by the
 * {@code add} methods. Suppression only affects entries added afterwards.
 */
public class DictionaryTagger implements Tagger {
    /** A run of ASCII letters (group 1) followed by a run of ASCII digits (group 2). */
    private static final Pattern LETTERS_THEN_DIGITS = Pattern.compile("([A-Za-z]+)([0-9]+)");

    private Tokenizer tokenizer;
    private boolean filterContainedMentions;
    /** Token sequence -> entity types registered for that sequence. */
    protected Trie<String, Set<EntityType>> entities = new Trie<>();
    /** Token sequences that must never be added to {@link #entities}. */
    protected Trie<String, Boolean> notInclude = new Trie<>();
    private boolean normalizeMixedCase;
    private boolean normalizeDigits;
    private boolean generate2PartVariations;
    private boolean dropEndParentheticals;

    /**
     * Reads the tagger options from the configuration node named after this object's runtime
     * class and stores the tokenizer used to split dictionary entries.
     *
     * <p>The boolean options {@code filterContainedMentions}, {@code normalizeMixedCase},
     * {@code normalizeDigits}, {@code generate2PartVariations} and {@code dropEndParentheticals}
     * all default to {@code false} when absent.
     *
     * @param hierarchicalConfiguration configuration containing a node keyed by the class name
     * @param tokenizer tokenizer used by {@link #process(String)}
     */
    public void configure(HierarchicalConfiguration hierarchicalConfiguration, Tokenizer tokenizer) {
        SubnodeConfiguration settings = hierarchicalConfiguration.configurationAt(getClass().getName());
        this.filterContainedMentions = settings.getBoolean("filterContainedMentions", false);
        this.normalizeMixedCase = settings.getBoolean("normalizeMixedCase", false);
        this.normalizeDigits = settings.getBoolean("normalizeDigits", false);
        this.generate2PartVariations = settings.getBoolean("generate2PartVariations", false);
        this.dropEndParentheticals = settings.getBoolean("dropEndParentheticals", false);
        this.tokenizer = tokenizer;
    }

    /**
     * Loads a dictionary from a classpath resource and adds every non-blank line to this tagger.
     *
     * <p>Settings are read from the configuration node named after this object's runtime class:
     * {@code dictionaryFile} (resource name, resolved with {@link Class#getResourceAsStream}),
     * {@code dictionaryType} (entity type name), and optionally {@code delimiter} plus
     * {@code column}. When a delimiter is given, each line is split with
     * {@link String#split(String)} (so the delimiter is a regular expression) and only the field
     * at the zero-based {@code column} index is added. Lines are trimmed before use.
     *
     * @param hierarchicalConfiguration configuration containing a node keyed by the class name
     * @throws IllegalArgumentException if a required setting is missing, the resource cannot be
     *     found, or a line does not contain the requested column
     * @throws IOException if reading the resource fails
     */
    public void load(HierarchicalConfiguration hierarchicalConfiguration) throws IOException {
        SubnodeConfiguration settings = hierarchicalConfiguration.configurationAt(getClass().getName());
        String dictionaryFile = settings.getString("dictionaryFile");
        if (dictionaryFile == null) {
            throw new IllegalArgumentException("Must specify dictionary filename");
        }
        String dictionaryType = settings.getString("dictionaryType");
        if (dictionaryType == null) {
            throw new IllegalArgumentException("Must specify dictionary type");
        }
        String delimiter = settings.getString("delimiter");
        int column = settings.getInt("column", -1);
        if (delimiter != null && column == -1) {
            throw new IllegalArgumentException("Must specify column if delimiter specified");
        }
        EntityType type = EntityType.getType(dictionaryType);
        InputStream resource = getClass().getResourceAsStream(dictionaryFile);
        if (resource == null) {
            throw new IllegalArgumentException("Could not find dictionary at " + dictionaryFile);
        }
        // Closing the scanner also closes the underlying resource stream.
        try (Scanner lines = new Scanner(resource)) {
            while (lines.hasNextLine()) {
                String line = lines.nextLine().trim();
                if (line.isEmpty()) {
                    continue;
                }
                if (delimiter == null) {
                    add(line, type);
                    continue;
                }
                String[] fields = line.split(delimiter);
                if (column < 0 || column >= fields.length) {
                    throw new IllegalArgumentException(
                            "Dictionary line has no column " + column + ": " + line);
                }
                add(fields[column], type);
            }
            // Scanner swallows read errors and merely stops; surface them so that a
            // truncated dictionary is not silently accepted.
            IOException readFailure = lines.ioException();
            if (readFailure != null) {
                throw readFailure;
            }
        }
    }

    /**
     * Tokenizes the text and normalizes every token with {@link #transform(String)}.
     *
     * @param str text to tokenize; must not be {@code null}
     * @return a new list holding the normalized tokens, in order
     * @throws IllegalArgumentException if {@code str} is {@code null}
     */
    protected List<String> process(String str) {
        if (str == null) {
            throw new IllegalArgumentException("Text must not be null");
        }
        List<String> rawTokens = this.tokenizer.getTokens(str);
        // Copy instead of rewriting the tokenizer's list in place: that list may be
        // unmodifiable or owned by the tokenizer.
        List<String> normalized = new ArrayList<>(rawTokens.size());
        for (String rawToken : rawTokens) {
            normalized.add(transform(rawToken));
        }
        return normalized;
    }

    /**
     * Normalizes a single token according to the configured options.
     *
     * <p>With {@code normalizeMixedCase}, a token that contains both an upper-case and a
     * lower-case character is lower-cased entirely; tokens in a single case are left alone.
     * With {@code normalizeDigits}, every digit is replaced by {@code '0'}. When neither option
     * is set the argument is returned unchanged.
     *
     * @param str token text
     * @return the normalized token
     */
    protected String transform(String str) {
        if (!this.normalizeMixedCase && !this.normalizeDigits) {
            return str;
        }
        char[] chars = str.toCharArray();
        if (this.normalizeMixedCase) {
            boolean sawUpper = false;
            boolean sawLower = false;
            // Stop scanning as soon as both cases have been seen.
            for (int k = 0; k < chars.length && !(sawUpper && sawLower); k++) {
                sawUpper |= Character.isUpperCase(chars[k]);
                sawLower |= Character.isLowerCase(chars[k]);
            }
            if (sawUpper && sawLower) {
                for (int k = 0; k < chars.length; k++) {
                    chars[k] = Character.toLowerCase(chars[k]);
                }
            }
        }
        if (this.normalizeDigits) {
            for (int k = 0; k < chars.length; k++) {
                if (Character.isDigit(chars[k])) {
                    chars[k] = '0';
                }
            }
        }
        return new String(chars);
    }

    /**
     * Adds a dictionary entry with a single entity type.
     *
     * @param str entry text; must not be {@code null}
     * @param entityType type to register for the entry
     */
    public void add(String str, EntityType entityType) {
        add(str, Collections.singleton(entityType));
    }

    /**
     * Tokenizes and normalizes the entry text, registers it, and — when
     * {@code generate2PartVariations} is set — also registers spelling variations for names
     * that consist of two parts:
     * <ul>
     *   <li>a single token of ASCII letters followed by ASCII digits (split between the two
     *       runs),</li>
     *   <li>exactly two tokens,</li>
     *   <li>exactly three tokens whose middle token is {@code "-"} or {@code "/"}.</li>
     * </ul>
     *
     * @param str entry text; must not be {@code null}
     * @param collection entity types to register for the entry and its variations
     */
    public void add(String str, Collection<EntityType> collection) {
        List<String> tokens = process(str);
        add(tokens, collection);
        if (!this.generate2PartVariations) {
            return;
        }
        int count = tokens.size();
        if (count == 1) {
            Matcher parts = LETTERS_THEN_DIGITS.matcher(tokens.get(0));
            if (parts.matches()) {
                add2Part(parts.group(1), parts.group(2), collection);
            }
        } else if (count == 2) {
            add2Part(tokens.get(0), tokens.get(1), collection);
        } else if (count == 3) {
            String separator = tokens.get(1);
            if (separator.equals("-") || separator.equals("/")) {
                add2Part(tokens.get(0), tokens.get(2), collection);
            }
        }
    }

    /**
     * Registers the four spellings of a two-part name: the parts concatenated into one token,
     * the parts as two tokens, and the parts separated by a {@code "-"} or a {@code "/"} token.
     */
    private void add2Part(String str, String str2, Collection<EntityType> collection) {
        // The concatenated spelling is one token. Appending the second part again would
        // register a sequence such as [abc12, 12], which is not a spelling of the name.
        List<String> joined = new ArrayList<>(1);
        joined.add(str + str2);
        add(joined, collection);

        List<String> separated = new ArrayList<>(3);
        separated.add(str);
        separated.add(str2);
        add(separated, collection);
        // Reuse the same list for the separator variants: insert the separator, then swap it.
        separated.add(1, "-");
        add(separated, collection);
        separated.set(1, "/");
        add(separated, collection);
    }

    /**
     * Registers an already tokenized entry.
     *
     * <p>The entry is refused when the token sequence has been suppressed. With
     * {@code dropEndParentheticals}, an entry ending in {@code ")"} is cut off just before the
     * last {@code "("} token; if no {@code "("} is found after the first position (nothing would
     * remain, or the parenthesis is unbalanced) the entry is refused.
     *
     * @param list token sequence; must contain at least one token
     * @param collection entity types to register for the sequence
     * @return {@code true} if at least one entity type was newly registered for the sequence,
     *     {@code false} if the entry was refused or all types were already present
     * @throws IllegalArgumentException if {@code list} is empty
     */
    public boolean add(List<String> list, Collection<EntityType> collection) {
        if (list.isEmpty()) {
            throw new IllegalArgumentException("Number of tokens must be greater than zero");
        }
        if (this.notInclude.getValue(list) != null) {
            return false;
        }
        if (this.dropEndParentheticals && list.get(list.size() - 1).equals(")")) {
            int open = list.size() - 1;
            while (open > 0 && !list.get(open).equals("(")) {
                open--;
            }
            if (open <= 0) {
                return false;
            }
            list = list.subList(0, open);
        }
        Set<EntityType> types = this.entities.getValue(list);
        if (types == null) {
            types = new HashSet<>(1);
            this.entities.add(list, types);
        }
        return types.addAll(collection);
    }

    /**
     * Adds a mention to the sentence for every dictionary entry found in its tokens.
     *
     * <p>Mentions are created with the index of their first token and the index one past their
     * last token. When {@code filterContainedMentions} is set, mentions of the same entity type
     * whose spans overlap or touch are merged into a single mention covering their union;
     * otherwise every match is added as found.
     *
     * @param sentence sentence to tag; its tokens are normalized with {@link #transform(String)}
     *     before lookup
     */
    @Override // banner.tagging.Tagger
    public void tag(Sentence sentence) {
        List<Token> tokens = sentence.getTokens();
        int tokenCount = tokens.size();
        LinkedList<Mention> found = new LinkedList<>();
        for (int start = 0; start < tokenCount; start++) {
            Trie<String, Set<EntityType>> node = this.entities;
            // 'node' is the trie position after consuming tokens [start, end). The value must
            // also be inspected once end == tokenCount, otherwise an entry that ends on the
            // last token of the sentence would never be reported.
            for (int end = start; node != null; end++) {
                Set<EntityType> types = node.getValue();
                if (types != null) {
                    for (EntityType type : types) {
                        found.add(new Mention(sentence, start, end, type, Mention.MentionType.Found));
                    }
                }
                if (end == tokenCount) {
                    break;
                }
                node = node.getChild(transform(tokens.get(end).getText()));
            }
        }

        if (!this.filterContainedMentions) {
            for (Mention mention : found) {
                sentence.addMention(mention);
            }
            return;
        }

        while (!found.isEmpty()) {
            Mention seed = found.removeFirst();
            int start = seed.getStart();
            int end = seed.getEnd();
            boolean grown = true;
            while (grown) {
                grown = false;
                // A fresh iterator per pass: after the span has grown, mentions skipped
                // earlier may now touch it and have to be looked at again.
                Iterator<Mention> rest = found.iterator();
                while (rest.hasNext()) {
                    Mention other = rest.next();
                    boolean touches = end >= other.getStart() && start <= other.getEnd();
                    if (touches && seed.getEntityType().equals(other.getEntityType())) {
                        rest.remove();
                        start = Math.min(start, other.getStart());
                        end = Math.max(end, other.getEnd());
                        grown = true;
                    }
                }
            }
            sentence.addMention(
                    new Mention(sentence, start, end, seed.getEntityType(), Mention.MentionType.Found));
        }
    }

    /**
     * Marks the tokenized, normalized form of the text as suppressed, so that later calls to the
     * {@code add} methods refuse that exact token sequence.
     *
     * @param str text to suppress; must not be {@code null}
     */
    public void suppress(String str) {
        this.notInclude.add(process(str), Boolean.TRUE);
    }

    /**
     * @return the size reported by the entity trie
     */
    public int size() {
        return this.entities.size();
    }

    /** @return the tokenizer used to split dictionary entries */
    public Tokenizer getTokenizer() {
        return this.tokenizer;
    }

    /** @param tokenizer the tokenizer used to split dictionary entries */
    public void setTokenizer(Tokenizer tokenizer) {
        this.tokenizer = tokenizer;
    }

    /** @return whether {@link #tag(Sentence)} merges overlapping mentions of the same type */
    public boolean isFilterContainedMentions() {
        return this.filterContainedMentions;
    }

    /** @param z whether {@link #tag(Sentence)} merges overlapping mentions of the same type */
    public void setFilterContainedMentions(boolean z) {
        this.filterContainedMentions = z;
    }

    /** @return whether mixed-case tokens are lower-cased by {@link #transform(String)} */
    public boolean isNormalizeMixedCase() {
        return this.normalizeMixedCase;
    }

    /** @param z whether mixed-case tokens are lower-cased by {@link #transform(String)} */
    public void setNormalizeMixedCase(boolean z) {
        this.normalizeMixedCase = z;
    }

    /** @return whether digits are replaced by {@code '0'} in {@link #transform(String)} */
    public boolean isNormalizeDigits() {
        return this.normalizeDigits;
    }

    /** @param z whether digits are replaced by {@code '0'} in {@link #transform(String)} */
    public void setNormalizeDigits(boolean z) {
        this.normalizeDigits = z;
    }

    /** @return whether two-part spelling variations are generated when adding entries */
    public boolean isGenerate2PartVariations() {
        return this.generate2PartVariations;
    }

    /** @param z whether two-part spelling variations are generated when adding entries */
    public void setGenerate2PartVariations(boolean z) {
        this.generate2PartVariations = z;
    }

    /** @return whether a trailing parenthetical is removed from entries before they are stored */
    public boolean isDropEndParentheticals() {
        return this.dropEndParentheticals;
    }

    /** @param z whether a trailing parenthetical is removed from entries before they are stored */
    public void setDropEndParentheticals(boolean z) {
        this.dropEndParentheticals = z;
    }
}
