package banner.tagging;

import banner.tagging.pipe.LChar;
import banner.tagging.pipe.LemmaPOS;
import banner.tagging.pipe.LowerCaseTokenText;
import banner.tagging.pipe.Pretagger;
import banner.tagging.pipe.RChar;
import banner.tagging.pipe.Sentence2TokenSequence;
import banner.tagging.pipe.SimFind;
import banner.tagging.pipe.TokenNumberClass;
import banner.tagging.pipe.TokenWordClass;
import banner.types.Mention;
import banner.types.Sentence;
import cc.mallet.pipe.Noop;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence;
import cc.mallet.pipe.tsf.OffsetConjunctions;
import cc.mallet.pipe.tsf.RegexMatches;
import cc.mallet.pipe.tsf.TokenTextCharNGrams;
import cc.mallet.pipe.tsf.TokenTextCharPrefix;
import cc.mallet.pipe.tsf.TokenTextCharSuffix;
import dragon.nlp.tool.Lemmatiser;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Set;
import java.util.regex.Pattern;

/* loaded from: input_file:banner/tagging/FeatureSet.class */
/**
 * Builds and holds the MALLET pipeline that turns a sentence into a feature-vector sequence.
 *
 * <p>The pipeline is a {@link SerialPipes} whose stage order is fixed by
 * {@link #createPipe}. Stage {@value #LEMMA_POS_STAGE} is the {@link LemmaPOS} pipe and stage
 * {@value #PRETAGGER_STAGE} is the {@link Pretagger} pipe; when the corresponding tools are not
 * supplied at construction time a {@link Noop} placeholder occupies the slot instead, so the
 * indices of all later stages stay the same.
 *
 * <p>The setters exist so that the external tools can be (re)attached to an existing pipeline.
 * NOTE(review): presumably this is needed after deserialization because the tools themselves are
 * not serialized with the pipes -- confirm against LemmaPOS / Pretagger.
 */
public class FeatureSet implements Serializable {
    private static final long serialVersionUID = -4591127831978244954L;

    /** Alternation of spelled-out Greek letter names used by the GREEK feature. */
    private static final String GREEK = "(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)";

    /** Index of the lemma/POS stage (or its {@link Noop} placeholder) inside {@link #pipe}. */
    private static final int LEMMA_POS_STAGE = 1;

    /** Index of the pre-tagger stage (or its {@link Noop} placeholder) inside {@link #pipe}. */
    private static final int PRETAGGER_STAGE = 2;

    private SerialPipes pipe;

    /**
     * Creates the feature pipeline.
     *
     * @param tagFormat      forwarded to {@link Sentence2TokenSequence}
     * @param lemmatiser     lemmatiser for the {@link LemmaPOS} stage; may be {@code null}
     * @param tagger         part-of-speech tagger for the {@link LemmaPOS} stage; may be
     *                       {@code null}. The stage is replaced by a {@link Noop} only when both
     *                       {@code lemmatiser} and {@code tagger} are {@code null}
     * @param tagger2        pre-tagger for the {@link Pretagger} stage; {@code null} replaces the
     *                       stage with a {@link Noop}
     * @param str            argument handed verbatim to the {@link SimFind} constructor;
     *                       {@code null} replaces the stage with a {@link Noop}
     * @param set            mention types forwarded to {@link Sentence2TokenSequence}
     * @param overlapOption  first overlap option forwarded to {@link Sentence2TokenSequence}
     * @param overlapOption2 second overlap option forwarded to {@link Sentence2TokenSequence}
     */
    public FeatureSet(TagFormat tagFormat, Lemmatiser lemmatiser, dragon.nlp.tool.Tagger tagger, Tagger tagger2, String str, Set<Mention.MentionType> set, Sentence.OverlapOption overlapOption, Sentence.OverlapOption overlapOption2) {
        this.pipe = createPipe(tagFormat, lemmatiser, tagger, tagger2, str, set, overlapOption, overlapOption2);
    }

    /**
     * Attaches a lemmatiser to the lemma/POS stage.
     *
     * <p>If the pipeline was built without that stage (a {@link Noop} sits in its slot) there is
     * nothing to configure and the call is ignored rather than failing with a
     * {@link ClassCastException}.
     *
     * @param lemmatiser the lemmatiser to hand to the {@link LemmaPOS} stage
     */
    public void setLemmatiser(Lemmatiser lemmatiser) {
        Pipe stage = this.pipe.getPipe(LEMMA_POS_STAGE);
        if (stage instanceof LemmaPOS) {
            ((LemmaPOS) stage).setLemmatiser(lemmatiser);
        }
    }

    /**
     * Attaches a part-of-speech tagger to the lemma/POS stage.
     *
     * <p>Ignored when the pipeline was built without that stage (see {@link #setLemmatiser}).
     *
     * @param tagger the part-of-speech tagger to hand to the {@link LemmaPOS} stage
     */
    public void setPosTagger(dragon.nlp.tool.Tagger tagger) {
        Pipe stage = this.pipe.getPipe(LEMMA_POS_STAGE);
        if (stage instanceof LemmaPOS) {
            ((LemmaPOS) stage).setPosTagger(tagger);
        }
    }

    /**
     * Attaches a pre-tagger to the pre-tagger stage.
     *
     * <p>Ignored when the pipeline was built without that stage (a {@link Noop} sits in its
     * slot), instead of failing with a {@link ClassCastException}.
     *
     * @param tagger the pre-tagger to hand to the {@link Pretagger} stage
     */
    public void setPreTagger(Tagger tagger) {
        Pipe stage = this.pipe.getPipe(PRETAGGER_STAGE);
        if (stage instanceof Pretagger) {
            ((Pretagger) stage).setPreTagger(tagger);
        }
    }

    /**
     * Returns the assembled pipeline.
     *
     * @return the {@link SerialPipes} instance built by the constructor
     */
    public Pipe getPipe() {
        return this.pipe;
    }

    /**
     * Assembles the stages of the pipeline in their fixed order.
     *
     * <p>Optional stages are replaced by {@link Noop} rather than omitted so that
     * {@link #LEMMA_POS_STAGE} and {@link #PRETAGGER_STAGE} always address the same slots.
     *
     * @return a new {@link SerialPipes} containing every stage
     */
    private SerialPipes createPipe(TagFormat tagFormat, Lemmatiser lemmatiser, dragon.nlp.tool.Tagger posTagger, Tagger preTagger, String simFindArg, Set<Mention.MentionType> mentionTypes, Sentence.OverlapOption overlapOption, Sentence.OverlapOption overlapOption2) {
        ArrayList<Pipe> stages = new ArrayList<Pipe>();

        // Stage 0: sentence -> token sequence.
        stages.add(new Sentence2TokenSequence(tagFormat, mentionTypes, overlapOption, overlapOption2));

        // Stages 1 and 2: optional external tools; the setters above rely on these positions.
        if (lemmatiser == null && posTagger == null) {
            stages.add(new Noop());
        } else {
            stages.add(new LemmaPOS(lemmatiser, posTagger));
        }
        if (preTagger == null) {
            stages.add(new Noop());
        } else {
            stages.add(new Pretagger("PRETAG=", preTagger));
        }

        // Character-context, token text and word/number class features.
        stages.add(new LChar("LCHAR="));
        stages.add(new RChar("RCHAR="));
        stages.add(new LowerCaseTokenText("W="));
        stages.add(new TokenNumberClass("NC=", false));
        stages.add(new TokenNumberClass("BNC=", true));
        stages.add(new TokenWordClass("WC=", false));
        stages.add(new TokenWordClass("BWC=", true));

        // Orthographic features.
        stages.add(new RegexMatches("ALPHA", Pattern.compile("[A-Za-z]+")));
        stages.add(new RegexMatches("INITCAPS", Pattern.compile("[A-Z].*")));
        stages.add(new RegexMatches("UPPER-LOWER", Pattern.compile("[A-Z][a-z].*")));
        stages.add(new RegexMatches("LOWER-UPPER", Pattern.compile("[a-z]+[A-Z]+.*")));
        stages.add(new RegexMatches("ALLCAPS", Pattern.compile("[A-Z]+")));
        stages.add(new RegexMatches("MIXEDCAPS", Pattern.compile("[A-Z][a-z]+[A-Z][A-Za-z]*")));
        stages.add(new RegexMatches("SINGLECHAR", Pattern.compile("[A-Za-z]")));
        stages.add(new RegexMatches("SINGLEDIGIT", Pattern.compile("[0-9]")));
        stages.add(new RegexMatches("DOUBLEDIGIT", Pattern.compile("[0-9][0-9]")));
        stages.add(new RegexMatches("NUMBER", Pattern.compile("[0-9,]+")));
        stages.add(new RegexMatches("HASDIGIT", Pattern.compile(".*[0-9].*")));
        // ALPHANUMERIC is registered twice on purpose: once per digit/letter ordering.
        stages.add(new RegexMatches("ALPHANUMERIC", Pattern.compile(".*[0-9].*[A-Za-z].*")));
        stages.add(new RegexMatches("ALPHANUMERIC", Pattern.compile(".*[A-Za-z].*[0-9].*")));
        stages.add(new RegexMatches("NUMBERS_LETTERS", Pattern.compile("[0-9]+[A-Za-z]+")));
        stages.add(new RegexMatches("LETTERS_NUMBERS", Pattern.compile("[A-Za-z]+[0-9]+")));
        stages.add(new RegexMatches("HAS_DASH", Pattern.compile(".*-.*")));
        stages.add(new RegexMatches("HAS_QUOTE", Pattern.compile(".*'.*")));
        stages.add(new RegexMatches("HAS_SLASH", Pattern.compile(".*/.*")));
        // REALNUMBER is registered twice: digits with optional fraction, and optional digits
        // with a mandatory-if-present fraction.
        // NOTE(review): every part of the second pattern is optional, so it also accepts a bare
        // "-", "+" or "%"; left unchanged here -- confirm whether that is intended.
        stages.add(new RegexMatches("REALNUMBER", Pattern.compile("(-|\\+)?[0-9,]+(\\.[0-9]*)?%?")));
        stages.add(new RegexMatches("REALNUMBER", Pattern.compile("(-|\\+)?[0-9,]*(\\.[0-9]+)?%?")));
        stages.add(new RegexMatches("START_MINUS", Pattern.compile("-.*")));
        stages.add(new RegexMatches("START_PLUS", Pattern.compile("\\+.*")));
        stages.add(new RegexMatches("END_PERCENT", Pattern.compile(".*%")));

        // Prefix, suffix and character n-gram features.
        stages.add(new TokenTextCharPrefix("2PREFIX=", 2));
        stages.add(new TokenTextCharPrefix("3PREFIX=", 3));
        stages.add(new TokenTextCharPrefix("4PREFIX=", 4));
        stages.add(new TokenTextCharSuffix("2SUFFIX=", 2));
        stages.add(new TokenTextCharSuffix("3SUFFIX=", 3));
        stages.add(new TokenTextCharSuffix("4SUFFIX=", 4));
        stages.add(new TokenTextCharNGrams("CHARNGRAM=", new int[]{2, 3}, true));

        // Case-insensitive lexical classes.
        stages.add(new RegexMatches("ROMAN", Pattern.compile("[IVXDLCM]+", Pattern.CASE_INSENSITIVE)));
        stages.add(new RegexMatches("GREEK", Pattern.compile(GREEK, Pattern.CASE_INSENSITIVE)));

        // The hyphen is escaped so that ")-=" is not read as a character range; unescaped, that
        // range (U+0029..U+003D) contains the digits 0-9 and all-digit tokens were flagged as
        // punctuation. Every punctuation character in that range is still listed explicitly.
        stages.add(new RegexMatches("ISPUNCT", Pattern.compile("[`~!@#$%^&*()\\-=_+\\[\\]\\\\{}|;':\\\",./<>?]+")));

        if (simFindArg == null) {
            stages.add(new Noop());
        } else {
            stages.add(new SimFind(simFindArg));
        }

        // Conjunction offsets -2, -1, +1 and +2, one single-offset conjunction each.
        stages.add(new OffsetConjunctions(new int[][]{{-2}, {-1}, {1}, {2}}));
        stages.add(new TokenSequence2FeatureVectorSequence(true, true));
        return new SerialPipes(stages);
    }
}
