package de.julielab.jnet.tagger;

import cc.mallet.pipe.Pipe;
import cc.mallet.types.Alphabet;
import cc.mallet.types.Instance;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.LabelSequence;
import cc.mallet.types.Token;
import cc.mallet.types.TokenSequence;
import com.uea.stemmer.UEALite;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Properties;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.tartarus.snowball.SnowballStemmer;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:de/julielab/jnet/tagger/BasePipe.class */
public class BasePipe extends Pipe {
    private static final String UNICODE_LOWER = "\\p{Ll}";
    private static final String UNICODE_UPPER = "\\p{Lu}";
    private static final long serialVersionUID = 24;
    static Logger LOGGER = LoggerFactory.getLogger(BasePipe.class);
    Properties featureConfig;
    boolean pluralFeature;
    boolean lowerCaseFeature;
    boolean wcFeature;
    boolean bwcFeature;
    String[] customPluralSuffixes;
    Pattern UpperCaseStart;
    String snowballStemmerLanguage;
    Object stemmer;
    transient Stemmer wrappedStemmer;
    private final String[] snowballLanguage;
    private HashSet<String> determiners;

    /* loaded from: input_file:de/julielab/jnet/tagger/BasePipe$Stemmer.class */
    private class Stemmer implements Serializable {
        private static final long serialVersionUID = 666999;
        private UEALite UEAstemmer;
        private SnowballStemmer snowStemmer;

        Stemmer(UEALite uEALite) {
            this.UEAstemmer = null;
            this.snowStemmer = null;
            this.UEAstemmer = uEALite;
        }

        Stemmer(String str) {
            this.UEAstemmer = null;
            this.snowStemmer = null;
            try {
                this.snowStemmer = (SnowballStemmer) Class.forName("org.tartarus.snowball.ext." + str + "Stemmer").newInstance();
            } catch (Exception e) {
                try {
                    this.snowStemmer = (SnowballStemmer) Class.forName("org.tartarus.snowball.ext." + str.toLowerCase() + "Stemmer").newInstance();
                } catch (Exception e2) {
                    e.printStackTrace();
                    e2.printStackTrace();
                }
            }
        }

        String stem(String str) {
            if (this.UEAstemmer != null) {
                return this.UEAstemmer.stem(str).getWord();
            }
            if (this.snowStemmer == null) {
                return null;
            }
            this.snowStemmer.setCurrent(str);
            this.snowStemmer.stem();
            return this.snowStemmer.getCurrent();
        }
    }

    private void readObject(ObjectInputStream objectInputStream) throws IOException, ClassNotFoundException {
        objectInputStream.defaultReadObject();
        if (this.stemmer instanceof UEALite) {
            this.wrappedStemmer = new Stemmer((UEALite) this.stemmer);
            LOGGER.debug("Got UEALite stemmer in loaded model: {}", this.wrappedStemmer);
        } else {
            if (null == this.snowballLanguage) {
                throw new IllegalStateException("No stemmer was found in the stored model, this is invalid.");
            }
            this.wrappedStemmer = new Stemmer(this.snowballStemmerLanguage);
            LOGGER.debug("Got snowball stemmer for language {} in loaded model", this.snowballStemmerLanguage);
        }
    }

    public BasePipe(Properties properties) {
        super(new Alphabet(), new LabelAlphabet());
        this.pluralFeature = false;
        this.lowerCaseFeature = false;
        this.wcFeature = false;
        this.bwcFeature = false;
        this.featureConfig = properties;
        this.UpperCaseStart = Pattern.compile("[\\p{Lu}][^\\p{Lu}]*");
        FeatureConfiguration featureConfiguration = new FeatureConfiguration();
        if (featureConfiguration.featureActive(properties, "feat_lowercase_enabled")) {
            this.lowerCaseFeature = true;
        }
        if (featureConfiguration.featureActive(properties, "feat_plural_enabled")) {
            this.pluralFeature = true;
        }
        if (this.pluralFeature) {
            this.customPluralSuffixes = featureConfiguration.getStringArray(properties, "customPluralSuffixes");
        }
        String[] stringArray = featureConfiguration.getStringArray(properties, "determinerList");
        if (stringArray != null) {
            this.determiners = new HashSet<>(Arrays.asList(stringArray));
        } else {
            this.determiners = new HashSet<>(1);
        }
        if (featureConfiguration.featureActive(properties, "feat_wc_enabled")) {
            this.wcFeature = true;
        }
        if (featureConfiguration.featureActive(properties, "feat_bwc_enabled")) {
            this.bwcFeature = true;
        }
        this.snowballLanguage = featureConfiguration.getStringArray(properties, "SnowballStemmerLanguage");
        if (this.snowballLanguage == null) {
            this.wrappedStemmer = new Stemmer(new UEALite());
            this.stemmer = this.wrappedStemmer.UEAstemmer;
        } else {
            if (this.snowballLanguage.length != 1) {
                throw new IllegalArgumentException("Choose 1 language!");
            }
            this.wrappedStemmer = new Stemmer(this.snowballLanguage[0]);
            this.snowballStemmerLanguage = this.snowballLanguage[0];
        }
    }

    public Instance pipe(Instance instance) {
        ArrayList<Unit> units = ((Sentence) instance.getData()).getUnits();
        StringBuffer stringBuffer = new StringBuffer();
        TokenSequence tokenSequence = new TokenSequence(units.size());
        LabelSequence labelSequence = new LabelSequence(getTargetAlphabet(), units.size());
        String[] trueMetas = new FeatureConfiguration().getTrueMetas(this.featureConfig);
        String[] strArr = new String[units.size()];
        String[] strArr2 = new String[units.size()];
        for (int i = 0; i < units.size(); i++) {
            String rep = units.get(i).getRep();
            if (this.lowerCaseFeature && this.UpperCaseStart.matcher(rep).matches()) {
                rep = rep.toLowerCase();
            }
            strArr2[i] = rep;
            strArr[i] = this.wrappedStemmer.stem(strArr2[i]);
        }
        for (int i2 = 0; i2 < units.size(); i2++) {
            try {
                String str = strArr[i2];
                Token token = new Token(str);
                token.setFeatureValue("W=" + str, 1.0d);
                if (this.pluralFeature) {
                    if (this.customPluralSuffixes != null) {
                        String[] strArr3 = this.customPluralSuffixes;
                        int length = strArr3.length;
                        int i3 = 0;
                        while (true) {
                            if (i3 >= length) {
                                break;
                            }
                            if (strArr2[i2].equals(strArr[i2] + strArr3[i3])) {
                                token.setFeatureValue("PLURAL", 1.0d);
                                break;
                            }
                            i3++;
                        }
                    } else if (strArr2[i2].equals(strArr[i2] + "s")) {
                        token.setFeatureValue("PLURAL", 1.0d);
                    }
                }
                if (this.determiners != null && !this.determiners.isEmpty() && this.determiners.contains(strArr2[i2])) {
                    token.setFeatureValue("DET", 1.0d);
                }
                for (String str2 : trueMetas) {
                    String property = this.featureConfig.getProperty(str2 + "_feat_unit");
                    String metaInfo = units.get(i2).getMetaInfo(property);
                    if (metaInfo != null) {
                        token.setFeatureValue(property + "=" + metaInfo, 1.0d);
                    }
                }
                token.setText(str);
                if (this.wcFeature) {
                    token.setFeatureValue("WC=" + str.replaceAll(UNICODE_UPPER, "A").replaceAll(UNICODE_LOWER, "a").replaceAll("[0-9]", "0").replaceAll("[^\\p{Lu}\\p{Ll}0-9]", "x"), 1.0d);
                }
                if (this.bwcFeature) {
                    token.setFeatureValue("BWC=" + str.replaceAll("\\p{Lu}+", "A").replaceAll("\\p{Ll}+", "a").replaceAll("[0-9]+", "0").replaceAll("[^\\p{Lu}\\p{Ll}0-9]+", "x"), 1.0d);
                }
                stringBuffer.append(token.getText());
                stringBuffer.append(" ");
                tokenSequence.add(token);
                labelSequence.add(units.get(i2).getLabel());
            } catch (Exception e) {
                RuntimeException runtimeException = new RuntimeException(e);
                e.printStackTrace();
                LOGGER.error("", runtimeException);
                throw runtimeException;
            }
        }
        if (labelSequence.size() != tokenSequence.size()) {
            IllegalStateException illegalStateException = new IllegalStateException("Label not found... check your label definition file.");
            LOGGER.error("", illegalStateException);
            throw illegalStateException;
        }
        instance.setData(tokenSequence);
        instance.setTarget(labelSequence);
        instance.setSource(stringBuffer);
        return instance;
    }
}
