package eus.ixa.ixa.pipe.pos;

import eus.ixa.ixa.pipe.lemma.DictionaryLemmatizer;
import eus.ixa.ixa.pipe.lemma.MorfologikLemmatizer;
import eus.ixa.ixa.pipe.lemma.MultiWordMatcher;
import eus.ixa.ixa.pipe.pos.dict.DictionaryTagger;
import eus.ixa.ixa.pipe.pos.dict.MorfologikMorphoTagger;
import ixa.kaflib.KAFDocument;
import ixa.kaflib.Span;
import ixa.kaflib.Term;
import ixa.kaflib.WF;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import opennlp.tools.tokenize.TokenizerME;

/* loaded from: input_file:eus/ixa/ixa/pipe/pos/Annotate.class */
/**
 * Runs part-of-speech tagging and lemmatization over the sentences of a
 * {@link KAFDocument} and emits the result either as KAF terms
 * ({@link #annotatePOSToKAF(KAFDocument)}) or as tab-separated CoNLL-style text
 * ({@link #annotatePOSToCoNLL(KAFDocument)}).
 *
 * <p>Behaviour is driven by three properties read in the constructor:
 * {@code language}, {@code multiwords} and {@code dictag}.
 */
public class Annotate {
    /** KAF part-of-speech code used when no mapping rule applies. */
    private static final String KAF_OTHER = "O";

    private final MorphoTagger posTagger;
    private final String lang;
    private final MorphoFactory morphoFactory;
    private DictionaryLemmatizer dictLemmatizer;
    private final boolean multiwords;
    /** Only initialised when {@link #multiwords} is {@code true}. */
    private MultiWordMatcher multiWordMatcher;
    private final boolean dictag;
    /** Only initialised when {@link #dictag} is {@code true}. */
    private DictionaryTagger dictMorphoTagger;

    /**
     * Builds the annotator and loads every resource it needs.
     *
     * @param properties configuration; the keys {@code language},
     *        {@code multiwords} and {@code dictag} are read here, and the
     *        whole object is handed on to the tagger and multiword matcher
     * @throws IOException if a dictionary or the tagger cannot be loaded
     */
    public Annotate(Properties properties) throws IOException {
        this.lang = properties.getProperty("language");
        // parseBoolean treats a missing (null) property as false.
        this.multiwords = Boolean.parseBoolean(properties.getProperty("multiwords"));
        this.dictag = Boolean.parseBoolean(properties.getProperty("dictag"));
        if (this.multiwords) {
            this.multiWordMatcher = new MultiWordMatcher(properties);
        }
        if (this.dictag) {
            loadMorphoTaggerDicts(properties);
        }
        loadLemmatizerDicts(properties);
        this.morphoFactory = new MorphoFactory();
        this.posTagger = new MorphoTagger(properties, this.morphoFactory);
    }

    /**
     * Loads the binary lemmatizer dictionary for the configured language.
     * Terminates the JVM with status 1 when no dictionary resource exists.
     *
     * @param properties unused; kept so the loader signatures stay parallel
     * @throws IOException if the dictionary exists but cannot be read
     */
    private void loadLemmatizerDicts(Properties properties) throws IOException {
        URL binaryDict = new Resources().getBinaryDict(this.lang);
        if (binaryDict == null) {
            System.err.println("ERROR: No binary lemmatizer dictionary available for language " + this.lang + " in src/main/resources!!");
            System.exit(1);
        }
        // Propagate read failures instead of swallowing them: continuing with a
        // null lemmatizer would only fail later with a NullPointerException.
        this.dictLemmatizer = new MorfologikLemmatizer(binaryDict, this.lang);
    }

    /**
     * Loads the binary POS tagger dictionary for the configured language.
     * Terminates the JVM with status 1 when no dictionary resource exists.
     *
     * @param properties unused; kept so the loader signatures stay parallel
     * @throws IOException if the dictionary exists but cannot be read
     */
    private void loadMorphoTaggerDicts(Properties properties) throws IOException {
        URL binaryTaggerDict = new Resources().getBinaryTaggerDict(this.lang);
        if (binaryTaggerDict == null) {
            System.err.println("ERROR: No binary POS tagger dictionary available for language " + this.lang + " in src/main/resources!!");
            System.exit(1);
        }
        // See loadLemmatizerDicts: fail fast rather than leave the field null.
        this.dictMorphoTagger = new MorfologikMorphoTagger(binaryTaggerDict, this.lang);
    }

    /**
     * Maps an English tagger tag to a single-letter KAF code.
     * Rules are evaluated in order; the first match wins.
     */
    private String mapEnglishTagSetToKaf(String str) {
        if (str.startsWith("RB")) {
            return "A";
        }
        if (str.equalsIgnoreCase("CC")) {
            return "C";
        }
        if (str.startsWith("D") || str.equalsIgnoreCase("PDT")) {
            return "D";
        }
        if (str.startsWith("J")) {
            return "G";
        }
        if (str.equalsIgnoreCase("NN") || str.equalsIgnoreCase("NNS")) {
            return "N";
        }
        if (str.startsWith("NNP")) {
            return "R";
        }
        if (str.equalsIgnoreCase("TO") || str.equalsIgnoreCase("IN")) {
            return "P";
        }
        if (str.startsWith("PRP") || str.startsWith("WP")) {
            return "Q";
        }
        if (str.startsWith("V")) {
            return "V";
        }
        return KAF_OTHER;
    }

    /**
     * Maps a Spanish tagger tag to a single-letter KAF code.
     * Rules are evaluated in order; "SP" must be tested before the bare "P".
     */
    private String mapSpanishTagSetToKaf(String str) {
        if (str.equalsIgnoreCase("RG") || str.equalsIgnoreCase("RN")) {
            return "A";
        }
        if (str.equalsIgnoreCase("CC") || str.equalsIgnoreCase("CS")) {
            return "C";
        }
        if (str.startsWith("D")) {
            return "D";
        }
        if (str.startsWith("A")) {
            return "G";
        }
        if (str.startsWith("NC")) {
            return "N";
        }
        if (str.startsWith("NP")) {
            return "R";
        }
        if (str.startsWith("SP")) {
            return "P";
        }
        if (str.startsWith("P")) {
            return "Q";
        }
        if (str.startsWith("V")) {
            return "V";
        }
        return KAF_OTHER;
    }

    /**
     * Maps a Galician tagger tag to a single-letter KAF code.
     * Rules are evaluated in order; the first match wins.
     */
    private String mapGalicianTagSetToKaf(String str) {
        if (str.startsWith("R")) {
            return "A";
        }
        if (str.equalsIgnoreCase("CC") || str.equalsIgnoreCase("CS")) {
            return "C";
        }
        // "T" is a tag prefix here; it is written as a literal rather than via
        // an unrelated tokenizer constant that merely shares the same value.
        if (str.startsWith("D") || str.startsWith("G") || str.startsWith("X")
                || str.startsWith("Q") || str.startsWith("T")
                || str.startsWith("I") || str.startsWith("M")) {
            return "D";
        }
        if (str.startsWith("A")) {
            return "G";
        }
        if (str.startsWith("NC")) {
            return "N";
        }
        if (str.startsWith("NP")) {
            return "R";
        }
        if (str.startsWith("S")) {
            return "P";
        }
        if (str.startsWith("P")) {
            return "Q";
        }
        if (str.startsWith("V")) {
            return "V";
        }
        return KAF_OTHER;
    }

    /**
     * Maps a French tagger tag to a single-letter KAF code.
     * Rules are evaluated in order; "PRO"/"CL" are tested before the exact
     * preposition tags.
     */
    private String mapFrenchTagSetToKaf(String str) {
        if (str.startsWith("ADV")) {
            return "A";
        }
        if (str.equalsIgnoreCase("CC") || str.equalsIgnoreCase("CS")) {
            return "C";
        }
        if (str.startsWith("D") || str.startsWith("I")) {
            return "D";
        }
        if (str.startsWith("ADJ")) {
            return "G";
        }
        if (str.startsWith("NC")) {
            return "N";
        }
        if (str.startsWith("NPP")) {
            return "R";
        }
        if (str.startsWith("PRO") || str.startsWith("CL")) {
            return "Q";
        }
        if (str.equalsIgnoreCase("P") || str.equalsIgnoreCase("P+D") || str.equalsIgnoreCase("P+PRO")) {
            return "P";
        }
        if (str.startsWith("V")) {
            return "V";
        }
        return KAF_OTHER;
    }

    /**
     * Dispatches to the tag mapper for the configured language.
     *
     * @param str tag produced by the tagger
     * @return the KAF code; {@code "O"} when the language has no mapper, so
     *         callers never receive {@code null}
     */
    private String getKafTagSet(String str) {
        // Constant-first comparisons stay safe when the "language" property was absent.
        if ("en".equalsIgnoreCase(this.lang)) {
            return mapEnglishTagSetToKaf(str);
        }
        if ("es".equalsIgnoreCase(this.lang)) {
            return mapSpanishTagSetToKaf(str);
        }
        if ("gl".equalsIgnoreCase(this.lang)) {
            return mapGalicianTagSetToKaf(str);
        }
        if ("fr".equalsIgnoreCase(this.lang)) {
            return mapFrenchTagSetToKaf(str);
        }
        return KAF_OTHER;
    }

    /**
     * Derives the term type from a KAF code.
     *
     * @param str KAF code as returned by {@link #getKafTagSet(String)}
     * @return {@code "open"} for codes starting with N, V, G or A,
     *         {@code "close"} otherwise
     */
    private String setTermType(String str) {
        boolean openClass = str.startsWith("N") || str.startsWith("V")
                || str.startsWith("G") || str.startsWith("A");
        return openClass ? "open" : "close";
    }

    /**
     * Tags every sentence of the document and adds one term per morpheme,
     * filling in type, lemma, KAF pos and the tagger's tag as morphofeat.
     *
     * @param kAFDocument document whose word forms are read and to which the
     *        new terms are added
     */
    public final void annotatePOSToKAF(KAFDocument kAFDocument) {
        for (List<WF> sentence : kAFDocument.getSentences()) {
            List<Span<WF>> tokenSpans = new ArrayList<>();
            List<Morpheme> morphemes = tagSentence(sentence, tokenSpans);
            for (int i = 0; i < morphemes.size(); i++) {
                Morpheme morpheme = morphemes.get(i);
                Term newTerm = kAFDocument.newTerm(tokenSpans.get(i));
                applyDictionaryTag(morpheme);
                String kafTagSet = getKafTagSet(morpheme.getTag());
                String termType = setTermType(kafTagSet);
                morpheme.setLemma(this.dictLemmatizer.lemmatize(morpheme.getWord(), morpheme.getTag()));
                newTerm.setType(termType);
                newTerm.setLemma(morpheme.getLemma());
                newTerm.setPos(kafTagSet);
                newTerm.setMorphofeat(morpheme.getTag());
            }
        }
    }

    /**
     * Collapses the single-token spans covered by each detected multiword into
     * one span, modifying {@code list2} in place.
     *
     * @param strArr original sentence tokens
     * @param list word forms of the sentence, parallel to {@code strArr}
     * @param list2 one span per token on entry; one span per token or
     *        multiword on return
     */
    private void getMultiWordSpans(String[] strArr, List<WF> list, List<Span<WF>> list2) {
        // Number of entries already removed from list2 by earlier merges; span
        // offsets refer to the original tokens and must be shifted by it.
        int removed = 0;
        for (opennlp.tools.util.Span mwSpan : this.multiWordMatcher.multiWordsToSpans(strArr)) {
            int from = mwSpan.getStart() - removed;
            int to = mwSpan.getEnd() - removed;
            removed += (to - from) - 1;
            Span<WF> merged = KAFDocument.newWFSpan(list.subList(mwSpan.getStart(), mwSpan.getEnd()));
            list2.subList(from, to).clear();
            list2.add(from, merged);
        }
    }

    /**
     * Tags every sentence of the document and renders the result as text: one
     * {@code word<TAB>lemma<TAB>tag} line per morpheme and an empty line after
     * each sentence.
     *
     * @param kAFDocument document whose word forms are read
     * @return the rendered annotation
     * @throws IOException declared for interface compatibility
     */
    public final String annotatePOSToCoNLL(KAFDocument kAFDocument) throws IOException {
        StringBuilder sb = new StringBuilder();
        for (List<WF> sentence : kAFDocument.getSentences()) {
            // The spans are not needed for this output format; they are still
            // built because the shared helper produces them alongside the tags.
            List<Span<WF>> tokenSpans = new ArrayList<>();
            List<Morpheme> morphemes = tagSentence(sentence, tokenSpans);
            for (Morpheme morpheme : morphemes) {
                String word = morpheme.getWord();
                applyDictionaryTag(morpheme);
                String tag = morpheme.getTag();
                sb.append(word).append("\t")
                        .append(this.dictLemmatizer.lemmatize(word, tag)).append("\t")
                        .append(tag).append("\n");
            }
            sb.append("\n");
        }
        return sb.toString();
    }

    /**
     * Tags one sentence and fills {@code tokenSpans} with the word-form span of
     * each resulting morpheme (merged for multiwords when enabled).
     */
    private List<Morpheme> tagSentence(List<WF> sentence, List<Span<WF>> tokenSpans) {
        String[] tokens = new String[sentence.size()];
        for (int i = 0; i < sentence.size(); i++) {
            WF wordForm = sentence.get(i);
            tokens[i] = wordForm.getForm();
            List<WF> single = new ArrayList<>();
            single.add(wordForm);
            tokenSpans.add(KAFDocument.newWFSpan(single));
        }
        if (!this.multiwords) {
            return this.posTagger.getMorphemes(tokens);
        }
        List<Morpheme> morphemes =
                this.posTagger.getMorphemes(this.multiWordMatcher.getTokensWithMultiWords(tokens));
        getMultiWordSpans(tokens, sentence, tokenSpans);
        return morphemes;
    }

    /** Replaces the morpheme's tag with the dictionary tagger's result when {@code dictag} is on. */
    private void applyDictionaryTag(Morpheme morpheme) {
        if (this.dictag) {
            morpheme.setTag(this.dictMorphoTagger.tag(morpheme.getWord(), morpheme.getTag()));
        }
    }
}
