package de.julielab.jcore.ae.jnet.uima;

import cc.mallet.fst.CRF;
import de.julielab.jcore.types.Abbreviation;
import de.julielab.jcore.types.EntityMention;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.types.Token;
import de.julielab.jcore.utility.JCoReAnnotationTools;
import de.julielab.jnet.tagger.NETagger;
import de.julielab.jnet.tagger.Unit;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Properties;
import java.util.StringTokenizer;
import java.util.TreeSet;
import java.util.regex.Pattern;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorContextException;
import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/julielab/jcore/ae/jnet/uima/EntityAnnotator.class */
public class EntityAnnotator extends JCasAnnotator_ImplBase {
    /** Canonical class name of this annotator; stored as the component id on every entity mention it creates. */
    private static final String COMPONENT_ID = EntityAnnotator.class.getCanonicalName();
    private static final Logger LOGGER = LoggerFactory.getLogger(EntityAnnotator.class);
    /** Label that marks a unit as not belonging to any entity mention. */
    private static final String OUTSIDE_LABEL = "O";
    /** Regular expression compiled into {@link #abbrevPattern} and used in {@code ignoreLabel} to recognise short upper-case abbreviations. */
    protected static final String ABBREV_PATTERN = "[A-Z]{2,3}s?";
    /** Maps the left-hand side of each {@code EntityTypes} entry (tagger label) to its right-hand side (annotation class name). */
    private HashMap<String, String> entityMap;
    /** Tagger instance; created and loaded with a model in {@code setModel}. */
    private NETagger tagger;
    /** Optional negative list; stays {@code null} unless the {@code NegativeList} parameter is set. */
    protected NegativeList negativeList;
    /** Compiled {@link #ABBREV_PATTERN}; stays {@code null} unless {@code IgnoreNotIntroducedAbbreviations} is {@code true}. */
    public Pattern abbrevPattern = null;
    /** Value of the {@code ExpandAbbreviations} parameter. */
    protected boolean expandAbbr = false;
    /** Built from the {@code ConsistencyPreservation} parameter; {@code null} when that parameter is absent. */
    protected ConsistencyPreservation consistencyPreservation = null;
    /** Value of the {@code ConfidenceThresholdForConsistencyPreservation} parameter; passed to {@code stringMatch} in {@code process}. */
    protected float confidenceThresholdForConsistencyPreservation = -1.0f;
    /** Value of the {@code ShowSegmentConfidence} parameter; passed to the tagger and controls whether confidences are written. */
    protected boolean showSegmentConf = false;
    /** Right-hand sides of all {@code EntityTypes} entries. */
    protected TreeSet<String> entityMentionTypes = null;
    /** Feature configuration obtained from the tagger; refreshed in {@code retrieveMetaInformation}. */
    Properties featureConfig = null;
    /** Names of meta features whose {@code <name>_feat_enabled} property is {@code true}; rebuilt in {@code retrieveMetaInformation}. */
    ArrayList<String> activatedMetas = null;
    /** Annotation iterators created in {@code retrieveMetaInformation}, one per entry of {@link #activatedMetas}. */
    ArrayList<FSIterator<Annotation>> annotationIterators = null;
    /** Getter method names read from {@code <name>_feat_valMethod} properties; accessed by index in {@code getMetas}. */
    ArrayList<String> valueMethods = null;
    /** Name of the descriptor parameter that switches the tagger to maximum entropy mode. */
    private String maxEnt_parameter = "maxEnt";
    /** Name of the descriptor parameter that carries the iteration count. */
    private String iteration_parameter = "iterations";
    private boolean maxEnt = false;
    private int iterations_number = 0;

    /**
     * Configures this annotator from the UIMA descriptor.
     *
     * <p>Loads the tagger model, reads the entity type mapping, the segment
     * confidence flag and the optional negative list, and then evaluates the
     * remaining optional parameters ({@code ExpandAbbreviations},
     * {@code ConsistencyPreservation},
     * {@code ConfidenceThresholdForConsistencyPreservation},
     * {@code IgnoreNotIntroducedAbbreviations}, and the max-ent / iteration
     * parameters, which are forwarded to the tagger).
     *
     * @param uimaContext the context that provides the configuration parameter values
     * @throws ResourceInitializationException if any part of the configuration
     *         fails; the original failure is attached as the cause
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        LOGGER.info("initialize() - initializing JNET...");
        super.initialize(uimaContext);
        try {
            setModel(uimaContext);
            setEntityTypes(uimaContext);
            setShowSegmentConfidence(uimaContext);
            setNegativeList(uimaContext);

            Object expandAbbreviations = uimaContext.getConfigParameterValue("ExpandAbbreviations");
            if (expandAbbreviations != null) {
                this.expandAbbr = ((Boolean) expandAbbreviations).booleanValue();
            }

            Object consistencyMode = uimaContext.getConfigParameterValue("ConsistencyPreservation");
            if (consistencyMode != null) {
                this.consistencyPreservation = new ConsistencyPreservation((String) consistencyMode);
            }

            Object consistencyThreshold = uimaContext.getConfigParameterValue("ConfidenceThresholdForConsistencyPreservation");
            if (consistencyThreshold != null) {
                this.confidenceThresholdForConsistencyPreservation = ((Float) consistencyThreshold).floatValue();
            }

            Object ignoreNotIntroduced = uimaContext.getConfigParameterValue("IgnoreNotIntroducedAbbreviations");
            if (ignoreNotIntroduced != null && ((Boolean) ignoreNotIntroduced).booleanValue()) {
                this.abbrevPattern = Pattern.compile(ABBREV_PATTERN);
            }

            // The tagger is only told about max-ent mode when the flag is explicitly true.
            Object maxEntValue = uimaContext.getConfigParameterValue(this.maxEnt_parameter);
            if (maxEntValue != null && ((Boolean) maxEntValue).booleanValue()) {
                this.maxEnt = true;
                this.tagger.set_Max_Ent(this.maxEnt);
            }

            Object iterationsValue = uimaContext.getConfigParameterValue(this.iteration_parameter);
            if (iterationsValue != null) {
                this.iterations_number = ((Integer) iterationsValue).intValue();
                this.tagger.set_Number_Iterations(this.iterations_number);
            }

            LOGGER.info("initialize() - abbreviation expansion: " + this.expandAbbr);
            LOGGER.info("initialize() - negative list: " + (this.negativeList != null));
            LOGGER.info("initialize() - show confidence: " + this.showSegmentConf);
            LOGGER.info("initialize() - consistency preservation: " + (this.consistencyPreservation != null ? this.consistencyPreservation.toString() : "none"));
            LOGGER.info("initialize() - ignore not introduces abbreviations: " + (this.abbrevPattern != null));
        } catch (AnnotatorConfigurationException e) {
            LOGGER.error("initialize() - invalid annotator configuration", e);
            throw new ResourceInitializationException(e);
        } catch (AnnotatorInitializationException e) {
            LOGGER.error("initialize() - annotator could not be initialized", e);
            throw new ResourceInitializationException(e);
        } catch (IOException e) {
            // Previously only printed and swallowed, which left a half-configured annotator running.
            LOGGER.error("initialize() - I/O failure during initialization", e);
            throw new ResourceInitializationException(e);
        } catch (AnnotatorContextException e) {
            LOGGER.error("initialize() - configuration parameters could not be read", e);
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Rebuilds the per-document meta feature bookkeeping from the tagger's
     * feature configuration.
     *
     * <p>A meta feature {@code X} is active when the property
     * {@code X_feat_enabled} is {@code true}. For every active feature, an
     * iterator over the annotation type named by {@code X_feat_data} and the
     * getter name from {@code X_feat_valMethod} are stored at the same index in
     * {@link #annotationIterators} and {@link #valueMethods}, because
     * {@code getMetas} pairs the three lists by index.
     *
     * @param jCas the CAS of the document that is about to be processed
     * @throws AnalysisEngineProcessException if an annotation iterator cannot be created
     */
    private void retrieveMetaInformation(JCas jCas) throws AnalysisEngineProcessException {
        JFSIndexRepository indexRepository = jCas.getJFSIndexRepository();
        this.featureConfig = this.tagger.getFeatureConfig();
        this.activatedMetas = new ArrayList<>();
        this.annotationIterators = new ArrayList<>();
        this.valueMethods = new ArrayList<>();

        Enumeration<Object> keys = this.featureConfig.keys();
        while (keys.hasMoreElements()) {
            String key = (String) keys.nextElement();
            if (key.matches("[A-Za-z]+_feat_enabled") && this.featureConfig.getProperty(key).matches("true")) {
                this.activatedMetas.add(key.substring(0, key.indexOf("_feat_enabled")));
            }
        }

        // Derive the value methods from the activated metas instead of collecting them in
        // property enumeration order: that order is arbitrary and also included disabled
        // features, so valueMethods.get(i) did not necessarily belong to activatedMetas.get(i).
        for (String meta : this.activatedMetas) {
            this.valueMethods.add(this.featureConfig.getProperty(meta + "_feat_valMethod"));
            try {
                this.annotationIterators.add(jFSIndexIterator(indexRepository, jCas, meta));
            } catch (Exception e) {
                throw new AnalysisEngineProcessException(e);
            }
        }
    }

    /** Creates an iterator over the annotation type configured as {@code <meta>_feat_data}. */
    private FSIterator<Annotation> jFSIndexIterator(JFSIndexRepository indexRepository, JCas jCas, String meta) throws Exception {
        String annotationClassName = this.featureConfig.getProperty(meta + "_feat_data");
        int typeIndexId = JCoReAnnotationTools.getAnnotationByClassName(jCas, annotationClassName).getTypeIndexID();
        return indexRepository.getAnnotationIndex(typeIndexId).iterator();
    }

    /**
     * Reads the mandatory {@code EntityTypes} parameter, an array of
     * {@code label=type} entries, and fills {@link #entityMap} (label to type)
     * and {@link #entityMentionTypes} (all types).
     *
     * <p>If the tagger exposes a model, every configured label must occur in
     * the model's output alphabet.
     *
     * @param uimaContext the context that provides the parameter value
     * @throws AnnotatorConfigurationException if the parameter is missing, an
     *         entry is not of the form {@code label=type}, or a label is
     *         unknown to the model
     */
    private void setEntityTypes(UimaContext uimaContext) throws ResourceInitializationException, AnnotatorContextException, AnnotatorConfigurationException {
        this.entityMentionTypes = new TreeSet<>();
        Object configParameterValue = uimaContext.getConfigParameterValue("EntityTypes");
        if (configParameterValue == null) {
            LOGGER.error("setEntityTypes() - descriptor incomplete, entity types not specified!");
            throw new AnnotatorConfigurationException();
        }
        String[] entries = (String[]) configParameterValue;
        this.entityMap = new HashMap<>();
        ArrayList<String> labels = new ArrayList<>();
        for (String entry : entries) {
            String[] parts = entry.split("=");
            if (parts.length < 2) {
                // Report a broken descriptor as a configuration problem rather than
                // failing with an ArrayIndexOutOfBoundsException.
                LOGGER.error("setEntityTypes() - malformed entity type entry \"{}\", expected \"label=type\"", entry);
                throw new AnnotatorConfigurationException();
            }
            this.entityMap.put(parts[0], parts[1]);
            this.entityMentionTypes.add(parts[1]);
            labels.add(parts[0]);
        }

        CRF crf = (CRF) this.tagger.getModel();
        if (crf != null) {
            Object[] outputLabels = crf.getOutputAlphabet().toArray();
            for (String label : labels) {
                boolean known = false;
                for (Object outputLabel : outputLabels) {
                    if (label.equals(outputLabel)) {
                        known = true;
                        break;
                    }
                }
                if (!known) {
                    LOGGER.error("setEntityTypes() - Could not find entity label \"{}\" from descriptor in the tagger's OutputAlphabet.", label);
                    throw new AnnotatorConfigurationException();
                }
            }
        }
        LOGGER.debug("Entity mention types: " + this.entityMentionTypes.toString());
    }

    /**
     * Creates the tagger and loads the model named by the mandatory
     * {@code ModelFilename} parameter.
     *
     * <p>The value is first tried as a file system path; if no such file
     * exists it is looked up as a classpath resource (a leading {@code /} is
     * added when missing).
     *
     * @param uimaContext the context that provides the parameter value
     * @throws AnnotatorConfigurationException if no model file is configured
     * @throws AnnotatorInitializationException if the model cannot be found or read
     */
    private void setModel(UimaContext uimaContext) throws AnnotatorConfigurationException, AnnotatorContextException, AnnotatorInitializationException, IOException {
        Object configParameterValue = uimaContext.getConfigParameterValue("ModelFilename");
        if (configParameterValue == null) {
            LOGGER.error("setModel() - descriptor incomplete, no model file specified!");
            throw new AnnotatorConfigurationException();
        }
        String modelLocation = (String) configParameterValue;
        this.tagger = new NETagger();
        LOGGER.debug("setModel() -  loading JNET model " + modelLocation);

        File modelFile = new File(modelLocation);
        String resourceName = modelLocation.startsWith("/") ? modelLocation : "/" + modelLocation;
        // try-with-resources guarantees the stream is released; a null resource is simply not closed.
        try (java.io.InputStream modelStream = modelFile.exists()
                ? new FileInputStream(modelFile)
                : getClass().getResourceAsStream(resourceName)) {
            if (modelStream == null) {
                throw new IOException("model not found as file or classpath resource: " + modelLocation);
            }
            this.tagger.readModel(modelStream);
        } catch (Exception e) {
            LOGGER.error("setModel() - Could not load JNET model: " + e.getMessage(), e);
            throw new AnnotatorInitializationException(e);
        }
    }

    /**
     * Loads the optional negative list from the file named by the
     * {@code NegativeList} parameter. When the parameter is absent,
     * {@link #negativeList} is left untouched.
     *
     * @param uimaContext the context that provides the parameter value
     * @throws AnnotatorConfigurationException if the configured file cannot be
     *         read; the I/O failure is attached as the cause
     */
    private void setNegativeList(UimaContext uimaContext) throws AnnotatorConfigurationException, AnnotatorContextException {
        Object configParameterValue = uimaContext.getConfigParameterValue("NegativeList");
        if (configParameterValue == null) {
            return;
        }
        File file = new File((String) configParameterValue);
        try {
            this.negativeList = new NegativeList(file);
            LOGGER.debug("setNegativeList() - using negative list: " + file);
        } catch (IOException e) {
            // Keep the stack trace and the cause so the reason for the failure is not lost.
            LOGGER.error("setNegativeList() - specified negative list file cannot be read: " + e.getMessage(), e);
            throw new AnnotatorConfigurationException(e);
        }
    }

    /**
     * Takes over the optional {@code ShowSegmentConfidence} parameter. If the
     * parameter is not set, the current value of {@link #showSegmentConf} is kept.
     *
     * @param uimaContext the context that provides the parameter value
     */
    private void setShowSegmentConfidence(UimaContext uimaContext) throws AnnotatorContextException {
        final Object configured = uimaContext.getConfigParameterValue("ShowSegmentConfidence");
        if (null != configured) {
            final Boolean flag = (Boolean) configured;
            this.showSegmentConf = flag.booleanValue();
        }
        LOGGER.debug("setShowSegmentConfidence() - show segment confidence: " + this.showSegmentConf);
    }

    /**
     * Tags every sentence of the document and writes the predicted entity
     * mentions to the CAS.
     *
     * <p>For each {@link Sentence} the covered tokens and their meta
     * information are turned into a tagger sentence, labels are predicted,
     * duplicated units are removed again when abbreviation expansion is on, and
     * the result is written via {@link #writeToCAS}. Afterwards the optional
     * consistency preservation is run over the whole document.
     *
     * @param jCas the CAS of the document to process
     * @throws AnalysisEngineProcessException if token and meta lists differ in
     *         size or the tagger fails to predict
     */
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        LOGGER.info("process() - processing next document");
        JFSIndexRepository indexRepository = jCas.getJFSIndexRepository();
        retrieveMetaInformation(jCas);
        FSIterator<Annotation> sentenceIterator = indexRepository.getAnnotationIndex(Sentence.type).iterator();
        while (sentenceIterator.hasNext()) {
            Sentence sentence = (Sentence) sentenceIterator.next();
            // Use the class literal: instantiating a Token only to ask for its class
            // would create an extra feature structure in the CAS for every sentence.
            @SuppressWarnings({"unchecked", "rawtypes"})
            ArrayList<Token> tokens = (ArrayList) UIMAUtils.getAnnotations(jCas, sentence, Token.class);
            ArrayList<HashMap<String, String>> metaList = getMetaList(tokens);
            if (tokens.size() != metaList.size()) {
                LOGGER.error("process() - token list, and meta list for this sentence not of same size!");
                throw new AnalysisEngineProcessException();
            }
            de.julielab.jnet.tagger.Sentence unitSentence = createUnitSentence(tokens, jCas, metaList);
            LOGGER.debug("process() - original sentence: " + sentence.getCoveredText());
            LOGGER.debug("process() - sentence for prediction: " + unitSentence.toString());
            try {
                this.tagger.predict(unitSentence, this.showSegmentConf);
                if (this.expandAbbr) {
                    unitSentence = removeDuplicatedTokens(unitSentence);
                }
                LOGGER.debug("process() - sentence with labels: " + unitSentence.toString());
                writeToCAS(unitSentence, jCas);
            } catch (IllegalStateException e) {
                LOGGER.error("process() - predicting with JNET failed: " + e.getMessage(), e);
                throw new AnalysisEngineProcessException(e);
            }
        }
        if (this.consistencyPreservation != null) {
            LOGGER.debug("process() - running consistency preservation");
            this.consistencyPreservation.stringMatch(jCas, this.entityMentionTypes, this.confidenceThresholdForConsistencyPreservation);
            this.consistencyPreservation.acroMatch(jCas, this.entityMentionTypes);
        }
    }

    /**
     * Collapses consecutive units that share the same begin/end offsets into
     * the first unit of each run.
     *
     * <p>If the units of a run do not all carry the same label, the unit that
     * is kept gets the outside label.
     *
     * @param sentence the labelled sentence, possibly containing several units per offset span
     * @return a new sentence with exactly one unit per run of equal offsets
     */
    protected de.julielab.jnet.tagger.Sentence removeDuplicatedTokens(de.julielab.jnet.tagger.Sentence sentence) {
        de.julielab.jnet.tagger.Sentence deduplicated = new de.julielab.jnet.tagger.Sentence();
        String previousSpan = null;
        // The unit that represents the current run in the result. Relabelling must hit this
        // unit; resetting merely the preceding unit misses it for runs of three or more units
        // whose first two labels agree.
        Unit keptUnit = null;
        TreeSet<String> labelsInRun = new TreeSet<>();
        for (int i = 0; i < sentence.getUnits().size(); i++) {
            Unit current = sentence.get(i);
            String span = current.begin + "@" + current.end;
            if (previousSpan == null || !previousSpan.equals(span)) {
                deduplicated.add(current);
                keptUnit = current;
                labelsInRun = new TreeSet<>();
                labelsInRun.add(current.getLabel());
            } else {
                labelsInRun.add(current.getLabel());
                if (labelsInRun.size() > 1) {
                    keptUnit.setLabel(OUTSIDE_LABEL);
                }
            }
            previousSpan = span;
        }
        return deduplicated;
    }

    /**
     * Builds the tagger input for one sentence.
     *
     * <p>Without abbreviation expansion every token becomes one unit. With
     * expansion enabled, a token covered by an abbreviation is either dropped
     * (when the abbreviation is defined at this position) or replaced by the
     * tokens of its full form; all replacement units keep the offsets and meta
     * information of the original token. Finally, empty bracket pairs are
     * removed when expansion is enabled.
     *
     * @param arrayList  the tokens of the sentence
     * @param jCas       the CAS the tokens belong to
     * @param arrayList2 the meta information, one map per token, index-aligned with {@code arrayList}
     * @return the sentence to hand to the tagger
     */
    protected de.julielab.jnet.tagger.Sentence createUnitSentence(ArrayList<Token> arrayList, JCas jCas, ArrayList<HashMap<String, String>> arrayList2) {
        de.julielab.jnet.tagger.Sentence sentence = new de.julielab.jnet.tagger.Sentence();
        ArrayList<Abbreviation> abbreviationList = getAbbreviationList(arrayList, jCas);
        for (int i = 0; i < arrayList.size(); i++) {
            Token token = arrayList.get(i);
            HashMap<String, String> metas = arrayList2.get(i);
            Abbreviation abbreviation = abbreviationList.get(i);
            String tokenText = token.getCoveredText();
            String unitText = tokenText;
            Annotation fullForm = null;
            if (this.expandAbbr && abbreviation != null) {
                if (abbreviation.getDefinedHere()) {
                    // The full form is already part of this sentence; skip the short form.
                    continue;
                }
                fullForm = abbreviation.getTextReference();
                if (fullForm != null) {
                    unitText = fullForm.getCoveredText();
                }
                // else: no full form is known, so the token is kept as it is instead of
                // failing with a NullPointerException.
            }
            if (unitText == null) {
                continue;
            }
            if (unitText.equals(tokenText)) {
                sentence.add(new Unit(token.getBegin(), token.getEnd(), unitText, "", metas));
                continue;
            }

            // From here on the text differs from the token text, so it stems from fullForm.
            // The class literal avoids creating a throwaway Token in the CAS.
            @SuppressWarnings("rawtypes")
            ArrayList fullFormTokens = (ArrayList) UIMAUtils.getAnnotations(jCas, fullForm, Token.class);
            if (unitText.length() <= 0 || fullFormTokens.size() != 0) {
                for (Object fullFormToken : fullFormTokens) {
                    sentence.add(new Unit(token.getBegin(), token.getEnd(), ((Token) fullFormToken).getCoveredText(), "", metas));
                }
            } else {
                // The full form has text but no token annotations: split on whitespace.
                StringTokenizer tokenizer = new StringTokenizer(unitText);
                while (tokenizer.hasMoreTokens()) {
                    sentence.add(new Unit(token.getBegin(), token.getEnd(), tokenizer.nextToken(), "", metas));
                }
            }
        }
        if (this.expandAbbr) {
            sentence = removeConsecutiveBrackets(sentence);
        }
        return sentence;
    }

    /**
     * Removes directly adjacent bracket pairs, i.e. {@code "("} immediately
     * followed by {@code ")"} and {@code "["} immediately followed by
     * {@code "]"}, from a sentence.
     *
     * @param sentence the sentence to filter
     * @return a new sentence without empty bracket pairs; all other units are
     *         kept in their original order
     */
    private de.julielab.jnet.tagger.Sentence removeConsecutiveBrackets(de.julielab.jnet.tagger.Sentence sentence) {
        de.julielab.jnet.tagger.Sentence filtered = new de.julielab.jnet.tagger.Sentence();
        int size = sentence.getUnits().size();
        int i = 0;
        while (i < size) {
            Unit current = sentence.getUnits().get(i);
            if (i + 1 < size) {
                String currentRep = current.getRep();
                String nextRep = sentence.getUnits().get(i + 1).getRep();
                boolean emptyPair = ("(".equals(currentRep) && ")".equals(nextRep))
                        || ("[".equals(currentRep) && "]".equals(nextRep));
                if (emptyPair) {
                    // Drop both brackets and continue with the unit right after the pair.
                    // Falling through here would keep the opening bracket and skip the
                    // unit that follows the closing one.
                    i += 2;
                    continue;
                }
            }
            filtered.add(current);
            i++;
        }
        return filtered;
    }

    /**
     * Looks up, for every token, the first abbreviation annotation returned by
     * {@code UIMAUtils.getAnnotations} for that token.
     *
     * @param arrayList the tokens to inspect
     * @param jCas      the CAS the tokens belong to
     * @return a list index-aligned with {@code arrayList}; an entry is
     *         {@code null} when no abbreviation was found for the token
     */
    private ArrayList<Abbreviation> getAbbreviationList(ArrayList<Token> arrayList, JCas jCas) {
        ArrayList<Abbreviation> abbreviations = new ArrayList<>();
        for (Token token : arrayList) {
            // The class literal replaces "new Abbreviation(jCas, 0, 0).getClass()", which
            // created an unused feature structure in the CAS for every single token.
            @SuppressWarnings("rawtypes")
            ArrayList found = (ArrayList) UIMAUtils.getAnnotations(jCas, token, Abbreviation.class);
            if (found == null || found.isEmpty()) {
                abbreviations.add(null);
            } else {
                abbreviations.add((Abbreviation) found.get(0));
            }
        }
        return abbreviations;
    }

    /**
     * Collects the meta information of all given tokens.
     *
     * <p>One {@code Interval} slot per activated meta feature is shared across
     * the calls to {@code getMetas}; every slot starts out as {@code null}.
     *
     * @param arrayList the tokens of one sentence
     * @return one meta map per token, in token order
     */
    private ArrayList<HashMap<String, String>> getMetaList(ArrayList<Token> arrayList) {
        final ArrayList<HashMap<String, String>> metasPerToken = new ArrayList<>();
        // A freshly allocated object array already contains only null entries.
        final Interval[] openIntervals = new Interval[this.activatedMetas.size()];
        for (Token token : arrayList) {
            metasPerToken.add(getMetas(token, openIntervals));
        }
        return metasPerToken;
    }

    /**
     * Determines the meta feature values for one token.
     *
     * <p>For every activated meta feature an {@code Interval} slot holds the
     * annotation currently under consideration. An empty slot is filled from
     * the next annotation of the corresponding iterator, using the configured
     * getter (called reflectively) as the value. If the token lies within the
     * interval, the value is stored under the feature's {@code _feat_unit}
     * name, prefixed with {@code B_} when the feature's {@code _begin_flag} is
     * {@code true} and the token starts the interval. The slot is cleared once
     * a token ends where the interval ends.
     *
     * @param token       the token to compute meta information for
     * @param intervalArr the interval slots, one per activated meta feature;
     *                    modified by this method
     * @return the meta values of the token; empty if no feature configuration is available
     */
    private HashMap<String, String> getMetas(Token token, Interval[] intervalArr) {
        HashMap<String, String> metas = new HashMap<>();
        if (this.featureConfig == null) {
            return metas;
        }
        for (int i = 0; i < this.annotationIterators.size(); i++) {
            try {
                if (this.annotationIterators.get(i).hasNext() && intervalArr[i] == null) {
                    Annotation annotation = (Annotation) this.annotationIterators.get(i).next();
                    // No-argument reflective call; avoids the ambiguous "invoke(annotation, null)".
                    Object value = annotation.getClass().getMethod(this.valueMethods.get(i)).invoke(annotation);
                    intervalArr[i] = new Interval(annotation.getBegin(), annotation.getEnd(), String.valueOf(value));
                }
            } catch (Exception e) {
                LOGGER.warn("getMetas() - failed getting meta information for current token. No metas used!", e);
                metas = new HashMap<>();
            }
        }
        for (int i = 0; i < this.activatedMetas.size(); i++) {
            Interval interval = intervalArr[i];
            String meta = this.activatedMetas.get(i);
            String unitName = this.featureConfig.getProperty(meta + "_feat_unit");
            if (interval != null && interval.isIn(token.getBegin(), token.getEnd())) {
                // Constant-first comparison: a missing "_begin_flag" property counts as "false"
                // instead of raising a NullPointerException.
                boolean markBegin = "true".equals(this.featureConfig.getProperty(meta + "_begin_flag"));
                if (markBegin && interval.getBegin() == token.getBegin()) {
                    metas.put(unitName, "B_" + interval.getAnnotation());
                } else {
                    metas.put(unitName, interval.getAnnotation());
                }
                if (interval.getEnd() == token.getEnd()) {
                    intervalArr[i] = null;
                }
            }
        }
        return metas;
    }

    /**
     * Turns the labels of a tagged sentence into entity mention annotations.
     *
     * <p>Consecutive units carrying the same non-outside label form one
     * segment. A segment is emitted via {@code addAnnotation} as soon as the
     * label changes or the sentence ends; it spans from the begin of its first
     * unit to the end of its last unit and is passed the confidence of its
     * last unit.
     *
     * @param sentence the labelled sentence
     * @param jCas     the CAS to write the annotations to
     */
    public void writeToCAS(de.julielab.jnet.tagger.Sentence sentence, JCas jCas) {
        String previousLabel = OUTSIDE_LABEL;
        int segmentBegin = 0;
        int segmentEnd = 0;
        double segmentConfidence = -1.0d;
        for (int index = 0; index < sentence.size(); index++) {
            final Unit current = sentence.get(index);
            final String currentLabel = current.getLabel();
            final double currentConfidence = current.getConfidence();
            final boolean previousOutside = previousLabel.equals(OUTSIDE_LABEL);
            final boolean currentOutside = currentLabel.equals(OUTSIDE_LABEL);

            if (previousOutside) {
                if (!currentOutside) {
                    // A new segment starts after outside material.
                    segmentBegin = current.begin;
                }
            } else if (!previousLabel.equals(currentLabel)) {
                // The running segment ends here, either because of a different entity
                // label or because outside material follows.
                addAnnotation(jCas, segmentBegin, segmentEnd, previousLabel, segmentConfidence);
                segmentBegin = current.begin;
            }

            previousLabel = currentLabel;
            segmentEnd = current.end;
            segmentConfidence = currentConfidence;

            final boolean lastUnit = index == sentence.size() - 1;
            if (lastUnit && !currentOutside) {
                // A segment that reaches the end of the sentence is flushed here.
                addAnnotation(jCas, segmentBegin, segmentEnd, previousLabel, segmentConfidence);
            }
        }
    }

    /**
     * Creates an entity mention for the given span unless it is filtered out.
     *
     * <p>Nothing is written when {@code ignoreLabel} rejects the span, when the
     * negative list contains the covered text for this label, or when no
     * annotation type is mapped to the label. Failures while creating the
     * annotation are logged and not propagated.
     *
     * @param jCas the CAS to add the annotation to
     * @param i    begin offset of the mention in the document text
     * @param i2   end offset of the mention in the document text
     * @param str  the label predicted by the tagger
     * @param d    the confidence; only stored when segment confidences are enabled
     */
    private void addAnnotation(JCas jCas, int i, int i2, String str, double d) {
        final String coveredText = jCas.getDocumentText().substring(i, i2);
        if (ignoreLabel(jCas, i, i2)) {
            return;
        }

        final boolean onNegativeList = this.negativeList != null && this.negativeList.contains(coveredText, str);
        if (onNegativeList) {
            LOGGER.debug("addAnnotation() - ignoring current entity mention as contained in negativeList");
            return;
        }

        final String mentionClassName = this.entityMap.get(str);
        if (null == mentionClassName) {
            LOGGER.debug("addAnnotation() - ommitted entity mention for label: " + str);
            return;
        }

        try {
            final EntityMention mention = (EntityMention) JCoReAnnotationTools.getAnnotationByClassName(jCas, mentionClassName);
            mention.setBegin(i);
            mention.setEnd(i2);
            mention.setTextualRepresentation(coveredText);
            mention.setSpecificType(str);
            mention.setComponentId(COMPONENT_ID);
            if (this.showSegmentConf) {
                mention.setConfidence(String.valueOf(d));
            }
            mention.addToIndexes();
        } catch (Exception e) {
            LOGGER.error("addAnnotation() - could not generate new EntityMention", e);
        }
    }

    /**
     * Decides whether a predicted mention should be dropped because it looks
     * like an abbreviation that was never introduced in the document.
     *
     * <p>The check is only active when {@link #abbrevPattern} is set and the
     * covered text matches it. In that case the span is kept only if an
     * abbreviation annotation found for the span has a text reference and its
     * covered text matches {@link #ABBREV_PATTERN}.
     *
     * @param jCas the CAS to inspect
     * @param i    begin offset of the span in the document text
     * @param i2   end offset of the span in the document text
     * @return {@code true} if the span should be ignored, {@code false} otherwise
     */
    protected boolean ignoreLabel(JCas jCas, int i, int i2) {
        String candidate = jCas.getDocumentText().substring(i, i2);
        if (this.abbrevPattern == null || !this.abbrevPattern.matcher(candidate).matches()) {
            return false;
        }

        // A temporary annotation serves as the window for the lookup. It is removed in a
        // finally block so that it cannot stay in the indexes when the lookup fails.
        Annotation probe = new Annotation(jCas, i, i2);
        probe.addToIndexes();
        @SuppressWarnings("rawtypes")
        ArrayList abbreviations;
        try {
            // Class literal instead of instantiating a throwaway Abbreviation in the CAS.
            abbreviations = (ArrayList) UIMAUtils.getAnnotations(jCas, probe, Abbreviation.class);
        } finally {
            probe.removeFromIndexes();
        }

        if (abbreviations != null && abbreviations.size() > 0) {
            LOGGER.debug("ignoreLabel() - found JACRO-recognized abbreviations under this string: " + candidate);
            for (Object entry : abbreviations) {
                Abbreviation abbreviation = (Abbreviation) entry;
                if (abbreviation.getTextReference() != null && abbreviation.getCoveredText().matches(ABBREV_PATTERN)) {
                    LOGGER.debug("ignoreLabel() - abbreviation: " + abbreviation.getCoveredText() + " introduced for: " + abbreviation.getTextReference().getCoveredText());
                    return false;
                }
            }
        }
        LOGGER.debug("ignoreLabel() - ignoring annotations on " + candidate + " because it is a not introduced abbreviation!");
        return true;
    }
}
