package ch.epfl.bbp.uima.ae;

import ch.epfl.bbp.uima.types.Keep;
import de.julielab.jules.types.Token;
import java.util.Locale;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@TypeCapability(inputs = {"ch.epfl.bbp.uima.types.Keep"}, outputs = {"ch.epfl.bbp.uima.types.Keep"})
/* loaded from: input_file:ch/epfl/bbp/uima/ae/BioLemmatizerNormalizerAnnotator.class */
public class BioLemmatizerNormalizerAnnotator extends JCasAnnotator_ImplBase {
    private static Logger LOG = LoggerFactory.getLogger(BioLemmatizerNormalizerAnnotator.class);

    @ConfigurationParameter(name = "caseSensitive", defaultValue = {"false"}, description = "If true, tokens are not normalized to lowercase before string comparisions")
    private boolean caseSensitive;

    @ConfigurationParameter(name = "onlyTokens", defaultValue = {"false"}, description = "Only lemmatize the Keeps that are Tokens, rest are left unchanged.")
    private boolean onlyTokens;

    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        BlueBioLemmatizer.lemmatize("warmup", "");
    }

    public void process(JCas jCas) throws AnalysisEngineProcessException {
        for (Keep keep : JCasUtil.select(jCas, Keep.class)) {
            Token enclosedAnnot = keep.getEnclosedAnnot();
            String str = null;
            if (enclosedAnnot instanceof Token) {
                String lemmaStr = enclosedAnnot.getLemmaStr();
                if (!this.caseSensitive) {
                    lemmaStr = lemmaStr.toLowerCase();
                }
                keep.setNormalizedText(lemmaStr);
            } else if (!this.onlyTokens) {
                try {
                    try {
                        str = BlueBioLemmatizer.lemmatize(enclosedAnnot.getCoveredText(), "");
                        if (!this.caseSensitive) {
                            str = str.toLowerCase();
                        }
                        keep.setNormalizedText(str);
                    } catch (Exception e) {
                        LOG.warn("Failed to lemmatize '{}'", enclosedAnnot.getCoveredText());
                        String trim = enclosedAnnot.getCoveredText().trim();
                        if (!this.caseSensitive) {
                            trim = trim.toLowerCase();
                        }
                        keep.setNormalizedText(trim);
                    }
                } catch (Throwable th) {
                    if (!this.caseSensitive) {
                        str = str.toLowerCase();
                    }
                    keep.setNormalizedText(str);
                    throw th;
                }
            }
        }
    }
}
