package de.julielab.jcore.ae.acronymtagger.main;

import de.julielab.jcore.ae.acronymtagger.entries.AcronymEntry;
import de.julielab.jcore.ae.acronymtagger.entries.FullformEntry;
import de.julielab.jcore.types.Abbreviation;
import de.julielab.jcore.types.Annotation;
import de.julielab.jcore.types.Sentence;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorContextException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceProcessException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/julielab/jcore/ae/acronymtagger/main/AcronymAnnotator.class */
public class AcronymAnnotator extends JCasAnnotator_ImplBase {
    // Component id stamped on every Abbreviation and full-form Annotation this class creates.
    private static final String COMPONENT_ID = "de.julielab.jcore.ae.acronymtagger.AcronymAnnotator";
    // Names of the configuration parameters looked up on the UimaContext.
    public static final String PARAM_ACROLIST = "AcroList";
    public static final String PARAM_CONSISTENCY_ANNO = "ConsistencyAnno";
    public static final String PARAM_MAXLENGTH_FACTOR = "MaxLength";

    // Multiplied with the acronym length in getPotFullformStart() to cap how many
    // preceding tokens are considered as part of a potential full form.
    @ConfigurationParameter(name = PARAM_MAXLENGTH_FACTOR, defaultValue = {"5"})
    int MAXLENGTHFACTOR;
    // Per-instance compiled forms of the regex sources declared further down in this class.
    private final Pattern ABBR_PATTERN = Pattern.compile(ABBREVIATION);
    private final Pattern EMBEDDED_ABBR_PATTERN = Pattern.compile(EMBEDDED_ABBR);
    private final Pattern LONG_FORM_IN_PARENTHESIS_PATTERN = Pattern.compile(LONG_FORM_IN_PARENTHESIS);

    // When true, process() runs a ConsistencyAnnotator over the CAS after all sentences are handled.
    // NOTE(review): the field initializer (false) disagrees with the annotation default ("true");
    // initialize() overwrites the field from the context value.
    @ConfigurationParameter(name = PARAM_CONSISTENCY_ANNO, defaultValue = {"true"})
    private boolean consistencyAnno = false;
    // Acronym -> full form lookup consulted by findFullformStart(); the full form is searched in
    // lower-cased text. Presumably filled by setAcroList() - its body is not visible here, confirm.
    private HashMap<String, String> acro2fullForm;

    // Optional acronym list parameter. NOTE(review): no read of this field is visible in this file.
    @ConfigurationParameter(name = PARAM_ACROLIST, mandatory = false)
    private String acroList;
    // Stop words that do not count towards the token budget in getPotFullformStart().
    // The array is queried with Arrays.binarySearch, so it MUST stay sorted in natural
    // String order; the lookup is case-sensitive and all entries are lower case.
    private static final String[] STOPWORDS = {"a", "about", "above", "across", "after", "afterwards", "again", "against", "all", "almost", "alone", "along", "already", "also", "although", "always", "am", "among", "amongst", "amoungst", "amount", "an", "and", "another", "any", "anyhow", "anyone", "anything", "anyway", "anywhere", "are", "around", "as", "at", "back", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "bill", "both", "bottom", "but", "by", "call", "can", "cannot", "cant", "co", "computer", "con", "could", "couldnt", "cry", "de", "describe", "detail", "do", "done", "down", "due", "during", "each", "eg", "eight", "either", "eleven", "else", "elsewhere", "empty", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", "fifteen", "fify", "fill", "find", "fire", "first", "five", "for", "former", "formerly", "forty", "found", "four", "from", "front", "full", "further", "get", "give", "go", "had", "has", "hasnt", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "hundred", "i", "ie", "if", "in", "inc", "indeed", "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter", "latterly", "least", "less", "ltd", "made", "many", "may", "me", "meanwhile", "might", "mill", "mine", "more", "moreover", "most", "mostly", "move", "much", "must", "my", "myself", "name", "namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own", "part", "per", "perhaps", "please", "put", "rather", "re", "same", "see", "seem", "seemed", "seeming", "seems", "serious", "several", "she", "should", "show", 
"side", "since", "sincere", "six", "sixty", "so", "some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "system", "take", "ten", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein", "thereupon", "these", "they", "thick", "thin", "third", "this", "those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "top", "toward", "towards", "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", "whatever", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who", "whoever", "whole", "whom", "whose", "why", "will", "with", "within", "without", "would", "yet", "you", "your", "yours", "yourself", "yourselves"};
    // A bracketed token "(...)" or "[...]" made of word characters and hyphens that contains an
    // upper-case ASCII letter next to another word character (optionally separated by one hyphen).
    private static String ABBREVIATION = "[\\(\\[][-\\w]*?([A-Z]-?\\w|\\w-?[A-Z])[-\\w]*?[\\)\\]]";
    // Same shape, but the bracket must be followed by at least one lower-case ASCII letter
    // before the upper-case part starts.
    private static String EMBEDDED_ABBR = "[\\(\\[][a-z]+?([A-Z]-?\\w|\\w-?[A-Z])[-\\w]*?[\\)\\]]";
    // An opening bracket, a word, a blank, then one or more words each followed by a blank or ')'.
    private static String LONG_FORM_IN_PARENTHESIS = "[\\(\\[]\\w+ (\\w+[ \\)])+";
    private static final Logger LOGGER = LoggerFactory.getLogger(AcronymAnnotator.class);

    /**
     * Reads the component configuration from the UIMA context.
     *
     * <p>Loads the acronym list via {@code setAcroList} and then reads the
     * {@link #PARAM_CONSISTENCY_ANNO} and {@link #PARAM_MAXLENGTH_FACTOR} parameters. If one of
     * these two parameters has no value in the context, the default declared on its
     * {@code @ConfigurationParameter} annotation ({@code true} / {@code 5}) is used instead of
     * failing with a {@code NullPointerException} while unboxing.
     *
     * @param uimaContext the context supplying the configuration parameter values
     * @throws ResourceInitializationException if loading the acronym list fails; the original
     *         exception is attached as the cause
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        // Let the base class store the context so that getContext() works for this component.
        super.initialize(uimaContext);
        LOGGER.info("[JACRO] initializing AcronymAnnotator...");
        try {
            setAcroList(uimaContext);

            Object consistencyValue = uimaContext.getConfigParameterValue(PARAM_CONSISTENCY_ANNO);
            // Annotation default is "true".
            this.consistencyAnno = consistencyValue == null || ((Boolean) consistencyValue).booleanValue();

            Object maxLengthValue = uimaContext.getConfigParameterValue(PARAM_MAXLENGTH_FACTOR);
            // Annotation default is "5".
            this.MAXLENGTHFACTOR = maxLengthValue == null ? 5 : ((Integer) maxLengthValue).intValue();

            LOGGER.info(" done");
        } catch (AnnotatorContextException e) {
            // Keep the cause: without it the reason for the failed start-up is lost.
            throw new ResourceInitializationException(e);
        } catch (AnnotatorConfigurationException e) {
            throw new ResourceInitializationException(e);
        } catch (ResourceProcessException e) {
            throw new ResourceInitializationException(e);
        }
    }

    /* JADX WARN: Code restructure failed: missing block: B:26:0x0122, code lost:
    
        throw new org.apache.uima.resource.ResourceProcessException("resource_data_not_valid", new java.lang.String[]{"faulty line in acroList: " + r0});
     */
    /* JADX WARN: Finally extract failed */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Placeholder for the acronym-list loader; the decompiler could not recover the real body.
     *
     * <p>As written, this method unconditionally throws {@link UnsupportedOperationException}.
     * Because {@code initialize} calls it and only catches the three checked UIMA exception
     * types, that unchecked exception propagates out of {@code initialize}.
     *
     * <p>The only fragment of the original logic that survived (see the JADX warning above)
     * shows a {@code ResourceProcessException} with key {@code "resource_data_not_valid"} being
     * raised for a "faulty line in acroList".
     * NOTE(review): presumably the original read the {@code AcroList} resource and filled
     * {@code acro2fullForm} - this must be restored from the original sources, confirm there.
     *
     * @param r11 the UIMA context (parameter name is a decompiler register name)
     * @throws UnsupportedOperationException always, in this decompiled form
     */
    private void setAcroList(org.apache.uima.UimaContext r11) throws org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException, org.apache.uima.analysis_engine.annotator.AnnotatorContextException, org.apache.uima.resource.ResourceProcessException, org.apache.uima.resource.ResourceInitializationException {
        /*
            Method dump skipped, instructions count: 484
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: de.julielab.jcore.ae.acronymtagger.main.AcronymAnnotator.setAcroList(org.apache.uima.UimaContext):void");
    }

    /**
     * Annotates acronyms and their full forms in every {@code Sentence} of the CAS.
     *
     * <p>Each sentence's covered text is passed to {@code annotate} together with the sentence's
     * begin offset. If consistency annotation is enabled, a {@code ConsistencyAnnotator} is run
     * over the CAS afterwards.
     *
     * <p>A {@link StringIndexOutOfBoundsException} raised while processing is logged (including
     * its stack trace) and ends processing of the document; it is deliberately not rethrown.
     *
     * @param jCas the CAS to read sentences from and to add annotations to
     */
    @Override
    public void process(JCas jCas) {
        LOGGER.debug("[JACRO] processing document...");
        try {
            FSIterator it = jCas.getJFSIndexRepository().getAnnotationIndex(Sentence.type).iterator();
            while (it.hasNext()) {
                Sentence sentence = (Sentence) it.next();
                annotate(sentence.getCoveredText(), jCas, sentence.getBegin());
            }
            if (this.consistencyAnno) {
                new ConsistencyAnnotator().consistencyAnnotate(jCas);
            }
        } catch (StringIndexOutOfBoundsException e) {
            // Pass the exception to the logger so the failing index and stack trace are not lost.
            LOGGER.error("typical Error in AcronymAnnotator.process() : StringIndexOutOfBounds", e);
        }
    }

    /**
     * Runs the three detectors over one sentence: plain bracketed acronyms, acronyms embedded
     * behind a lower-case prefix, and long forms given in parentheses.
     *
     * <p>Any exception thrown by a detector is logged and swallowed, so the detectors that come
     * after the failing one are skipped for this sentence.
     *
     * @param str  the sentence text
     * @param jCas the CAS receiving the annotations
     * @param i    offset of the sentence within the document text
     */
    private void annotate(String str, JCas jCas, int i) {
        try {
            final Matcher plainAcronyms = this.ABBR_PATTERN.matcher(str);
            processAllMatches(plainAcronyms, jCas, str, i, false);

            final Matcher embeddedAcronyms = this.EMBEDDED_ABBR_PATTERN.matcher(str);
            processAllMatches(embeddedAcronyms, jCas, str, i, true);

            final Matcher bracketedLongForms = this.LONG_FORM_IN_PARENTHESIS_PATTERN.matcher(str);
            processLongFormInParantheses(bracketedLongForms, str, jCas, i);
        } catch (Exception failure) {
            LOGGER.error("annotate(String sentence, JCas aJCas, int offset)", failure);
        }
    }

    /**
     * Handles the pattern "ABBR (long form words)": the long form stands in brackets and the
     * abbreviation is the token directly in front of the bracket.
     *
     * <p>For every match the initials of the bracketed words are collected and compared
     * (ignoring case) with the preceding token. On equality an {@code Abbreviation} is created
     * over the token, with the bracketed text as expansion and an {@code Annotation} over the
     * bracketed text as its text reference.
     *
     * @param matcher matcher of the long-form pattern over {@code str}
     * @param str     the sentence text
     * @param jCas    the CAS receiving the annotations
     * @param i       offset of the sentence within the document text
     */
    private void processLongFormInParantheses(Matcher matcher, String str, JCas jCas, int i) {
        int searchFrom = 0;
        while (matcher.find(searchFrom)) {
            // Long form without the opening bracket and without the trailing ')' or blank.
            int longFormStart = matcher.start() + 1;
            int longFormEnd = matcher.end() - 1;
            searchFrom = longFormEnd;

            // Blank separating the abbreviation token from the bracket (exclusive token end).
            int abbrEnd = getNextToken(str, longFormStart);
            int precedingBlank = getNextToken(str, abbrEnd - 1);
            // getNextToken yields 0 both for a blank at index 0 and for "reached the start of the
            // text". Only in the first case does the token begin one position later; otherwise the
            // abbreviation is the first token of the sentence and must keep its first character.
            int abbrStart = (precedingBlank == 0 && str.charAt(0) != ' ') ? 0 : precedingBlank + 1;
            String abbr = str.substring(abbrStart, abbrEnd);

            String matched = matcher.group();
            String longForm = matched.substring(1, matched.length() - 1);

            // Collect the first character of every blank-separated word of the long form.
            StringBuilder initials = new StringBuilder();
            for (int pos = 0; pos < longForm.length(); pos++) {
                char current = longForm.charAt(pos);
                if (pos == 0 || (longForm.charAt(pos - 1) == ' ' && current != ' ')) {
                    initials.append(current);
                }
            }

            if (initials.toString().equalsIgnoreCase(abbr)) {
                LOGGER.debug("identified full form: {} for abbreviation: {}", longForm, abbr);
                Abbreviation abbreviation = new Abbreviation(jCas, abbrStart + i, abbrEnd + i);
                abbreviation.setExpan(longForm);
                abbreviation.setDefinedHere(true);
                Annotation fullForm = new Annotation(jCas, i + longFormStart, i + longFormEnd);
                fullForm.setComponentId(COMPONENT_ID);
                fullForm.addToIndexes();
                abbreviation.setTextReference(fullForm);
                abbreviation.setComponentId(COMPONENT_ID);
                abbreviation.addToIndexes();
            }
        }
    }

    /**
     * Tells whether the given text contains at least two upper-case letters.
     *
     * <p>Upper case here means ASCII {@code A-Z} or the Latin-1 capitals U+00C0..U+00D6 and
     * U+00D8..U+00DE (the multiplication sign U+00D7 in between is excluded).
     *
     * @param str the text to inspect
     * @return {@code true} if two or more such characters occur in {@code str}
     */
    private boolean hasMoreThanOneUpperCase(String str) {
        int upperCount = 0;
        for (int pos = 0; pos < str.length(); pos++) {
            char c = str.charAt(pos);
            boolean asciiUpper = c >= 'A' && c <= 'Z';
            boolean latin1Upper = (c >= 0x00C0 && c <= 0x00D6) || (c >= 0x00D8 && c <= 0x00DE);
            if (asciiUpper || latin1Upper) {
                upperCount++;
                if (upperCount > 1) {
                    // The answer is settled once the second capital is seen.
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Handles the pattern "full form (ACRO)": for every bracketed acronym candidate found by
     * {@code matcher}, looks for a matching full form in the text in front of the bracket and,
     * if one is found, adds an {@code Abbreviation} over the acronym whose text reference is an
     * {@code Annotation} over the full form.
     *
     * <p>Candidates with fewer than two upper-case letters are skipped. After each match the
     * search resumes one character behind the match; the method returns once that position
     * lies at or beyond the end of the sentence.
     *
     * @param matcher matcher of an acronym pattern over {@code str}
     * @param jCas    the CAS receiving the annotations
     * @param str     the sentence text
     * @param i       offset of the sentence within the document text
     * @param z       {@code true} if the pattern allows a lower-case prefix inside the bracket,
     *                in which case the acronym starts at the first ASCII capital
     */
    private void processAllMatches(Matcher matcher, JCas jCas, String str, int i, boolean z) {
        int searchFrom = 0;
        while (matcher.find(searchFrom)) {
            final int bracketPos = matcher.start();
            final int acroStart;
            if (z) {
                acroStart = getEmbeddedAcroStart(str, bracketPos + 2);
            } else {
                acroStart = bracketPos + 1;
            }
            final int acroEnd = matcher.end() - 1;
            final String acronym = str.substring(acroStart, acroEnd);

            if (hasMoreThanOneUpperCase(acronym)) {
                final int windowStart = getPotFullformStart(str, acroStart, acronym.length());
                final String window = str.substring(windowStart, acroStart);

                // The window is padded with a blank so the first word also has a boundary before it.
                int relativeStart = -1;
                if (window.length() != 0) {
                    relativeStart = findFullformStart(" " + window, acronym);
                }

                if (relativeStart != -1) {
                    final int fullFormStart = windowStart + relativeStart;
                    final int fullFormEnd = getFfEnd(str, bracketPos + 1);
                    final String fullFormText = str.substring(fullFormStart, fullFormEnd);
                    LOGGER.debug("processAllMatches() - identified full form: " + fullFormText + " for abbreviation: " + acronym);

                    Abbreviation abbreviation = new Abbreviation(jCas, acroStart + i, acroEnd + i);
                    abbreviation.setExpan(fullFormText);
                    abbreviation.setDefinedHere(true);
                    Annotation fullForm = new Annotation(jCas, i + fullFormStart, i + fullFormEnd);
                    fullForm.setComponentId(COMPONENT_ID);
                    fullForm.addToIndexes();
                    abbreviation.setTextReference(fullForm);
                    abbreviation.setComponentId(COMPONENT_ID);
                    abbreviation.addToIndexes();
                }
            }

            // Same continuation rule whether or not the candidate was accepted.
            searchFrom = matcher.end() + 1;
            if (searchFrom < 0 || searchFrom >= str.length()) {
                return;
            }
        }
    }

    /**
     * Finds the first ASCII upper-case letter ({@code A-Z}) at or after the given position.
     *
     * @param str the text to scan
     * @param i   index at which scanning starts
     * @return the index of the first capital found; if there is none, the position at which
     *         scanning stopped ({@code str.length()}, or {@code i} itself when it already lies
     *         at or beyond the end)
     */
    private int getEmbeddedAcroStart(String str, int i) {
        int pos = i;
        for (; pos < str.length(); pos++) {
            final char c = str.charAt(pos);
            if (c >= 'A' && c <= 'Z') {
                return pos;
            }
        }
        return pos;
    }

    /**
     * Computes the exclusive end of a full form that stands in front of a bracket.
     *
     * <p>If the character two positions before {@code i} is whitespace, the full form ends at
     * that whitespace; otherwise it ends one position before {@code i}.
     *
     * @param str the sentence text
     * @param i   index just behind the opening bracket; must be at least 2
     * @return {@code i - 2} or {@code i - 1} as described above
     */
    private int getFfEnd(String str, int i) {
        final int beforeBracket = i - 2;
        if (Character.isWhitespace(str.charAt(beforeBracket))) {
            return beforeBracket;
        }
        return i - 1;
    }

    /**
     * Searches the text in front of an acronym for the position where its full form begins.
     *
     * <p>First the acronym is looked up in {@code acro2fullForm}; if the stored full form occurs
     * in the lower-cased text, its position decides the result. Otherwise the acronym is matched
     * against the text from right to left, character by character.
     *
     * <p>processAllMatches() passes the text with one blank prepended, so the returned value
     * (always one less than an index into {@code str}) is an offset into the unpadded text.
     *
     * @param str  the potential full-form text (padded with a leading blank by the caller)
     * @param str2 the acronym
     * @return the offset described above, or {@code -1} if no full form start was found.
     *         NOTE(review): the dictionary branch returns {@code indexOf - 1}, which is also
     *         {@code -1} when the stored full form is found at index 0 - confirm this is intended.
     */
    private int findFullformStart(String str, String str2) {
        // Two cursors, both starting at the last character: 'length' walks the acronym,
        // 'length2' walks the text.
        int length = str2.length() - 1;
        int length2 = str.length() - 1;
        LOGGER.debug("findFullformStart() -- acro: " + str2);
        LOGGER.debug("findFullformStart() -- potential FF: " + str);
        // Dictionary shortcut. NOTE(review): requires acro2fullForm to be non-null; where it is
        // initialised is not visible in this file.
        if (this.acro2fullForm.containsKey(str2)) {
            int indexOf = str.toLowerCase().indexOf(this.acro2fullForm.get(str2));
            if (indexOf != -1) {
                return indexOf - 1;
            }
        }
        while (length >= 0) {
            char charAt = str2.charAt(length);
            char charAt2 = str.charAt(length2);
            // Letters are compared case-insensitively.
            if (Character.isLetter(charAt)) {
                charAt = Character.toLowerCase(charAt);
            }
            if (Character.isLetter(charAt2)) {
                charAt2 = Character.toLowerCase(charAt2);
            }
            if (Character.isWhitespace(charAt) || Character.isDigit(charAt) || charAt == '-' || charAt == '+') {
                // Whitespace, digits, '-' and '+' in the acronym need no counterpart in the text.
                length--;
            } else {
                // Move the text cursor left until the current acronym character is found or the
                // text is exhausted. For the first acronym character (length == 0) a hit only
                // counts if it is preceded by whitespace, '-', ')', '/' or '"'.
                while (true) {
                    if ((length2 < 0 || charAt == charAt2) && (length2 <= 0 || length != 0 || Character.isWhitespace(str.charAt(length2 - 1)) || str.charAt(length2 - 1) == '-' || str.charAt(length2 - 1) == ')' || str.charAt(length2 - 1) == '/' || str.charAt(length2 - 1) == '\"')) {
                        break;
                    }
                    length2--;
                    if (length2 >= 0) {
                        charAt2 = str.charAt(length2);
                        if (Character.isLetter(charAt2)) {
                            charAt2 = Character.toLowerCase(charAt2);
                        }
                    }
                }
                // Stopping at index 0 or before it means the text ran out (index 0 is the
                // caller's padding blank), so this acronym character has no counterpart.
                if (length2 <= 0 && length >= 0) {
                    return -1;
                }
                // Consume the matched character in both strings.
                length2--;
                length--;
            }
        }
        return length2;
    }

    /**
     * Determines how far back in front of an acronym the search for its full form may reach.
     *
     * <p>Walks backwards over the blank-separated tokens of {@code str.substring(0, i)}. Tokens
     * found in {@code STOPWORDS} are not counted. The walk ends when the start of the text is
     * reached, when the count equals {@code MAXLENGTHFACTOR * i2}, or when it equals
     * {@code i2 + 2}.
     *
     * @param str the sentence text
     * @param i   index at which the acronym starts
     * @param i2  length of the acronym
     * @return index in {@code str} at which the candidate full-form text begins
     */
    private int getPotFullformStart(String str, int i, int i2) {
        final String prefix = str.substring(0, i);
        int tokenEnd = prefix.length() - 1;
        int boundary = getNextToken(prefix, tokenEnd);

        // Start one lower if a blank stands two positions before the acronym.
        int counted = (i >= 2 && prefix.charAt(i - 2) == ' ') ? -1 : 0;

        final int limit = this.MAXLENGTHFACTOR * i2;
        while (boundary != -1 && counted != limit) {
            // A boundary of 0 is treated as "token starts at the beginning of the text".
            final int tokenStart = (boundary == 0) ? 0 : boundary + 1;
            final String token = prefix.substring(tokenStart, tokenEnd);
            final boolean stopword = Arrays.binarySearch(STOPWORDS, token) >= 0;
            if (!stopword) {
                counted++;
            }
            if (counted == i2 + 2) {
                break;
            }
            tokenEnd = boundary;
            boundary = getNextToken(prefix, tokenEnd - 1);
        }

        if (boundary == 0) {
            return 0;
        }
        return boundary + 1;
    }

    /**
     * Scans backwards from {@code i} for the nearest blank.
     *
     * @param str the text to scan
     * @param i   index at which the backward scan starts
     * @return {@code -1} if {@code i} is {@code 0} or {@code -1}; otherwise the index of the
     *         first blank found at or before {@code i}, or {@code 0} if index 0 is reached
     *         first (index 0 itself is never inspected)
     */
    int getNextToken(String str, int i) {
        if (i == -1 || i == 0) {
            return -1;
        }
        for (int pos = i; pos != 0; pos--) {
            if (str.charAt(pos) == ' ') {
                return pos;
            }
        }
        return 0;
    }

    /**
     * Picks the full form with the highest count from an acronym's dictionary entry.
     *
     * <p>Only counts greater than zero are considered; on equal counts the full form seen
     * first in iteration order wins.
     *
     * @param acronymEntry the dictionary entry whose full forms are inspected
     * @return the key of the best full form, or the empty string if none has a positive count
     */
    private String getBestFullformFromDict(AcronymEntry acronymEntry) {
        String bestFullform = "";
        int bestCount = 0;
        for (Map.Entry<String, FullformEntry> candidate : acronymEntry.getAllFullforms()) {
            final FullformEntry fullform = candidate.getValue();
            if (fullform.count <= bestCount) {
                continue;
            }
            bestCount = fullform.count;
            bestFullform = candidate.getKey();
        }
        return bestFullform;
    }
}
