package de.julielab.jules.ae.genemapping.utils;

import de.julielab.jcore.types.EntityMention;
import de.julielab.jcore.types.Header;
import de.julielab.jcore.types.Token;
import de.julielab.jcore.utility.JCoReAnnotationTools;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.tartarus.snowball.ext.EnglishStemmer;

/* loaded from: input_file:de/julielab/jules/ae/genemapping/utils/ContextUtils.class */
public class ContextUtils {
    /**
     * English stop words handed to the {@link StandardAnalyzer} built in
     * {@link #getStemmingStream(String)}, i.e. words that are dropped before stemming.
     * The array is public and therefore mutable by any caller; the list is copied into a
     * fresh set on every {@code getStemmingStream} call, so later modifications take effect.
     */
    public static final String[] STOPWORDS = {"a", "about", "above", "across", "after", "afterwards", "again", "against", "all", "almost", "alone", "along", "already", "also", "although", "always", "am", "among", "amongst", "amoungst", "amount", "an", "and", "another", "any", "anyhow", "anyone", "anything", "anyway", "anywhere", "are", "around", "as", "at", "back", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "bill", "both", "bottom", "but", "by", "call", "can", "cannot", "cant", "co", "computer", "con", "could", "couldnt", "cry", "de", "describe", "detail", "do", "done", "down", "due", "during", "each", "eg", "eight", "either", "eleven", "else", "elsewhere", "empty", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", "fifteen", "fify", "fill", "find", "fire", "first", "five", "for", "former", "formerly", "forty", "found", "four", "from", "front", "full", "further", "get", "give", "go", "had", "has", "hasnt", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "hundred", "ie", "if", "in", "inc", "indeed", "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter", "latterly", "least", "less", "ltd", "made", "many", "may", "me", "meanwhile", "might", "mill", "mine", "more", "moreover", "most", "mostly", "move", "much", "must", "my", "myself", "name", "namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own", "part", "per", "perhaps", "please", "put", "rather", "re", "same", "see", "seem", "seemed", "seeming", "seems", "serious", "several", "she", "should", "show", "side", 
"since", "sincere", "six", "sixty", "so", "some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "system", "take", "ten", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein", "thereupon", "these", "they", "thick", "thin", "third", "this", "those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "top", "toward", "towards", "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", "whatever", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who", "whoever", "whole", "whom", "whose", "why", "will", "with", "within", "without", "would", "yet", "you", "your", "yours", "yourself", "yourselves"};
    /** Class-wide SLF4J logger. */
    private static final Logger LOGGER = LoggerFactory.getLogger(ContextUtils.class);

    /**
     * Creates a token stream over {@code str} that removes the {@link #STOPWORDS} and
     * stems the remaining tokens with the English Snowball stemmer.
     *
     * <p>The returned stream has not been reset; the caller is responsible for calling
     * {@code reset()} before consuming it and for closing it afterwards. The analyzer
     * created here is not closed by this method.
     *
     * @param str the text to tokenize
     * @return a stop-word-filtered, stemmed token stream over {@code str}
     */
    public static TokenStream getStemmingStream(String str) {
        // Step 1: copy the stop word array into Lucene's set type.
        final HashSet<String> stopWordList = new HashSet<>(Arrays.asList(STOPWORDS));
        final CharArraySet stopWordSet = CharArraySet.copy(stopWordList);

        // Step 2: tokenize with a standard analyzer that drops those stop words.
        final StandardAnalyzer analyzer = new StandardAnalyzer(stopWordSet);
        final TokenStream tokenized = analyzer.tokenStream("", new StringReader(str));

        // Step 3: stem whatever is left.
        return new SnowballFilter(tokenized, new EnglishStemmer());
    }

    /**
     * Returns the distinct stemmed, stop-word-filtered tokens ("types") of {@code str},
     * in order of first occurrence and separated by single spaces.
     *
     * <p>Fixes relative to the previous version:
     * <ul>
     *   <li>the stream is reset before it is consumed, as the Lucene {@code TokenStream}
     *       contract requires (the sibling {@link #makeContextTokens(String)} already did so);</li>
     *   <li>duplicates are detected with the set of already emitted stems; previously that
     *       set was filled but never consulted and the input text itself was searched instead;</li>
     *   <li>an input that yields no tokens returns the empty string instead of failing while
     *       removing a trailing separator that does not exist.</li>
     * </ul>
     *
     * @param str the text to analyze
     * @return the space-separated distinct stems, or the empty string if there are none
     * @throws IOException if the underlying token stream fails
     */
    public static String makeContextTypes(String str) throws IOException {
        StringBuilder types = new StringBuilder();
        Set<String> seen = new HashSet<>();
        try (TokenStream stream = getStemmingStream(str)) {
            stream.reset();
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            while (stream.incrementToken()) {
                String stem = term.toString();
                // Set.add returns false for a stem that was already emitted.
                if (seen.add(stem)) {
                    types.append(stem).append(' ');
                }
            }
        }
        // Drop the trailing separator, if anything was appended at all.
        if (types.length() > 0) {
            types.setLength(types.length() - 1);
        }
        return types.toString();
    }

    /**
     * Returns all stemmed, stop-word-filtered tokens of {@code str} in stream order,
     * separated by single spaces. Repeated tokens are kept.
     *
     * @param str the text to analyze
     * @return the space-separated stems, or the empty string if the stream yields no token
     * @throws IOException if the underlying token stream fails
     */
    public static String makeContextTokens(String str) throws IOException {
        final StringBuilder joined = new StringBuilder();
        try (TokenStream tokens = getStemmingStream(str)) {
            tokens.reset();
            final CharTermAttribute term = tokens.addAttribute(CharTermAttribute.class);
            while (tokens.incrementToken()) {
                joined.append(term.toString());
                joined.append(" ");
            }
        }
        // Every token was followed by a separator; strip the last one once the stream is closed.
        final int length = joined.length();
        if (length > 0) {
            joined.deleteCharAt(length - 1);
        }
        return joined.toString();
    }

    /**
     * Builds a context query from the complete document text of the given CAS.
     *
     * @param jCas the CAS whose document text serves as context
     * @return the query built from the trimmed document text, or {@code null} (after logging
     *         an error) if the document text is {@code null} or empty
     * @throws IOException if tokenizing the text fails
     */
    public static BooleanQuery makeContextQuery(JCas jCas) throws IOException {
        final String text = jCas.getDocumentText();
        final boolean hasText = text != null && !text.isEmpty();
        if (hasText) {
            // The untrimmed text is logged; only the query is built from the trimmed form.
            LOGGER.debug("semantic index search context: " + text);
            final String trimmed = text.trim();
            return makeContextQuery(trimmed);
        }
        LOGGER.error("ContextString for the Query is empty");
        return null;
    }

    /**
     * Builds a context query from a window of text around an entity mention.
     *
     * <p>{@link #makeContext(JCas, int, EntityMention)} returns {@code null} when no context
     * could be produced; that case is now passed on as a {@code null} query (the same signal
     * {@link #makeContextQuery(JCas)} uses for an empty context) instead of failing with a
     * {@code NullPointerException} on {@code trim()}.
     *
     * @param jCas          the CAS containing the mention
     * @param i             the context window size in tokens, forwarded to {@code makeContext}
     * @param entityMention the mention the context is centered on
     * @return the query built from the trimmed context, or {@code null} if no context exists
     * @throws IOException if tokenizing the context fails
     */
    public static BooleanQuery makeContextQuery(JCas jCas, int i, EntityMention entityMention) throws IOException {
        LOGGER.debug("Making context query");
        String context = makeContext(jCas, i, entityMention);
        if (context == null) {
            // makeContext has already logged why there is no context.
            return null;
        }
        return makeContextQuery(context.trim());
    }

    /**
     * Builds a textual context window around an entity mention.
     *
     * <p>If the CAS contains tokens, the window consists of the covered text of up to
     * {@code i} tokens, starting {@code i / 2} tokens before the first token found for the
     * mention; each token is followed by a single space. A non-positive {@code i} selects
     * {@link BooleanQuery#getMaxClauseCount()} as window size. If the CAS contains no tokens
     * at all, a character window of five characters per token is cut out of the document
     * text instead.
     *
     * <p>Any exception raised while the window is assembled is logged and not propagated;
     * whatever context was collected up to that point is returned.
     *
     * @param jCas          the CAS containing the mention
     * @param i             the window size in tokens; values {@code <= 0} mean "maximum clause count"
     * @param entityMention the mention the context is centered on
     * @return the context string, or {@code null} if no context could be produced
     */
    public static String makeContext(JCas jCas, int i, EntityMention entityMention) {
        LOGGER.debug("Making context");
        StringBuilder context = new StringBuilder();
        try {
            FSIterator<?> tokens = jCas.getAnnotationIndex(Token.type).iterator();
            if (!tokens.hasNext()) {
                return makeCharacterContext(jCas, i, entityMention);
            }

            // Locate the token to center on: first token reported for the mention,
            // else a token containing it, else any overlapping token.
            List<?> mentionTokens = (List<?>) UIMAUtils.getAnnotations(jCas, entityMention, Token.class);
            Token anchor;
            if (mentionTokens != null && !mentionTokens.isEmpty()) {
                anchor = (Token) mentionTokens.get(0);
            } else {
                anchor = UIMAUtils.getContainingAnnotation(jCas, entityMention, Token.class);
            }
            if (anchor == null) {
                anchor = JCoReAnnotationTools.getOverlappingAnnotation(jCas, Token.class.getCanonicalName(), entityMention.getBegin(), entityMention.getEnd());
            }
            if (anchor == null) {
                // Without an anchor the iterator cannot be positioned.
                LOGGER.error("No token found for entity {}; cannot create a token-based context", entityMention);
                return null;
            }

            int windowSize = i > 0 ? i : BooleanQuery.getMaxClauseCount();
            int stepsBack = windowSize / 2;

            // Walk back half a window from the anchor; if that runs off the front of the
            // index, restart at the first token.
            tokens.moveTo(anchor);
            while (tokens.isValid() && stepsBack > 0) {
                stepsBack--;
                tokens.moveToPrevious();
            }
            if (!tokens.isValid()) {
                tokens.moveToFirst();
            }

            for (int taken = 0; tokens.hasNext() && taken < windowSize; taken++) {
                context.append(((Token) tokens.next()).getCoveredText());
                context.append(" ");
            }
        } catch (Exception e) {
            // The exception is passed as the trailing argument so the logger records its stack trace.
            LOGGER.error("Error while running MakeContextQuery on document with ID {} on entity {}", findDocumentId(jCas), entityMention, e);
        }
        if (context.length() == 0) {
            LOGGER.error("ContextString for the Query is empty");
            return null;
        }
        LOGGER.debug("semantic index search context: " + context);
        LOGGER.debug("----" + context + "-----");
        return context.toString();
    }

    /** Returns the document ID of the first {@link Header} in the CAS, or {@code null} if there is none. */
    private static String findDocumentId(JCas jCas) {
        FSIterator<?> headers = jCas.getAnnotationIndex(Header.type).iterator();
        return headers.hasNext() ? ((Header) headers.next()).getDocId() : null;
    }

    /**
     * Cuts a character window around the mention out of the document text; used when the
     * CAS has no tokens. The window spans five characters per requested token, and a
     * non-positive token window falls back to the maximum clause count as in the token path.
     */
    private static String makeCharacterContext(JCas jCas, int tokenWindowSize, EntityMention entityMention) {
        String docId = findDocumentId(jCas);
        LOGGER.warn("The document with ID {} does not have any tokens. Cannot create context on token basis, resorting to character-based context.", docId != null ? docId : "<unknown>");

        int effectiveTokens = tokenWindowSize > 0 ? tokenWindowSize : BooleanQuery.getMaxClauseCount();
        // long arithmetic: the maximum clause count may be as large as Integer.MAX_VALUE.
        long charWindowSize = 5L * effectiveTokens;
        LOGGER.warn("Converting token window size of {} to character window size of {} (larger by factor 5)", effectiveTokens, charWindowSize);

        String text = jCas.getDocumentText();
        int begin = entityMention.getBegin();
        int end = entityMention.getEnd();
        // Characters to add on each side of the mention (negative if the mention exceeds the window).
        long margin = (charWindowSize - (end - begin)) / 2;
        int from = (int) Math.max(0L, begin - margin);
        int to = (int) Math.min((long) text.length(), end + margin + 1);
        return text.substring(from, to).trim();
    }

    /**
     * Empty method: the body contains no statements, and no call to it appears in this class.
     * NOTE(review): possibly a leftover or a decompilation artifact — confirm before removing.
     *
     * @param i  unused
     * @param i2 unused
     */
    private static void between(int i, int i2) {
    }

    /**
     * Builds a disjunctive query over the stemmed, stop-word-filtered tokens of a context string.
     *
     * <p>Every token becomes an optional ({@code SHOULD}) term clause on the field
     * {@code indexed_context}. At most {@code BooleanQuery.getMaxClauseCount() - 1} clauses
     * are added; if the context holds more tokens, a warning is logged and the remaining
     * tokens are ignored.
     *
     * <p>This method replaces a stub that unconditionally threw
     * {@code UnsupportedOperationException}; the logic follows the instruction listing that
     * accompanied the stub. The only deliberate deviation is that the debug message prints
     * the built query rather than the builder object.
     *
     * @param str the context text
     * @return the query; it has no clauses if {@code str} yields no tokens
     * @throws IOException if the underlying token stream fails
     */
    public static BooleanQuery makeContextQuery(String str) throws IOException {
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        int maxClauses = BooleanQuery.getMaxClauseCount();
        try (TokenStream stream = getStemmingStream(str)) {
            stream.reset();
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            int clauses = 0;
            while (stream.incrementToken()) {
                if (clauses >= maxClauses - 1) {
                    // Stay below the clause limit so that building the query cannot fail.
                    LOGGER.warn("makeContextQuery() - context too long, cut after " + maxClauses + " tokens");
                    break;
                }
                builder.add(new TermQuery(new Term("indexed_context", term.toString())), BooleanClause.Occur.SHOULD);
                clauses++;
            }
        }
        BooleanQuery query = builder.build();
        LOGGER.debug("makeContextQuery() - query for disambiguation: " + query);
        return query;
    }
}
