package de.julielab.jules.ae.genemapping.disambig;

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.wcohen.ss.Jaccard;
import de.julielab.jules.ae.genemapping.SynHit;
import de.julielab.jules.ae.genemapping.index.ContextIndexFieldNames;
import de.julielab.jules.ae.genemapping.utils.ContextUtils;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/julielab/jules/ae/genemapping/disambig/SemanticContextIndex.class */
/**
 * Looks up gene candidates ({@link SynHit}s) in a Lucene index of gene contexts and scores them
 * against a context query in order to pick the best matching candidate.
 *
 * <p>The searcher uses {@link ClassicSimilarity}. Gene contexts that were already read from the
 * index are kept in a Guava cache (at most 10000 entries, expiring 10 minutes after being
 * written); instances opened on the same canonical index path share one cache.
 */
public class SemanticContextIndex implements SemanticIndex {
    private static final Logger LOGGER = LoggerFactory.getLogger(SemanticContextIndex.class);

    /** Gene context caches, keyed by the canonical path of the index they belong to. */
    private static final ConcurrentHashMap<String, Cache<String, String>> caches = new ConcurrentHashMap<>();

    /** Searcher on the context index; also available through {@link #getSemanticIndexSearcher()}. */
    public IndexSearcher searcher;

    boolean debug = false;

    /** Maps a gene ID to the context text stored for it in the index. */
    private final Cache<String, String> geneContextCache;

    /**
     * Opens the context index located in the given directory.
     *
     * @param file directory that contains the Lucene context index
     * @throws IOException if the canonical path cannot be resolved or the index cannot be opened
     */
    public SemanticContextIndex(File file) throws IOException {
        // Resolve the path before opening the reader so that a failure here cannot leak an open reader.
        String canonicalPath = file.getCanonicalPath();
        this.searcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(file.toPath())));
        this.searcher.setSimilarity(new ClassicSimilarity());
        LOGGER.info("using {} as semantic disambiguation index with {} gene entries",
                file.getAbsolutePath(), this.searcher.getIndexReader().numDocs());
        // The lock makes "look up, create if missing, register" one atomic step, so a cache
        // for a given path is created exactly once.
        synchronized (caches) {
            Cache<String, String> cache = caches.get(canonicalPath);
            if (cache == null) {
                LOGGER.info("Creating new gene context cache for index {}", canonicalPath);
                cache = CacheBuilder.newBuilder().maximumSize(10000L).expireAfterWrite(10L, TimeUnit.MINUTES).build();
                caches.put(canonicalPath, cache);
            } else {
                LOGGER.info("Using existing gene context cache for index {}", canonicalPath);
            }
            this.geneContextCache = cache;
        }
    }

    /**
     * Same as {@link #doDisambiguation(List, Query, double)} with a minimum score of {@code 0.0}.
     *
     * @param list  candidates to choose from
     * @param query context query the candidates are scored against
     * @return the best candidate, or {@code null} if none was found
     * @throws IOException if searching the index fails
     */
    public SynHit doDisambiguation(List<SynHit> list, Query query) throws IOException {
        return doDisambiguation(list, query, 0.0d);
    }

    /**
     * Scores all candidates with one combined search and returns the best one.
     *
     * <p>Every candidate found by the search gets its semantic score set to the Lucene score of
     * its index document and its compare type set to {@code SEMSCORE}. The scored candidates are
     * then sorted by their natural order and the first one is taken as the best hit.
     *
     * @param list  candidates to choose from
     * @param query context query the candidates are scored against
     * @param d     minimum semantic score the best hit must reach
     * @return the best hit if its score is at least {@code d}; {@code null} if the list is empty,
     *         the search found nothing, or the best score is below {@code d}
     * @throws IOException if searching the index fails
     */
    public SynHit doDisambiguation(List<SynHit> list, Query query, double d) throws IOException {
        if (list.isEmpty()) {
            // Nothing to disambiguate; avoid running a search for no candidates.
            return null;
        }
        Map<String, SynHit> hitsById = new HashMap<>();
        ScoreDoc[] scoreDocs = getContextForSynHits(list, hitsById, query).scoreDocs;
        List<SynHit> scoredHits = new ArrayList<>(scoreDocs.length);
        for (ScoreDoc scoreDoc : scoreDocs) {
            String id = this.searcher.doc(scoreDoc.doc).getField(ContextIndexFieldNames.LOOKUP_ID_FIELD).stringValue();
            SynHit hit = hitsById.get(id);
            hit.setSemanticScore(scoreDoc.score);
            hit.setCompareType(SynHit.CompareType.SEMSCORE);
            scoredHits.add(hit);
            LOGGER.debug("hit: {}", hit);
            LOGGER.debug("TFIDF semantic score is: {}", scoreDoc.score);
        }
        if (scoredHits.isEmpty()) {
            return null;
        }
        Collections.sort(scoredHits);
        SynHit bestHit = scoredHits.get(0);
        LOGGER.debug("doDisambiguation() - bestHit: {}", bestHit);
        return bestHit.getSemanticScore() >= d ? bestHit : null;
    }

    /**
     * Returns the stored context text for the given candidates, using the cache where possible.
     *
     * <p>IDs missing from the cache are looked up in the index in a single search; contexts found
     * that way are added to the cache. IDs that are not in the index do not appear in the result.
     *
     * @param list candidates whose contexts are requested
     * @return map from gene ID to context text
     * @throws IOException if searching or reading the index fails
     */
    public Map<String, String> retrieveGeneContexts(List<SynHit> list) throws IOException, CorruptIndexException {
        Map<String, String> contextsById = new HashMap<>();
        List<SynHit> cacheMisses = new ArrayList<>();
        for (SynHit hit : list) {
            String cachedContext = this.geneContextCache.getIfPresent(hit.getId());
            if (cachedContext != null) {
                contextsById.put(hit.getId(), cachedContext);
            } else {
                cacheMisses.add(hit);
            }
        }
        if (cacheMisses.isEmpty()) {
            return contextsById;
        }
        // No context query here: the documents are selected by ID only.
        TopDocs found = getContextForSynHits(cacheMisses, new HashMap<String, SynHit>(), null);
        for (ScoreDoc scoreDoc : found.scoreDocs) {
            Document doc = getSemanticIndexSearcher().doc(scoreDoc.doc);
            String id = doc.getField(ContextIndexFieldNames.LOOKUP_ID_FIELD).stringValue();
            String context = doc.getField(ContextIndexFieldNames.LOOKUP_CONTEXT_FIELD).stringValue();
            contextsById.put(id, context);
            this.geneContextCache.put(id, context);
        }
        return contextsById;
    }

    /**
     * Searches the index for the documents of the given candidates.
     *
     * @param list  candidates whose IDs restrict the search
     * @param map   output parameter; receives an ID-to-candidate entry for every element of {@code list}
     * @param query context query used for scoring; may be {@code null} to select by ID only
     * @return the search result, limited to as many hits as there are candidates
     * @throws IOException if searching the index fails
     */
    public TopDocs getContextForSynHits(List<SynHit> list, Map<String, SynHit> map, Query query) throws IOException {
        String[] ids = new String[list.size()];
        int next = 0;
        for (SynHit hit : list) {
            String id = hit.getId();
            map.put(id, hit);
            ids[next++] = id;
        }
        LOGGER.debug("number of IDs: {}", ids.length);
        Query fullQuery = makeQuery(ids, query);
        LOGGER.debug("query: {}", fullQuery);
        // Lucene rejects a hit count of 0, so always ask for at least one hit; with no IDs the
        // ID filter has no clauses and the result is simply empty.
        TopDocs topDocs = this.searcher.search(fullQuery, Math.max(1, list.size()));
        LOGGER.debug("scoredDocs.length: {}", Integer.valueOf(topDocs.scoreDocs.length));
        return topDocs;
    }

    /**
     * Builds a query on the context field with one optional ({@code SHOULD}) term clause per
     * token of the stemmed text.
     *
     * @param str context text
     * @return the context query
     * @throws IOException if tokenizing the text fails
     */
    public BooleanQuery makeContextQuery(String str) throws IOException {
        BooleanQuery.Builder contextQuery = new BooleanQuery.Builder();
        for (String token : stemTokens(str)) {
            contextQuery.add(new TermQuery(new Term(ContextIndexFieldNames.LOOKUP_CONTEXT_FIELD, token)), BooleanClause.Occur.SHOULD);
        }
        return contextQuery.build();
    }

    /**
     * Combines a context query with a filter that only lets documents with one of the given IDs pass.
     *
     * @param strArr gene IDs the result is restricted to
     * @param query  context query that must match; if {@code null}, only the ID filter is applied
     *               (Lucene does not accept {@code null} clauses)
     * @return the combined query
     * @throws IOException declared for interface compatibility
     */
    public Query makeQuery(String[] strArr, Query query) throws IOException {
        // NOTE(review): one clause per ID; very long ID lists may exceed Lucene's
        // BooleanQuery clause limit — confirm expected list sizes.
        BooleanQuery.Builder idQuery = new BooleanQuery.Builder();
        for (String id : strArr) {
            idQuery.add(new TermQuery(new Term(ContextIndexFieldNames.LOOKUP_ID_FIELD, id)), BooleanClause.Occur.SHOULD);
            LOGGER.debug("makeQuery() - id added to idQuery: {}", id);
        }
        BooleanQuery.Builder combined = new BooleanQuery.Builder();
        if (query != null) {
            combined.add(query, BooleanClause.Occur.MUST);
        }
        combined.add(idQuery.build(), BooleanClause.Occur.FILTER);
        return combined.build();
    }

    /**
     * Combines a context query with a filter on a single gene ID.
     *
     * @param str          gene ID the result is restricted to
     * @param booleanQuery context query that must match; if {@code null}, only the ID filter is applied
     * @return the combined query
     * @throws IOException declared for interface compatibility
     */
    public BooleanQuery makeQuery(String str, BooleanQuery booleanQuery) throws IOException {
        BooleanQuery.Builder combined = new BooleanQuery.Builder();
        if (booleanQuery != null) {
            combined.add(booleanQuery, BooleanClause.Occur.MUST);
        }
        combined.add(new TermQuery(new Term(ContextIndexFieldNames.LOOKUP_ID_FIELD, str)), BooleanClause.Occur.FILTER);
        LOGGER.debug("makeQuery() - id added to idQuery: {}", str);
        return combined.build();
    }

    /**
     * Scores every candidate with its own search and returns the best one.
     *
     * <p>Each candidate's semantic score is set to the maximum score of its search, all
     * candidates get compare type {@code SEMSCORE}, and {@code arrayList} itself is sorted in
     * place by the candidates' natural order.
     *
     * @param arrayList candidates to choose from; reordered by this method
     * @param str       context text the candidates are scored against
     * @param d         minimum semantic score the best hit must reach
     * @return the first candidate after sorting if its score is at least {@code d}; {@code null}
     *         if the list is empty or the score is below {@code d}
     * @throws IOException if tokenizing the context or searching the index fails
     */
    public SynHit doSeparateDisambiguation(ArrayList<SynHit> arrayList, String str, double d) throws IOException {
        if (arrayList.isEmpty()) {
            return null;
        }
        // The stemmed context is the same for every candidate, so compute it once. Joining also
        // copes with a context that produces no tokens at all.
        // NOTE(review): makeSeparateQuery stems its input again, so the context is stemmed twice;
        // kept as is because changing it would alter the scores — confirm whether intended.
        String stemmedContext = String.join(" ", stemTokens(str));
        for (SynHit candidate : arrayList) {
            TopDocs topDocs = this.searcher.search(makeSeparateQuery(candidate.getId(), stemmedContext), arrayList.size());
            // NOTE(review): presumably NaN when the search has no hits (ID missing from the
            // index), which would then take part in the sort below — confirm and decide.
            double maxScore = topDocs.getMaxScore();
            if (LOGGER.isDebugEnabled() && topDocs.scoreDocs.length > 0) {
                // Diagnostic only: loading the document is skipped unless debug logging is on.
                String indexedContext = this.searcher.doc(topDocs.scoreDocs[0].doc)
                        .getField(ContextIndexFieldNames.LOOKUP_CONTEXT_FIELD).stringValue();
                LOGGER.debug("doSeparateDisambiguation() - Jaccard explanation: {}",
                        new Jaccard().explainScore(str, indexedContext));
            }
            candidate.setSemanticScore(maxScore);
            LOGGER.debug("doSeparateDisambiguation() - next hit: {}", candidate);
            LOGGER.debug("doSeparateDisambiguation() - semantic score is: {}", maxScore);
        }
        for (SynHit candidate : arrayList) {
            candidate.setCompareType(SynHit.CompareType.SEMSCORE);
        }
        Collections.sort(arrayList);
        SynHit bestHit = arrayList.get(0);
        return bestHit.getSemanticScore() >= d ? bestHit : null;
    }

    /**
     * Builds a query that requires the given gene ID and adds one optional ({@code SHOULD})
     * context term clause per token of the stemmed text.
     *
     * @param str  gene ID that must match
     * @param str2 context text
     * @return the query
     * @throws IOException if tokenizing the text fails
     */
    public BooleanQuery makeSeparateQuery(String str, String str2) throws IOException {
        BooleanQuery.Builder separateQuery = new BooleanQuery.Builder();
        separateQuery.add(new TermQuery(new Term(ContextIndexFieldNames.LOOKUP_ID_FIELD, str)), BooleanClause.Occur.MUST);
        for (String token : stemTokens(str2)) {
            separateQuery.add(new TermQuery(new Term(ContextIndexFieldNames.LOOKUP_CONTEXT_FIELD, token)), BooleanClause.Occur.SHOULD);
        }
        return separateQuery.build();
    }

    /**
     * Returns the searcher on the context index.
     *
     * @return the index searcher
     */
    public IndexSearcher getSemanticIndexSearcher() {
        return this.searcher;
    }

    /** Runs the text through the stemming token stream and returns the resulting terms in order. */
    private static List<String> stemTokens(String text) throws IOException {
        List<String> tokens = new ArrayList<>();
        // NOTE(review): assumes getStemmingStream returns a stream that is ready for
        // incrementToken() (i.e. already reset) — confirm in ContextUtils.
        try (TokenStream stream = ContextUtils.getStemmingStream(text)) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            while (stream.incrementToken()) {
                tokens.add(term.toString());
            }
        }
        return tokens;
    }
}
