package de.julielab.gene.candidateretrieval;

import com.google.common.cache.LoadingCache;
import de.julielab.gene.candidateretrieval.scoring.LuceneScorer;
import de.julielab.gene.candidateretrieval.scoring.MaxEntScorer;
import de.julielab.geneexpbase.GeneExpRuntimeException;
import de.julielab.geneexpbase.TermNormalizer;
import de.julielab.geneexpbase.candidateretrieval.CandidateCacheKey;
import de.julielab.geneexpbase.candidateretrieval.GeneCandidateRetrievalException;
import de.julielab.geneexpbase.candidateretrieval.QueryGenerator;
import de.julielab.geneexpbase.candidateretrieval.SynHit;
import de.julielab.geneexpbase.configuration.Parameters;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.GeneName;
import de.julielab.geneexpbase.scoring.JaroWinklerScorer;
import de.julielab.geneexpbase.scoring.LevenshteinScorer;
import de.julielab.geneexpbase.scoring.Scorer;
import de.julielab.geneexpbase.scoring.SimpleScorer;
import de.julielab.geneexpbase.scoring.TFIDFScorer;
import de.julielab.geneexpbase.scoring.TFIDFUtils;
import de.julielab.geneexpbase.scoring.TokenJaroSimilarityScorer;
import de.julielab.geneexpbase.services.CacheService;
import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.cache.Cache;
import javax.inject.Inject;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.ngram.NGramFilterFactory;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.QueryBuilder;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/julielab/gene/candidateretrieval/NameCentricRetrieval.class */
public class NameCentricRetrieval implements CandidateRetrieval {
    // Separator between a gene ID and its synonym priority in ID field values; scoreHits() splits on this literal.
    public static final String NAME_PRIO_DELIMITER = "__";
    public static final String LOGGER_NAME_CANDIDATES = "de.julielab.jules.ae.genemapper.candidates";
    // Scorer type codes understood by setScorerType(int).
    public static final int SIMPLE_SCORER = 0;
    public static final int TOKEN_JAROWINKLER_SCORER = 1;
    public static final int MAXENT_SCORER = 2;
    public static final int JAROWINKLER_SCORER = 3;
    public static final int LEVENSHTEIN_SCORER = 4;
    public static final int TFIDF = 5;
    public static final int LUCENE_SCORER = 10;
    // Classpath resource used for the MaxEnt scorer unless the "maxent_model" property names another model.
    public static final String MAXENT_SCORER_MODEL = "/genemapper_jules_mallet.mod";
    public static final int LUCENE_MAX_HITS = 20;
    // Shared thread pool, created lazily by the first constructor call; stopped via shutdownExecutor().
    private static ExecutorService executorService;
    // Weights read in the constructor from the candidate retrieval configuration prefix.
    private final Map<String, Float> globalFieldWeights;
    // Scorer applied when an index synonym equals the normalized mention (see scoreHits()).
    private final Scorer exactScorer;
    // Scorer applied to all other index synonyms (see scoreHits()).
    private final Scorer approxScorer;
    private final Cache<CandidateCacheKey, List> candidateCache;
    private final Configuration configuration;
    private final IndexReader geneRecordIndexReader;
    private final IndexReader nameCentricIndexReader;
    private final Boolean useLuceneCandidateCache;
    // Whitespace tokenizer plus 2-3 n-gram filter; stays null if building it fails in the constructor.
    private CustomAnalyzer ngramAnalyzer;
    private String maxEntModel;
    private TermNormalizer normalizer;
    // Remains null when no spelling index is configured or the configured file does not exist.
    private SpellChecker spellingChecker;
    public static final QueryGenerator CONJUNCTION = new BooleanQueryGenerator(BooleanClause.Occur.MUST, 0);
    public static final QueryGenerator DISJUNCTION = new BooleanQueryGenerator(BooleanClause.Occur.SHOULD, -1);
    public static final QueryGenerator DISJUNCTION_MINUS_1 = new BooleanQueryGenerator(BooleanClause.Occur.SHOULD, 1);
    public static final QueryGenerator DISJUNCTION_MINUS_2 = new BooleanQueryGenerator(BooleanClause.Occur.SHOULD, 2);
    public static final QueryGenerator NGRAM_2_3 = new NGramQueryGenerator(2, 3);
    public static final Logger candidateLog = LoggerFactory.getLogger("de.julielab.jules.ae.genemapper.candidates");
    // NOTE(review): the logger is registered under LuceneCandidateRetrieval, not this class - looks like a copy-paste leftover; confirm.
    private static final Logger log = LoggerFactory.getLogger(LuceneCandidateRetrieval.class);
    // NOTE(review): not referenced by any code visible in this part of the file.
    private static final ConcurrentHashMap<String, LoadingCache<CandidateCacheKey, List<SynHit>>> caches = new ConcurrentHashMap<>();
    // Timing counters exposed through the static getters below; only totalLuceneQueryTime is updated in the visible code.
    private static final AtomicLong totalCacheGettime = new AtomicLong();
    private static final AtomicLong totalCachePuttime = new AtomicLong();
    private static final AtomicLong totalLuceneQueryTime = new AtomicLong();
    // Searchers keyed by thread; presumably filled by the searcher accessor methods defined later in the file - TODO confirm.
    private static final Map<Thread, IndexSearcher> mentionIndexSearcher = new ConcurrentHashMap();
    private static final Map<Thread, IndexSearcher> geneRecordIndexSearcher = new ConcurrentHashMap();

    /**
     * Opens the name-centric and the gene record Lucene index and sets up scorers, the candidate cache, the
     * n-gram analyzer and the global field weights from the given configuration.
     *
     * @param configuration source of the index paths, scorer types, concurrency level, MaxEnt model and field weights
     * @param cacheService  provides the cache manager from which the "candidates-cache-name-centric" cache is obtained
     * @throws GeneCandidateRetrievalException if a required index path is missing, an index cannot be opened or an
     *                                         unknown scorer type is configured
     */
    @Inject
    public NameCentricRetrieval(Configuration configuration, CacheService cacheService) throws GeneCandidateRetrievalException {
        this.maxEntModel = MAXENT_SCORER_MODEL;
        this.configuration = configuration;
        this.useLuceneCandidateCache = (Boolean) configuration.getBoolean(Configuration.USE_LUCENE_CANDIDATES_CACHE).orElse(false);
        String nameCentricIndexPath = configuration.getProperty(Configuration.NAME_CENTRIC_INDEX);
        if (nameCentricIndexPath == null) {
            throw new GeneCandidateRetrievalException("mention index not specified in configuration file (critical).");
        }
        String geneRecordIndexPath = configuration.getProperty(Configuration.GENE_RECORD_INDEX);
        if (geneRecordIndexPath == null) {
            throw new GeneCandidateRetrievalException("geneRecordIndex index not specified in configuration file (critical).");
        }
        // The model must be known BEFORE the scorers are created: setScorerType(MAXENT_SCORER) reads this.maxEntModel.
        String configuredMaxEntModel = configuration.getProperty("maxent_model");
        if (configuredMaxEntModel != null) {
            this.maxEntModel = configuredMaxEntModel;
        }
        try {
            this.nameCentricIndexReader = DirectoryReader.open(FSDirectory.open(Paths.get(nameCentricIndexPath, new String[0])));
            int concurrencyLevel = Integer.parseInt((String) configuration.getOrDefault("concurrency_level", "1"));
            // The pool is static and shared by all instances, so it is created at most once.
            synchronized (LuceneCandidateRetrieval.class) {
                if (executorService == null) {
                    executorService = Executors.newFixedThreadPool(concurrencyLevel, new ThreadFactory() {
                        private final ThreadFactory tf = Executors.defaultThreadFactory();

                        @Override // java.util.concurrent.ThreadFactory
                        public Thread newThread(@NotNull Runnable runnable) {
                            Thread newThread = this.tf.newThread(runnable);
                            newThread.setName("pool-lucene-candidate-retrieval");
                            return newThread;
                        }
                    });
                }
            }
            log.debug("mention index loaded.");
            this.geneRecordIndexReader = DirectoryReader.open(FSDirectory.open(Paths.get(geneRecordIndexPath, new String[0])));
            log.info("Gene record index has {} segments", Integer.valueOf(this.geneRecordIndexReader.getSegmentInfos().size()));

            // The spelling checker is optional; it stays null when no usable index is configured.
            String spellingIndexPath = configuration.getProperty(Configuration.SPELLING_INDEX);
            if (spellingIndexPath != null) {
                File spellingIndex = new File(spellingIndexPath);
                if (spellingIndex.exists()) {
                    this.spellingChecker = new SpellChecker(FSDirectory.open(spellingIndex.toPath()));
                }
            }
            if (this.spellingChecker == null) {
                log.warn("Spelling index was not given or file does not exist. No spelling correction can be done. Specified spelling index: {}", spellingIndexPath);
            }

            // Both scorers default to the Lucene scorer when the configuration is silent.
            String exactScorerType = configuration.getProperty(Configuration.EXACT_SCORER_TYPE);
            if (exactScorerType == null) {
                log.debug("No configuration value given for exact_scorer_type");
                this.exactScorer = setScorerType(LUCENE_SCORER);
            } else {
                this.exactScorer = setScorerType(Integer.parseInt(exactScorerType));
            }
            String approxScorerType = configuration.getProperty(Configuration.APPROX_SCORER_TYPE);
            if (approxScorerType == null) {
                log.debug("No configuration value given for approx_scorer_type");
                this.approxScorer = setScorerType(LUCENE_SCORER);
            } else {
                this.approxScorer = setScorerType(Integer.parseInt(approxScorerType));
            }

            this.normalizer = new TermNormalizer();
            log.info("Mention index: {}", nameCentricIndexPath);
            log.info("Exact scorer: {}", this.exactScorer);
            log.info("Approx scorer: {}", this.approxScorer);
            this.candidateCache = cacheService.getCacheManager().getCache("candidates-cache-name-centric");

            try {
                Map<String, String> ngramParams = new HashMap<>();
                ngramParams.put("minGramSize", "2");
                ngramParams.put("maxGramSize", "3");
                this.ngramAnalyzer = CustomAnalyzer.builder().withTokenizer("whitespace", new String[0]).addTokenFilter(NGramFilterFactory.class, ngramParams).build();
            } catch (IOException e) {
                // Best effort as before, but reported through the logger instead of stderr.
                // scoreIdsByNGramSynonyms() depends on this analyzer.
                log.error("Could not build the n-gram analyzer.", e);
            }

            this.globalFieldWeights = new HashMap<>();
            // "(float) (double)" works whether the configuration yields a boxed Double or a primitive double.
            configuration.getDouble(Configuration.dot(new String[]{Configuration.PREFIX_CANDIDATE_RETRIEVAL, Configuration.PARAM_DISMAX_TIE_BREAKER})).ifPresent(weight -> {
                this.globalFieldWeights.put(Configuration.PARAM_DISMAX_TIE_BREAKER, Float.valueOf((float) (double) weight));
            });
            for (String field : GeneRecordQueryGenerator.ALL_FIELDS) {
                configuration.getDouble(Configuration.dot(new String[]{Configuration.PREFIX_CANDIDATE_RETRIEVAL, field})).ifPresent(weight -> {
                    this.globalFieldWeights.put(field, Float.valueOf((float) (double) weight));
                });
            }
            for (String synonymField : GeneRecordQueryGenerator.SYNONYM_FIELDS) {
                // NOTE(review): the "<field>_exact" weight is stored under the plain field name and therefore replaces
                // any weight read for that field in the loop above - confirm that this is intended.
                configuration.getDouble(Configuration.dot(new String[]{Configuration.PREFIX_CANDIDATE_RETRIEVAL, synonymField + "_exact"})).ifPresent(weight -> {
                    this.globalFieldWeights.put(synonymField, Float.valueOf((float) (double) weight));
                });
            }
        } catch (IOException e) {
            throw new GeneCandidateRetrievalException(e);
        }
    }

    /**
     * Returns the shared, mutable counter object itself, not a snapshot of its value.
     * NOTE(review): no code visible in this part of the file updates this counter.
     */
    public static AtomicLong getTotalCacheGettime() {
        return totalCacheGettime;
    }

    /**
     * Returns the shared, mutable counter object itself, not a snapshot of its value.
     * NOTE(review): no code visible in this part of the file updates this counter.
     */
    public static AtomicLong getTotalCachePuttime() {
        return totalCachePuttime;
    }

    /**
     * Returns the shared counter to which uncached index lookups add their elapsed milliseconds.
     */
    public static AtomicLong getTotalLuceneQueryTime() {
        return totalLuceneQueryTime;
    }

    /**
     * Shuts down the shared thread pool. Does nothing when the pool was never created, which is the case as long
     * as no instance of this class has been constructed.
     */
    public static void shutdownExecutor() {
        // Read the static field once so the null check and the call see the same reference.
        ExecutorService pool = executorService;
        if (pool != null) {
            pool.shutdown();
        }
    }

    /**
     * Returns the normalizer used to build gene mentions and gene names from plain strings.
     */
    public TermNormalizer getNormalizer() {
        return this.normalizer;
    }

    /**
     * Replaces the normalizer that was created in the constructor.
     *
     * @param termNormalizer the normalizer to use from now on
     */
    public void setNormalizer(TermNormalizer termNormalizer) {
        this.normalizer = termNormalizer;
    }

    /**
     * Returns the exact-match scorer; the approximate scorer is not exposed by this method.
     */
    public Scorer getScorer() {
        return this.exactScorer;
    }

    /**
     * Returns the spelling checker, or {@code null} if the constructor could not open a spelling index.
     */
    @Override // de.julielab.gene.candidateretrieval.CandidateRetrieval
    public SpellChecker getSpellingChecker() {
        return this.spellingChecker;
    }

    /**
     * Creates the scorer for the given scorer type code. Despite its name this method does not modify any
     * field; it only returns the new scorer.
     *
     * @param i one of the scorer type constants of this class ({@link #SIMPLE_SCORER},
     *          {@link #TOKEN_JAROWINKLER_SCORER}, {@link #MAXENT_SCORER}, {@link #JAROWINKLER_SCORER},
     *          {@link #LEVENSHTEIN_SCORER}, {@link #TFIDF}, {@link #LUCENE_SCORER})
     * @return a new scorer of the requested type
     * @throws GeneCandidateRetrievalException if the type code is unknown
     */
    public Scorer setScorerType(int i) throws GeneCandidateRetrievalException {
        switch (i) {
            case SIMPLE_SCORER:
                return new SimpleScorer();
            case TOKEN_JAROWINKLER_SCORER:
                return new TokenJaroSimilarityScorer();
            case MAXENT_SCORER:
                // The default model is a classpath resource; any other value is treated as a file path.
                if (this.maxEntModel.equals(MAXENT_SCORER_MODEL)) {
                    return new MaxEntScorer(getClass().getResourceAsStream(MAXENT_SCORER_MODEL));
                }
                return new MaxEntScorer(new File(this.maxEntModel));
            case JAROWINKLER_SCORER:
                return new JaroWinklerScorer();
            case LEVENSHTEIN_SCORER:
                return new LevenshteinScorer();
            case TFIDF: {
                // The TF-IDF statistics are learned from the synonym lookup field of the name-centric index.
                TFIDFUtils tfidfUtils = new TFIDFUtils();
                tfidfUtils.learnFromLuceneIndex(this.nameCentricIndexReader, SynonymIndexFieldNames.LOOKUP_SYN_FIELD);
                return new TFIDFScorer(tfidfUtils);
            }
            case LUCENE_SCORER:
                return new LuceneScorer();
            default:
                throw new GeneCandidateRetrievalException("Unknown mention scorer type: " + i);
        }
    }

    /**
     * Describes the exact-match scorer.
     *
     * @return the scorer's own info string, or a fixed Lucene description when no exact scorer is set
     */
    public String getScorerInfo() {
        if (this.exactScorer != null) {
            return this.exactScorer.info();
        }
        return "Lucene Score (unnormalized)";
    }

    /**
     * Returns the type code reported by the exact-match scorer.
     */
    public int getScorerType() {
        return this.exactScorer.getScorerType();
    }

    /**
     * Wraps the plain mention text into a {@link GeneMention} built with this instance's normalizer and
     * delegates to the mention-based overload.
     *
     * @param str            the mention text
     * @param queryGenerator generator for the index query
     * @return the candidates returned by the mention-based overload
     */
    @Override // de.julielab.gene.candidateretrieval.CandidateRetrieval
    public List<SynHit> getCandidates(String str, QueryGenerator queryGenerator) {
        final GeneMention mention = new GeneMention(str, this.normalizer);
        return getCandidates(mention, queryGenerator);
    }

    /**
     * Retrieves candidates for the mention, passing the mention's own taxonomy IDs on to the delegate overload.
     *
     * @param geneMention    the mention to look up
     * @param queryGenerator generator for the index query
     * @return the candidates returned by the delegate overload
     */
    public List<SynHit> getCandidates(GeneMention geneMention, QueryGenerator queryGenerator) {
        return getCandidates(geneMention, geneMention.getTaxonomyIds(), queryGenerator);
    }

    /**
     * Retrieves candidates for the mention without a gene ID filter.
     *
     * @param geneMention    the mention to look up
     * @param collection     taxonomy IDs to query for; {@code null} is treated like an empty collection
     * @param queryGenerator generator for the index query
     * @return the candidates returned by the delegate overload
     */
    public List<SynHit> getCandidates(GeneMention geneMention, Collection<String> collection, QueryGenerator queryGenerator) {
        Collection<String> taxIds = collection;
        if (taxIds == null) {
            taxIds = Collections.emptyList();
        }
        return getCandidates(geneMention, (Collection<String>) null, taxIds, queryGenerator);
    }

    /**
     * Retrieves candidates with the additional hit fields loaded and without retrieval parameters.
     *
     * @param geneMention    the mention to look up
     * @param collection     gene ID filter, may be {@code null}
     * @param collection2    taxonomy IDs to query for, may be {@code null} or empty
     * @param queryGenerator generator for the index query
     * @return the candidates returned by the delegate overload
     */
    public List<SynHit> getCandidates(GeneMention geneMention, Collection<String> collection, Collection<String> collection2, QueryGenerator queryGenerator) {
        final boolean loadSynHitFields = true;
        return getCandidates(geneMention, collection, collection2, loadSynHitFields, null, queryGenerator);
    }

    /**
     * Retrieves candidates without an explicit hit limit; {@code -1} is handed to the delegate, which only
     * applies limits greater than zero.
     *
     * @param geneMention    the mention to look up
     * @param collection     gene ID filter, may be {@code null}
     * @param collection2    taxonomy IDs to query for, may be {@code null} or empty
     * @param z              whether the additional hit fields are loaded from the index
     * @param parameters     optional retrieval parameters, may be {@code null}
     * @param queryGenerator generator for the index query
     * @return the candidates returned by the delegate overload
     */
    @Override // de.julielab.gene.candidateretrieval.CandidateRetrieval
    public List<SynHit> getCandidates(GeneMention geneMention, Collection<String> collection, Collection<String> collection2, boolean z, Parameters parameters, QueryGenerator queryGenerator) {
        final int noExplicitHitLimit = -1;
        return getCandidates(geneMention, collection, collection2, z, parameters, noExplicitHitLimit, queryGenerator);
    }

    /**
     * Central candidate lookup. Builds a cache key from the mention and the given options, queries the index
     * once without taxonomy restriction or once per taxonomy ID, and returns all hits sorted by score.
     *
     * @param geneMention    the mention to look up
     * @param collection     gene ID filter; ignored when {@code null}
     * @param collection2    taxonomy IDs; when {@code null} or empty a single unrestricted lookup is done
     * @param z              whether the additional hit fields are loaded from the index
     * @param parameters     optional parameters; field weights are taken from them when they enable query
     *                       field weights
     * @param i              maximum number of hits per lookup; only applied when greater than zero
     * @param queryGenerator generator for the index query
     * @return a new list with the hits of all lookups, compare type set to SCORE and sorted
     */
    public List<SynHit> getCandidates(GeneMention geneMention, Collection<String> collection, Collection<String> collection2, boolean z, Parameters parameters, int i, QueryGenerator queryGenerator) {
        CandidateCacheKey candidateCacheKey = new CandidateCacheKey(geneMention.getGeneName());
        candidateCacheKey.setLoadSynHitFields(z);
        candidateCacheKey.setQueryGenerator(queryGenerator);
        if (parameters != null && parameters.getBoolean(Configuration.dot(new String[]{Configuration.PREFIX_CANDIDATE_RETRIEVAL, Configuration.PARAM_USE_QUERY_FIELD_WEIGHTS}))) {
            candidateCacheKey.setFieldWeights(getFieldWeightsFromParameters(parameters));
        }
        if ((queryGenerator instanceof GeneRecordQueryGenerator) && ((GeneRecordQueryGenerator) queryGenerator).isUseContextGenesAsRelevanceSignal()) {
            candidateCacheKey.setContextNames((Collection) geneMention.getContextGeneNames().collect(Collectors.toSet()));
        }
        if (collection != null) {
            candidateCacheKey.setGeneIdsFilter(collection);
        }
        if (i > 0) {
            candidateCacheKey.setMaxHits(i);
        }

        // Offsets are only needed for the debug output; -1 marks a mention without offsets.
        int begin = -1;
        int end = -1;
        if (log.isDebugEnabled() && geneMention.getOffsets() != null) {
            begin = geneMention.getBegin();
            end = geneMention.getEnd();
        }

        List<SynHit> candidates = new ArrayList<>();
        if (collection2 == null || collection2.isEmpty()) {
            candidates.addAll(getCandidatesFromIndex(candidateCacheKey));
            if (log.isDebugEnabled()) {
                log.debug("Returning {} candidates for gene mention {}[{}-{}]", new Object[]{Integer.valueOf(candidates.size()), candidateCacheKey.getGeneName().getText(), Integer.valueOf(begin), Integer.valueOf(end)});
            }
        } else {
            // NOTE(review): the same key object is re-used with a different tax ID for every lookup; if the cache
            // stores keys by reference this mutates keys that are already in the cache - confirm.
            for (String taxId : collection2) {
                candidateCacheKey.setTaxId(taxId);
                List<SynHit> hitsForTaxId = getCandidatesFromIndex(candidateCacheKey);
                candidates.addAll(hitsForTaxId);
                if (log.isDebugEnabled()) {
                    // Report the current tax ID and its own hit count, not the whole ID collection and the running total.
                    log.debug("Returning {} candidates for gene mention {}[{}-{}] for taxonomy ID {}", new Object[]{Integer.valueOf(hitsForTaxId.size()), candidateCacheKey.getGeneName().getText(), Integer.valueOf(begin), Integer.valueOf(end), taxId});
                }
            }
        }

        for (SynHit hit : candidates) {
            hit.setCompareType(SynHit.CompareType.SCORE);
        }
        return candidates.stream().sorted().collect(Collectors.toList());
    }

    /**
     * Wraps the plain mention text into a {@link GeneMention} built with this instance's normalizer and
     * delegates to the mention-based overload.
     *
     * @param str            the mention text
     * @param collection     gene ID filter, may be {@code null}
     * @param collection2    taxonomy IDs to query for, may be {@code null} or empty
     * @param queryGenerator generator for the index query
     * @return the candidates returned by the mention-based overload
     */
    @Override // de.julielab.gene.candidateretrieval.CandidateRetrieval
    public List<SynHit> getCandidates(String str, Collection<String> collection, Collection<String> collection2, QueryGenerator queryGenerator) {
        final GeneMention mention = new GeneMention(str, this.normalizer);
        return getCandidates(mention, collection, collection2, queryGenerator);
    }

    /**
     * Retrieves candidates for a plain mention text, honoring the caller's choice whether the additional hit
     * fields are loaded. The flag used to be dropped, so the fields were always loaded.
     *
     * @param str            the mention text
     * @param collection     gene ID filter, may be {@code null}
     * @param collection2    taxonomy IDs to query for, may be {@code null} or empty
     * @param z              whether the additional hit fields are loaded from the index
     * @param queryGenerator generator for the index query
     * @return the candidates returned by the mention-based overload
     */
    public List<SynHit> getCandidates(String str, Collection<String> collection, Collection<String> collection2, boolean z, QueryGenerator queryGenerator) {
        return getCandidates(new GeneMention(str, this.normalizer), collection, collection2, z, (Parameters) null, queryGenerator);
    }

    /**
     * Returns the hits for the given key, using the candidate cache when it is enabled and querying the index
     * otherwise. Callers always receive clones, so they may modify the hits without affecting the stored ones.
     *
     * @param candidateCacheKey describes the lookup and doubles as cache key
     * @return a new, mutable list of cloned hits; never {@code null}
     * @throws GeneExpRuntimeException if the index lookup fails with an {@link IOException}
     */
    private List<SynHit> getCandidatesFromIndex(CandidateCacheKey candidateCacheKey) {
        final boolean useCache = this.useLuceneCandidateCache.booleanValue();
        @SuppressWarnings("unchecked") // the cache is declared with a raw List value type
        List<SynHit> hits = useCache ? (List<SynHit>) this.candidateCache.get(candidateCacheKey) : null;
        if (hits == null) {
            try {
                hits = getCandidatesFromIndexWithoutCache(candidateCacheKey);
            } catch (IOException e) {
                throw new GeneExpRuntimeException(e);
            }
            // Only write to the cache when it is also read; a disabled cache must not fill up.
            if (useCache) {
                this.candidateCache.put(candidateCacheKey, hits);
            }
        }
        // Failures while cloning propagate to the caller instead of being printed and turned into a null
        // result, which every caller in this class would have dereferenced.
        List<SynHit> copies = new ArrayList<>(hits.size());
        for (SynHit hit : hits) {
            copies.add((SynHit) hit.clone());
        }
        return copies;
    }

    /**
     * Runs the query generated by the key against the name-centric index, scores the hits and adds the
     * elapsed milliseconds to the Lucene query time counter.
     *
     * @param candidateCacheKey supplies the query, the hit limit, the gene name and the field loading flag
     * @return the scored hits
     * @throws IOException if searching or reading the index fails
     */
    private List<SynHit> getCandidatesFromIndexWithoutCache(CandidateCacheKey candidateCacheKey) throws IOException, BooleanQuery.TooManyClauses {
        final long start = System.currentTimeMillis();
        final Query query = candidateCacheKey.generateQuery();
        final IndexSearcher searcher = getNameCentricIndexSearcher();
        final TopDocs topDocs = searcher.search(query, candidateCacheKey.getMaxHits());
        log.debug("searching with query: {}; found hits: {}", query, topDocs.totalHits);
        final ArrayList<SynHit> scoredHits = scoreHits(topDocs, candidateCacheKey.getGeneName(), candidateCacheKey.isLoadSynHitFields());
        final long elapsed = System.currentTimeMillis() - start;
        totalLuceneQueryTime.addAndGet(elapsed);
        return scoredHits;
    }

    /**
     * Turns the Lucene hits into {@link SynHit}s. A hit whose synonym equals the normalized mention is scored
     * with the exact scorer, all others with the approximate scorer. For a scorer of the Lucene type an equal
     * synonym gets the fixed score 9999 and any other synonym keeps its Lucene score.
     *
     * @param topDocs  the Lucene search result
     * @param geneName the gene name that was searched for
     * @param z        whether source, entity type and taxonomy IDs are read from the documents
     * @return one hit per Lucene result, in result order
     * @throws IOException if a document cannot be read from the index
     */
    private ArrayList<SynHit> scoreHits(TopDocs topDocs, GeneName geneName, boolean z) throws CorruptIndexException, IOException {
        final String lowerCasedMention = geneName.getText().toLowerCase();
        final String normalizedMention = geneName.getNormalizedText();
        final ScoreDoc[] luceneHits = topDocs.scoreDocs;
        log.debug("ordering candidates for best match to this reference term: {} for top {} candidates", lowerCasedMention, luceneHits.length);
        candidateLog.trace("Search term: {}", normalizedMention);
        final IndexSearcher searcher = getNameCentricIndexSearcher();
        final ArrayList<SynHit> scoredHits = new ArrayList<>();
        for (ScoreDoc luceneHit : luceneHits) {
            Document doc = searcher.doc(luceneHit.doc);
            String synonym = doc.getField(SynonymIndexFieldNames.LOOKUP_SYN_FIELD).stringValue();

            // Every ID field value has the form <id>__<priority>.
            ArrayList<String> ids = new ArrayList<>();
            ArrayList<Integer> priorities = new ArrayList<>();
            for (IndexableField idField : doc.getFields(SynonymIndexFieldNames.ID_FIELD)) {
                String[] idAndPriority = idField.stringValue().split("__");
                ids.add(idAndPriority[0]);
                priorities.add(Integer.valueOf(idAndPriority[1]));
            }

            String source = null;
            String entityType = null;
            List<String> taxIds = Collections.emptyList();
            if (z) {
                source = doc.getField(SynonymIndexFieldNames.SOURCE).stringValue();
                entityType = doc.getField(SynonymIndexFieldNames.ENTITY_TYPE).stringValue();
                taxIds = new ArrayList<>();
                for (IndexableField taxIdField : doc.getFields(SynonymIndexFieldNames.TAX_ID_FIELD)) {
                    taxIds.add(taxIdField.stringValue());
                }
            }

            boolean sameAsMention = synonym.equals(normalizedMention);
            Scorer scorer = sameAsMention ? this.exactScorer : this.approxScorer;
            double score;
            if (scorer.getScorerType() != LUCENE_SCORER) {
                score = scorer.getScore(normalizedMention, synonym);
            } else if (sameAsMention) {
                score = 9999.0d;
            } else {
                score = luceneHit.score;
            }

            SynHit hit = new SynHit(synonym, score, ids, source, entityType, taxIds);
            hit.setMappedMention(lowerCasedMention);
            hit.setMappedGeneName(geneName);
            hit.setSynonymPriorities(priorities);
            hit.setLuceneScore(luceneHit.score);
            if (score == 9999.0d) {
                hit.setExactMatch(true);
            }
            scoredHits.add(hit);
        }
        return scoredHits;
    }

    /**
     * Reduces the hits to the best one per gene ID. For each ID the hit with the highest lexical score wins,
     * and on equal scores the later hit replaces the earlier one. The surviving hits are de-duplicated, set to
     * compare by score and returned sorted.
     *
     * @param list the hits to combine
     * @return a new sorted list of the distinct best hits
     */
    private List<SynHit> combineHits(List<SynHit> list) {
        Map<String, SynHit> bestHitById = new HashMap<>();
        for (SynHit candidate : list) {
            for (String id : candidate.getIds()) {
                SynHit currentBest = bestHitById.get(id);
                if (currentBest == null || candidate.getLexicalScore() >= currentBest.getLexicalScore()) {
                    bestHitById.put(id, candidate);
                }
            }
        }
        // One hit may be the best for several IDs; the set removes those duplicates.
        Set<SynHit> distinctHits = new HashSet<>();
        for (SynHit best : bestHitById.values()) {
            best.setCompareType(SynHit.CompareType.SCORE);
            distinctHits.add(best);
        }
        return distinctHits.stream().sorted().collect(Collectors.toList());
    }

    /**
     * Retrieves candidates for the mention restricted to at most one taxonomy ID.
     *
     * @param geneMention    the mention to look up
     * @param str            the taxonomy ID; {@code null} means no taxonomy restriction
     * @param queryGenerator generator for the index query
     * @return the candidates returned by the collection-based overload
     */
    @Override // de.julielab.gene.candidateretrieval.CandidateRetrieval
    public List<SynHit> getCandidates(GeneMention geneMention, String str, QueryGenerator queryGenerator) {
        Collection<String> taxIds;
        if (str == null) {
            taxIds = Collections.emptyList();
        } else {
            taxIds = Arrays.asList(str);
        }
        return getCandidates(geneMention, taxIds, queryGenerator);
    }

    /**
     * Retrieves candidates for a plain mention text restricted to at most one taxonomy ID.
     *
     * @param str            the mention text
     * @param str2           the taxonomy ID; {@code null} means no taxonomy restriction
     * @param queryGenerator generator for the index query
     * @return the candidates returned by the mention-based overload
     */
    @Override // de.julielab.gene.candidateretrieval.CandidateRetrieval
    public List<SynHit> getCandidates(String str, String str2, QueryGenerator queryGenerator) {
        final GeneMention mention = new GeneMention(str, this.normalizer);
        Collection<String> taxIds;
        if (str2 == null) {
            taxIds = Collections.emptyList();
        } else {
            taxIds = Arrays.asList(str2);
        }
        return getCandidates(mention, taxIds, queryGenerator);
    }

    /**
     * Wraps the plain mention text into a {@link GeneMention} built with this instance's normalizer and
     * delegates to the mention-based overload.
     *
     * @param str            the mention text
     * @param collection     taxonomy IDs to query for, may be {@code null}
     * @param queryGenerator generator for the index query
     * @return the candidates returned by the mention-based overload
     */
    @Override // de.julielab.gene.candidateretrieval.CandidateRetrieval
    public List<SynHit> getCandidates(String str, Collection<String> collection, QueryGenerator queryGenerator) {
        final GeneMention mention = new GeneMention(str, this.normalizer);
        return getCandidates(mention, collection, queryGenerator);
    }

    /**
     * Looks up the taxonomy ID of a gene ID in the name-centric index. The first Gene/Protein document that
     * carries the ID with priority -1 is used; its ID fields and tax ID fields are read as parallel lists.
     *
     * @param str the gene ID
     * @return the taxonomy ID, or the empty string if the gene ID is not in the index or has no tax ID
     * @throws GeneExpRuntimeException if the index cannot be read
     */
    public String mapGeneIdToTaxId(String str) {
        try {
            BooleanQuery query = new BooleanQuery.Builder().add(new TermQuery(new Term(SynonymIndexFieldNames.ID_FIELD, str + "__-1")), BooleanClause.Occur.MUST).add(new TermQuery(new Term(SynonymIndexFieldNames.ENTITY_TYPE, "Gene/Protein")), BooleanClause.Occur.FILTER).build();
            IndexSearcher searcher = getNameCentricIndexSearcher();
            TopDocs topDocs = searcher.search(query, 1);
            if (topDocs.totalHits.value <= 0) {
                log.warn("GeneID: {} was not found in the index.", str);
                return "";
            }
            Document doc = searcher.doc(topDocs.scoreDocs[0].doc);
            IndexableField[] idFields = doc.getFields(SynonymIndexFieldNames.ID_FIELD);
            IndexableField[] taxIdFields = doc.getFields(SynonymIndexFieldNames.TAX_ID_FIELD);
            // A document with fewer tax ID fields than ID fields must not fail with an index error; IDs without
            // a tax ID at the same position are skipped.
            int pairedFields = Math.min(idFields.length, taxIdFields.length);
            String taxId = "";
            for (int i = 0; i < pairedFields; i++) {
                String id = idFields[i].stringValue().split("__")[0];
                if (id.equals(str)) {
                    // No early exit: as before, the last matching position wins.
                    taxId = taxIdFields[i].stringValue();
                }
            }
            if (taxId.isBlank()) {
                log.warn("GeneID: {} has no TaxId assigned.", str);
            }
            return taxId;
        } catch (IOException e) {
            throw new GeneExpRuntimeException(e);
        }
    }

    /**
     * Looks up one index entry per gene ID. The result has exactly one element per requested ID, in request
     * order: a hit for an ID that was found and {@code null} for an ID that was not. The method itself warns
     * on every call that it does not work as intended with the synonym-centric index layout.
     *
     * @param list the gene IDs to look up
     * @return a list as long as {@code list}, containing {@code null} for IDs that were not found
     * @throws IOException if the index cannot be read
     */
    public List<SynHit> getIndexEntries(List<String> list) throws IOException {
        log.warn("LuceneCandidateRetrieval.getIndexEntries(): This method currently does not work as intended since the synonym index is now synonym-centric instead of id-centric. The ID field values have the form id_priority, thus at this place a wildcard query for all priorities would be needed");
        List<SynHit> entries = new ArrayList<>(list.size());
        for (String geneId : list) {
            BooleanQuery query = new BooleanQuery.Builder().add(new BooleanClause(new TermQuery(new Term(SynonymIndexFieldNames.ID_FIELD, geneId + "__-1")), BooleanClause.Occur.FILTER)).build();
            IndexSearcher searcher = getNameCentricIndexSearcher();
            TopDocs topDocs = searcher.search(query, 1);
            if (topDocs.totalHits.value > 0) {
                Document doc = searcher.doc(topDocs.scoreDocs[0].doc);
                String source = doc.getField(SynonymIndexFieldNames.SOURCE).stringValue();
                String entityType = doc.getField(SynonymIndexFieldNames.ENTITY_TYPE).stringValue();
                List<String> taxIds = new ArrayList<>();
                for (IndexableField taxIdField : doc.getFields(SynonymIndexFieldNames.TAX_ID_FIELD)) {
                    String taxId = taxIdField.stringValue();
                    if (!StringUtils.isBlank(taxId)) {
                        taxIds.add(taxId);
                    }
                }
                if (taxIds.isEmpty()) {
                    log.warn("GeneID: {} has no TaxId assigned.", geneId);
                }
                entries.add(new SynHit("<none>", 0.0d, Arrays.asList(geneId), source, entityType, taxIds));
            } else {
                // The placeholder belongs to missing IDs only; it used to be appended after found hits as well.
                entries.add(null);
            }
        }
        return entries;
    }

    /**
     * Fetches the gene record of each given ID from the gene record index. IDs without a record are skipped,
     * so the result may be shorter than the input.
     *
     * @param collection the gene IDs to fetch
     * @return the record hits of the IDs that were found, in iteration order of the input
     * @throws IOException if the index cannot be read
     */
    public List<SynHit> getIndexRecords(Collection<String> collection) throws IOException {
        List<SynHit> records = new ArrayList<>(collection.size());
        for (String geneId : collection) {
            TermQuery idQuery = new TermQuery(new Term(SynonymIndexFieldNames.ID_FIELD, geneId));
            BooleanQuery query = new BooleanQuery.Builder().add(new BooleanClause(idQuery, BooleanClause.Occur.FILTER)).build();
            IndexSearcher searcher = getGeneRecordIndexSearcher();
            TopDocs topDocs = searcher.search(query, 1);
            if (topDocs.totalHits.value <= 0) {
                continue;
            }
            // The record was not found through a name, so a descriptive placeholder name is attached.
            GeneName placeholderName = new GeneName("<retrieved by id " + geneId + ">", this.normalizer);
            ScoreDoc topHit = topDocs.scoreDocs[0];
            records.add(getRecordHit(true, placeholderName, topHit, searcher.doc(topDocs.scoreDocs[0].doc)));
        }
        return records;
    }

    /**
     * Collects the synonyms of a gene ID from the name-centric index by matching ID field values of the form
     * {@code <id>__*}. At most 200 synonyms are returned.
     *
     * @param str the gene ID
     * @return the synonyms found, or an empty list if the ID has no entry
     * @throws GeneExpRuntimeException if the index cannot be read
     */
    public List<String> getSynonyms(String str) {
        try {
            Term idWithAnyPriority = new Term(SynonymIndexFieldNames.ID_FIELD, str + "__*");
            BooleanClause filter = new BooleanClause(new WildcardQuery(idWithAnyPriority), BooleanClause.Occur.FILTER);
            BooleanQuery query = new BooleanQuery.Builder().add(filter).build();
            IndexSearcher searcher = getNameCentricIndexSearcher();
            TopDocs topDocs = searcher.search(query, 200);
            if (topDocs.totalHits.value <= 0) {
                return Collections.emptyList();
            }
            List<String> synonyms = new ArrayList<>(200);
            for (ScoreDoc hit : topDocs.scoreDocs) {
                synonyms.add(searcher.doc(hit.doc).getField(SynonymIndexFieldNames.LOOKUP_SYN_FIELD).stringValue());
            }
            return synonyms;
        } catch (IOException e) {
            throw new GeneExpRuntimeException(e);
        }
    }

    /**
     * Scores the synonyms of the given IDs against a text by character n-gram overlap. The text is analyzed
     * with the n-gram analyzer and searched in the n-gram synonym field, restricted to documents whose ID
     * field matches one of the given values. At most 1000 hits are returned.
     *
     * @param str the text to compare the synonyms with
     * @param set the ID field values to restrict the search to
     * @return hits carrying the synonym and its Lucene score; empty if the text yields no n-gram query
     * @throws GeneExpRuntimeException if the index cannot be read
     */
    public List<SynHit> scoreIdsByNGramSynonyms(String str, Set<String> set) {
        QueryBuilder queryBuilder = new QueryBuilder(this.ngramAnalyzer);
        Query ngramQuery = queryBuilder.createBooleanQuery(SynonymIndexFieldNames.LOOKUP_SYN_FIELD_NGRAMS, str);
        // The builder yields no query when the analysis produces no tokens; this is the value that needs the
        // null check, the builder itself is never null.
        if (ngramQuery == null) {
            return Collections.emptyList();
        }
        BooleanQuery.Builder idFilter = new BooleanQuery.Builder();
        for (String id : set) {
            idFilter.add(new TermQuery(new Term(SynonymIndexFieldNames.ID_FIELD, id)), BooleanClause.Occur.SHOULD);
        }
        BooleanQuery query = new BooleanQuery.Builder().add(ngramQuery, BooleanClause.Occur.MUST).add(idFilter.build(), BooleanClause.Occur.FILTER).build();
        List<SynHit> hits = new ArrayList<>();
        try {
            IndexSearcher searcher = getNameCentricIndexSearcher();
            for (ScoreDoc scoreDoc : searcher.search(query, 1000).scoreDocs) {
                String synonym = searcher.doc(scoreDoc.doc).getField(SynonymIndexFieldNames.LOOKUP_SYN_FIELD).stringValue();
                hits.add(new SynHit(synonym, scoreDoc.score, Collections.emptyList(), (String) null, (String) null, Collections.emptyList()));
            }
            return hits;
        } catch (IOException e) {
            throw new GeneExpRuntimeException(e);
        }
    }

    /**
     * Scores the given IDs against a bag of words. The words are joined with single spaces into one gene
     * name, and the lookup is restricted to the given IDs, limited to 1000 hits and done without loading the
     * additional hit fields.
     *
     * @param collection     the words forming the query
     * @param set            the gene IDs to restrict the lookup to
     * @param queryGenerator generator for the index query
     * @return the hits returned by the index lookup
     */
    @Override // de.julielab.gene.candidateretrieval.CandidateRetrieval
    public List<SynHit> scoreIdsByBoWSynonyms(Collection<String> collection, Set<String> set, QueryGenerator queryGenerator) {
        final String bagOfWords = String.join(" ", collection);
        final GeneName bagOfWordsName = new GeneName(bagOfWords, this.normalizer);
        final CandidateCacheKey key = new CandidateCacheKey(bagOfWordsName, (String) null);
        key.setGeneIdsFilter(set);
        key.setQueryGenerator(queryGenerator);
        key.setMaxHits(1000);
        key.setLoadSynHitFields(false);
        return getCandidatesFromIndex(key);
    }

    /**
     * Unsupported operation: this method has no implementation here and always throws.
     *
     * @throws NotImplementedException on every call
     */
    @Override
    public List<SynHit> getCandidates(GeneMention geneMention, Collection<String> collection, Parameters parameters, QueryGenerator queryGenerator) {
        throw new NotImplementedException("This method should be implemented when needed.");
    }

    /**
     * Looks up every given gene name through {@code getCandidatesFromIndex} and aggregates the
     * returned hits by their ID.
     * <p>
     * For each name a cache key is built with the ID filter, the query generator and a hit limit
     * of 1000. SynHit fields are only loaded when {@code function} is given.
     *
     * @param collection     the gene names to look up
     * @param set            the gene IDs set as ID filter on each cache key
     * @param function       optional extractor applied to each hit (cast to {@link GeneRecordHit});
     *                       may be {@code null}, in which case the right map stays empty
     * @param queryGenerator the query generator set on each cache key
     * @return a pair whose left element maps each hit ID to the sum of the lexical scores of its
     * hits, and whose right element maps each hit ID to the non-null strings the extractor
     * returned for its hits
     */
    public Pair<Map<String, Double>, Map<String, List<String>>> scoreSynonymsRecordIndex(Collection<GeneName> collection, Set<String> set, Function<GeneRecordHit, String[]> function, QueryGenerator queryGenerator) {
        final boolean extractValues = function != null;
        final Map<String, Double> scoreSumById = new HashMap<>();
        final Map<String, List<String>> extractedValuesById = new HashMap<>();

        for (GeneName name : collection) {
            final CandidateCacheKey key = new CandidateCacheKey(name, (String) null);
            key.setGeneIdsFilter(set);
            key.setQueryGenerator(queryGenerator);
            key.setMaxHits(1000);
            key.setLoadSynHitFields(extractValues);

            for (SynHit hit : getCandidatesFromIndex(key)) {
                scoreSumById.merge(hit.getId(), Double.valueOf(hit.getLexicalScore()), Double::sum);
                if (!extractValues) {
                    continue;
                }
                // The ID gets an entry even if the extractor yields nothing for this hit.
                final List<String> valuesForId = extractedValuesById.computeIfAbsent(hit.getId(), id -> new ArrayList<>());
                final String[] extracted = function.apply((GeneRecordHit) hit);
                if (extracted == null) {
                    continue;
                }
                for (String value : extracted) {
                    if (value != null) {
                        valuesForId.add(value);
                    }
                }
            }
        }
        return new ImmutablePair<>(scoreSumById, extractedValuesById);
    }

    /**
     * Searches the name centric index for documents whose exact synonym field equals one of the
     * given synonyms and whose ID field equals one of the given IDs.
     * <p>
     * The synonym terms are optional (SHOULD) scoring clauses; the ID terms are combined into a
     * FILTER clause. At most 1000 documents are requested.
     *
     * @param collection the synonym strings to match against the exact synonym field
     * @param set        the ID field values a document must match one of
     * @return one {@link SynHit} per returned document, carrying the document's lookup synonym,
     * its Lucene score and its ID field values, each cut off before the first {@code "__"}
     * @throws GeneExpRuntimeException if the index access fails with an {@link IOException}
     */
    public List<SynHit> scoreIdsByExactSynonyms(Collection<String> collection, Set<String> set) {
        BooleanQuery.Builder synonymQuery = new BooleanQuery.Builder();
        for (String synonym : collection) {
            synonymQuery.add(new TermQuery(new Term(SynonymIndexFieldNames.LOOKUP_SYN_FIELD_EXACT, synonym)), BooleanClause.Occur.SHOULD);
        }
        BooleanQuery.Builder idFilter = new BooleanQuery.Builder();
        for (String id : set) {
            idFilter.add(new TermQuery(new Term(SynonymIndexFieldNames.ID_FIELD, id)), BooleanClause.Occur.SHOULD);
        }
        synonymQuery.add(idFilter.build(), BooleanClause.Occur.FILTER);

        List<SynHit> hits = new ArrayList<>();
        try {
            IndexSearcher searcher = getNameCentricIndexSearcher();
            for (ScoreDoc scoreDoc : searcher.search(synonymQuery.build(), 1000).scoreDocs) {
                Document doc = searcher.doc(scoreDoc.doc);
                String synonym = doc.getField(SynonymIndexFieldNames.LOOKUP_SYN_FIELD).stringValue();
                // Keep only the part of each ID field value before the "__" separator.
                List<String> ids = Arrays.stream(doc.getFields(SynonymIndexFieldNames.ID_FIELD))
                        .map(field -> field.stringValue())
                        .map(value -> value.split("__")[0])
                        .collect(Collectors.toCollection(ArrayList::new));
                // The hit's score is the score of the document currently being iterated.
                hits.add(new SynHit(synonym, scoreDoc.score, ids, (String) null, (String) null, Collections.emptyList()));
            }
            return hits;
        } catch (IOException e) {
            throw new GeneExpRuntimeException(e);
        }
    }

    /**
     * Fetches the lookup synonym of the name centric index document whose ID field equals
     * {@code str + "__" + i}.
     * <p>
     * Only a single document is requested from the index, so the result holds at most one name.
     *
     * @param str the ID part placed before the {@code "__"} separator
     * @param i   the number placed after the {@code "__"} separator (presumably the name
     *            priority - confirm against the index layout)
     * @return an immutable empty list when nothing matched, otherwise a new list with the lookup
     * synonym of the returned document
     * @throws GeneExpRuntimeException if the index access fails with an {@link IOException}
     */
    public List<String> getPriorityNames(String str, int i) {
        final Term idTerm = new Term(SynonymIndexFieldNames.ID_FIELD, str + "__" + i);
        final BooleanClause idFilter = new BooleanClause(new TermQuery(idTerm), BooleanClause.Occur.FILTER);
        final BooleanQuery query = new BooleanQuery.Builder().add(idFilter).build();
        try {
            final IndexSearcher searcher = getNameCentricIndexSearcher();
            final TopDocs topDocs = searcher.search(query, 1);
            if (topDocs.totalHits.value <= 0) {
                return Collections.emptyList();
            }
            final List<String> names = new ArrayList<>(1);
            for (int hit = 0; hit < topDocs.scoreDocs.length; hit++) {
                final int docId = topDocs.scoreDocs[hit].doc;
                names.add(searcher.doc(docId).getField(SynonymIndexFieldNames.LOOKUP_SYN_FIELD).stringValue());
            }
            return names;
        } catch (IOException e) {
            throw new GeneExpRuntimeException(e);
        }
    }

    /**
     * Fetches, for a collection of IDs, the lookup synonyms of the name centric index documents
     * whose ID field equals {@code id + "__" + i}.
     * <p>
     * As many documents as there are IDs in {@code collection} are requested. Each returned
     * document is attributed to the first of its ID field values that, cut off before
     * {@code "__"}, is contained in {@code collection}.
     *
     * @param collection the IDs to look up; an empty collection yields an empty map without
     *                   querying the index
     * @param i          the number placed after the {@code "__"} separator (presumably the name
     *                   priority - confirm against the index layout)
     * @return an immutable empty map when there is nothing to look up or nothing matched,
     * otherwise a new map from ID to the lookup synonym of its document
     * @throws GeneExpRuntimeException          if the index access fails with an {@link IOException}
     * @throws java.util.NoSuchElementException if a returned document has no ID field value whose
     *                                          prefix is contained in {@code collection}
     */
    public Map<String, String> getPriorityNamesMap(Collection<String> collection, int i) {
        if (collection.isEmpty()) {
            // Lucene refuses to search for zero hits (IllegalArgumentException), and there is
            // nothing to look up anyway.
            return Collections.emptyMap();
        }
        try {
            BooleanQuery.Builder idClauses = new BooleanQuery.Builder();
            for (String id : collection) {
                idClauses.add(new TermQuery(new Term(SynonymIndexFieldNames.ID_FIELD, id + "__" + i)), BooleanClause.Occur.SHOULD);
            }
            BooleanQuery query = new BooleanQuery.Builder().add(idClauses.build(), BooleanClause.Occur.FILTER).build();
            IndexSearcher searcher = getNameCentricIndexSearcher();
            TopDocs topDocs = searcher.search(query, collection.size());
            if (topDocs.totalHits.value <= 0) {
                return Collections.emptyMap();
            }
            Map<String, String> namesById = new HashMap<>(collection.size());
            for (int hit = 0; hit < topDocs.scoreDocs.length; hit++) {
                Document doc = searcher.doc(topDocs.scoreDocs[hit].doc);
                // Map the document back to the requested ID it was found for.
                String requestedId = Arrays.stream(doc.getFields(SynonymIndexFieldNames.ID_FIELD))
                        .map(field -> field.stringValue())
                        .map(value -> value.split("__")[0])
                        .filter(idPrefix -> collection.contains(idPrefix))
                        .findFirst()
                        .get();
                namesById.put(requestedId, doc.getField(SynonymIndexFieldNames.LOOKUP_SYN_FIELD).stringValue());
            }
            return namesById;
        } catch (IOException e) {
            throw new GeneExpRuntimeException(e);
        }
    }

    /**
     * Collects the results of {@link #getPriorityNames(String, int)} for every given ID into one
     * list, in the iteration order of {@code collection}.
     *
     * @param collection the IDs to look up one by one
     * @param i          the number passed on to the single-ID lookup
     * @return a new list with the concatenated names found for all IDs
     * @throws IOException declared in the signature; the single-ID lookup used here wraps index
     *                     I/O failures in a {@code GeneExpRuntimeException} instead
     */
    public List<String> getPriorityNames(Collection<String> collection, int i) throws IOException {
        final List<String> allNames = new ArrayList<>();
        for (String id : collection) {
            allNames.addAll(getPriorityNames(id, i));
        }
        return allNames;
    }

    /**
     * Closes the name centric index reader and the gene record index reader, if they are set.
     * <p>
     * Each reader is closed in its own try block so that a failure to close the first one does
     * not leave the second one open. Failures are logged and not rethrown.
     */
    @Override // de.julielab.gene.candidateretrieval.CandidateRetrieval, java.io.Closeable, java.lang.AutoCloseable
    public void close() {
        try {
            if (this.nameCentricIndexReader != null) {
                this.nameCentricIndexReader.close();
            }
        } catch (IOException e) {
            log.error("Could not close lucene indices", e);
        }
        try {
            if (this.geneRecordIndexReader != null) {
                this.geneRecordIndexReader.close();
            }
        } catch (IOException e) {
            log.error("Could not close lucene indices", e);
        }
    }

    /**
     * Unsupported operation: this method has no implementation here and always throws.
     *
     * @throws NotImplementedException on every call
     */
    @Override
    public List<SynHit> getFamilyNames(GeneMention geneMention, QueryGenerator queryGenerator) {
        throw new NotImplementedException("This method should be implemented when needed.");
    }

    /**
     * Unsupported operation: this overload has no implementation here and always throws.
     *
     * @throws NotImplementedException on every call
     */
    @Override
    public List<SynHit> getOriginalNamesIndexRecords(Collection<String> collection, GeneName geneName) {
        throw new NotImplementedException("This method should be implemented when needed.");
    }

    /**
     * Unsupported operation: this overload has no implementation here and always throws.
     *
     * @throws NotImplementedException on every call
     */
    @Override
    public List<SynHit> getOriginalNamesIndexRecords(Collection<String> collection) {
        throw new NotImplementedException("This method should be implemented when needed.");
    }

    /**
     * Unsupported operation: this method has no implementation here and always throws.
     *
     * @throws NotImplementedException on every call
     */
    @Override
    public TFIDFScorer getTFIDFOnGeneRecordNames() {
        throw new NotImplementedException("This method should be implemented when needed.");
    }

    /**
     * Unsupported operation: this method has no implementation here and always throws.
     *
     * @throws NotImplementedException on every call
     */
    @Override
    public void setFulltextFieldsToRecordHits(Collection<? extends SynHit> collection, Collection<String> collection2) {
        throw new NotImplementedException("This method should be implemented when needed.");
    }

    /**
     * Always returns {@code null}; none of the arguments is read.
     * <p>
     * NOTE(review): the other unimplemented methods of this class throw a
     * {@code NotImplementedException}, whereas this one silently returns {@code null}. Callers
     * must null-check the result; confirm whether this difference is intended.
     *
     * @return always {@code null}
     */
    @Override // de.julielab.gene.candidateretrieval.CandidateRetrieval
    public Pair<Map<String, Double>, Map<String, Set<String>>> scoreSynonymsRecordIndex(String str, Map<String, Collection<GeneName>> map, Function<GeneRecordHit, String[]> function, QueryGenerator queryGenerator) {
        return null;
    }

    /**
     * Returns the gene record index searcher registered for the calling thread, creating and
     * registering one on top of the gene record index reader and the executor service if the
     * thread does not have one yet.
     *
     * @return the searcher mapped to {@link Thread#currentThread()}
     */
    private IndexSearcher getGeneRecordIndexSearcher() {
        final Thread caller = Thread.currentThread();
        return geneRecordIndexSearcher.computeIfAbsent(caller,
                thread -> new IndexSearcher(this.geneRecordIndexReader, executorService));
    }

    /**
     * Returns the name centric index searcher registered for the calling thread, creating and
     * registering one on top of the name centric index reader and the executor service if the
     * thread does not have one yet.
     * <p>
     * A fresh {@code ClassicSimilarity} is set on the searcher on every call, not only on creation.
     *
     * @return the searcher mapped to {@link Thread#currentThread()}
     */
    private IndexSearcher getNameCentricIndexSearcher() {
        final Thread caller = Thread.currentThread();
        final IndexSearcher searcherForThread = mentionIndexSearcher.computeIfAbsent(caller,
                thread -> new IndexSearcher(this.nameCentricIndexReader, executorService));
        searcherForThread.setSimilarity(new ClassicSimilarity());
        return searcherForThread;
    }

    /**
     * Runs the query generated by the cache key against the gene record index and converts the
     * top documents into record hits.
     * <p>
     * The key's maximum hit count is passed as both arguments of the collector's factory method.
     * Whether the record fields are loaded into the hits is taken from the key as well.
     *
     * @param candidateCacheKey supplies the query, the hit limit, the field loading flag and the
     *                          gene name the hits are created for
     * @return the hits in the order of the collector's top documents, or an immutable empty list
     * if a {@code BooleanQuery.TooManyClauses} exception occurred (which is logged as a warning)
     * @throws IOException if searching the index or loading a document fails
     */
    private List<SynHit> getCandidatesFromRecordIndex(CandidateCacheKey candidateCacheKey) throws IOException {
        try {
            final Query query = candidateCacheKey.generateQuery();
            final int maxHits = candidateCacheKey.getMaxHits();
            final TopScoreDocCollector collector = TopScoreDocCollector.create(maxHits, maxHits);
            final IndexSearcher searcher = getGeneRecordIndexSearcher();
            searcher.search(query, collector);

            final boolean loadFields = candidateCacheKey.isLoadSynHitFields();
            final GeneName queriedName = candidateCacheKey.getGeneName();
            final List<SynHit> recordHits = new ArrayList<>();
            for (ScoreDoc scoreDoc : collector.topDocs().scoreDocs) {
                final Document document = searcher.doc(scoreDoc.doc);
                recordHits.add(getRecordHit(loadFields, queriedName, scoreDoc, document));
            }
            return recordHits;
        } catch (BooleanQuery.TooManyClauses e) {
            log.warn("Got too many clauses exception from gene name \"{}\". Assuming that this is a tagging error and not returning any candidates.", candidateCacheKey.getGeneName().getText());
            return Collections.emptyList();
        }
    }

    /**
     * Builds a {@link GeneRecordHit} from a gene record index document.
     * <p>
     * Always set: the ID from the ID field, the symbol from the symbol field (empty string if the
     * field is absent), the Lucene score, the fixed source string and the mapped mention (the
     * gene name's text, or {@code "none"} without a gene name).
     * <p>
     * Only when {@code z} is true: the tax ID, symbol, nomenclature, chromosome and map location
     * (the latter three only if present), and the synonym, full name, other designation, xref,
     * UniProt and BioThesaurus value arrays. In addition, if a gene name is given and one of the
     * synonym fields holds a value equal to its normalized text, the hit's lexical score is set
     * to 9999 and that value is set as the hit's synonym.
     *
     * @param z        whether to load the record fields into the hit
     * @param geneName the gene name the hit was retrieved for; may be {@code null}, in which case
     *                 the exact-match check is skipped
     * @param scoreDoc the search result entry that provides the score
     * @param document the index document that provides the field values
     * @return the populated hit, never {@code null}
     */
    @NotNull
    private GeneRecordHit getRecordHit(boolean z, GeneName geneName, ScoreDoc scoreDoc, Document document) {
        String geneId = document.getField(SynonymIndexFieldNames.ID_FIELD).stringValue();
        String taxId = z ? document.getField(SynonymIndexFieldNames.TAX_ID_FIELD).stringValue() : null;
        IndexableField symbolField = document.getField(SynonymIndexFieldNames.SYMBOL);
        String symbol = symbolField != null ? symbolField.stringValue() : "";

        GeneRecordHit geneRecordHit = new GeneRecordHit(symbol, scoreDoc.score, geneId, "<no source specified>");
        geneRecordHit.setMappedMention(geneName != null ? geneName.getText() : "none");
        geneRecordHit.setLuceneScore(scoreDoc.score);
        if (taxId != null) {
            geneRecordHit.setTaxIds(Collections.singletonList(taxId));
            geneRecordHit.setTaxId(taxId);
        }
        if (!z) {
            return geneRecordHit;
        }

        geneRecordHit.setSymbol(symbol);
        Optional.ofNullable(document.getField(SynonymIndexFieldNames.SYMBOL_FROM_NOMCENCLATURE))
                .ifPresent(field -> geneRecordHit.setNomenclature(field.stringValue()));
        Optional.ofNullable(document.getField(SynonymIndexFieldNames.CHROMOSOME))
                .ifPresent(field -> geneRecordHit.setChromosome(field.stringValue()));
        Optional.ofNullable(document.getField(SynonymIndexFieldNames.MAPLOCATION))
                .ifPresent(field -> geneRecordHit.setMapLocation(field.stringValue()));
        geneRecordHit.setSynonyms(stringValuesOfField(document, SynonymIndexFieldNames.SYNONYMS));
        geneRecordHit.setFullNames(stringValuesOfField(document, SynonymIndexFieldNames.FULL_NAMES));
        geneRecordHit.setOtherDesignations(stringValuesOfField(document, SynonymIndexFieldNames.OTHER_DESIGNATIONS));
        geneRecordHit.setXrefs(stringValuesOfField(document, SynonymIndexFieldNames.XREFS));
        geneRecordHit.setUniprotNames(stringValuesOfField(document, SynonymIndexFieldNames.UNIPROT_NAMES));
        geneRecordHit.setBioThesaurusNames(stringValuesOfField(document, SynonymIndexFieldNames.BIO_THESAURUS));

        // Callers may pass no gene name together with z == true (getGeneRecords does); without
        // a name there is nothing to compare against, so the exact-match marking is skipped
        // instead of failing with a NullPointerException.
        if (geneName != null) {
            String normalizedName = geneName.getNormalizedText();
            for (String synonymField : GeneRecordQueryGenerator.SYNONYM_FIELDS) {
                Optional<String> exactMatch = Arrays.stream(document.getFields(synonymField))
                        .map(field -> field.stringValue())
                        .filter(value -> value.equals(normalizedName))
                        .findAny();
                if (exactMatch.isPresent()) {
                    geneRecordHit.setLexicalScore(9999.0d);
                    geneRecordHit.setSynonym(exactMatch.get());
                }
            }
        }
        return geneRecordHit;
    }

    /** Returns the string values of all fields of the document with the given name, in document order. */
    private static String[] stringValuesOfField(Document document, String fieldName) {
        return Arrays.stream(document.getFields(fieldName))
                .map(field -> field.stringValue())
                .toArray(String[]::new);
    }

    /**
     * Loads the gene records with the given IDs from the gene record index.
     * <p>
     * The query matches all documents and filters them to those whose ID field equals one of the
     * given IDs. As many documents as there are IDs in {@code collection} are requested. Each
     * document is converted with {@code getRecordHit}, with field loading enabled and without a
     * gene name.
     *
     * @param collection the IDs of the records to load; an empty collection yields an empty set
     *                   without querying the index
     * @return a new mutable set with one hit per returned document
     * @throws GeneExpRuntimeException if the index access fails with an {@link IOException}
     */
    public Set<GeneRecordHit> getGeneRecords(Collection<String> collection) {
        if (collection.isEmpty()) {
            // Lucene refuses to search for zero hits (IllegalArgumentException), and there is
            // nothing to load anyway.
            return new HashSet<>();
        }
        try {
            Set<GeneRecordHit> records = new HashSet<>();
            IndexSearcher searcher = getGeneRecordIndexSearcher();

            BooleanQuery.Builder idClauses = new BooleanQuery.Builder();
            for (String id : collection) {
                idClauses.add(new TermQuery(new Term(SynonymIndexFieldNames.ID_FIELD, id)), BooleanClause.Occur.SHOULD);
            }
            BooleanQuery.Builder query = new BooleanQuery.Builder();
            query.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
            query.add(idClauses.build(), BooleanClause.Occur.FILTER);

            for (ScoreDoc scoreDoc : searcher.search(query.build(), collection.size()).scoreDocs) {
                records.add(getRecordHit(true, null, scoreDoc, searcher.doc(scoreDoc.doc)));
            }
            return records;
        } catch (IOException e) {
            throw new GeneExpRuntimeException(e);
        }
    }

    /**
     * Resolves the dismax tie breaker and the weight of every query field.
     * <p>
     * Each value is looked up in the given parameters under the key formed by
     * {@code Configuration.dot} from the candidate retrieval prefix and the weight's name. A
     * value found there is parsed as a float. Otherwise the value from the global field weights
     * is used, defaulting to 0.3 for the tie breaker and 1.0 for field weights. Weights are
     * resolved for all fields and, for each synonym field, additionally for its
     * {@code "_exact"} variant.
     *
     * @param map the parameter overrides, expected to hold strings; {@code null} is treated like
     *            an empty map
     * @return a new map from weight name to resolved weight
     */
    private Map<String, Float> getFieldWeightsFromParameters(Map<String, Object> map) {
        final Map<String, Object> parameters = map == null ? Collections.<String, Object>emptyMap() : map;
        final Map<String, Float> weights = new HashMap<>();

        final Object tieBreakerOverride = parameters.get(Configuration.dot(new String[]{Configuration.PREFIX_CANDIDATE_RETRIEVAL, Configuration.PARAM_DISMAX_TIE_BREAKER}));
        final Float tieBreaker;
        if (tieBreakerOverride == null) {
            tieBreaker = this.globalFieldWeights.getOrDefault(Configuration.PARAM_DISMAX_TIE_BREAKER, Float.valueOf(0.3f));
        } else {
            tieBreaker = Float.valueOf(Float.parseFloat((String) tieBreakerOverride));
        }
        weights.put(Configuration.PARAM_DISMAX_TIE_BREAKER, tieBreaker);

        for (String field : GeneRecordQueryGenerator.ALL_FIELDS) {
            final Float globalWeight = this.globalFieldWeights.getOrDefault(field, Float.valueOf(1.0f));
            final String override = (String) parameters.get(Configuration.dot(new String[]{Configuration.PREFIX_CANDIDATE_RETRIEVAL, field}));
            final float weight;
            if (override == null) {
                weight = globalWeight.floatValue();
            } else {
                weight = Float.parseFloat(override);
            }
            weights.put(field, Float.valueOf(weight));
        }

        for (String synonymField : GeneRecordQueryGenerator.SYNONYM_FIELDS) {
            final String exactField = synonymField + "_exact";
            final Float globalWeight = this.globalFieldWeights.getOrDefault(exactField, Float.valueOf(1.0f));
            final String override = (String) parameters.get(Configuration.dot(new String[]{Configuration.PREFIX_CANDIDATE_RETRIEVAL, exactField}));
            final float weight;
            if (override == null) {
                weight = globalWeight.floatValue();
            } else {
                weight = Float.parseFloat(override);
            }
            weights.put(exactField, Float.valueOf(weight));
        }
        return weights;
    }
}
