package de.julielab.geneexpbase.scoring;

import com.wcohen.ss.BasicStringWrapper;
import com.wcohen.ss.BasicStringWrapperIterator;
import com.wcohen.ss.SerializableTFIDF;
import com.wcohen.ss.TFIDF;
import com.wcohen.ss.api.StringWrapper;
import com.wcohen.ss.tokens.SerializableSimpleTokenizer;
import de.julielab.geneexpbase.GeneExpRuntimeException;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.stream.IntStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/julielab/geneexpbase/scoring/TFIDFUtils.class */
/**
 * Learns TF/IDF statistics from the stored field values of a Lucene index and scores
 * string pairs against the learned statistics using a SecondString {@link TFIDF} distance.
 *
 * <p>An instance must be trained with one of the {@code learnFromLuceneIndex} methods before
 * any {@code score} method is called.
 *
 * <p>Only private members may be added to this class without further thought: the class is
 * {@link Serializable} and declares no explicit {@code serialVersionUID}.
 */
public class TFIDFUtils implements Serializable {
    private static final Logger log = LoggerFactory.getLogger(TFIDFUtils.class);

    /** Maximum number of documents read from the index while {@link #TEST_MODE} is active. */
    private static final int TEST_MODE_MAX_DOCS = 1000;

    /** When {@code true}, training reads at most {@value #TEST_MODE_MAX_DOCS} documents. */
    public static boolean TEST_MODE = false;

    /** The trained model; {@code null} until a {@code learnFromLuceneIndex} call has completed. */
    private TFIDF tfidf;

    /**
     * Runs the small built-in demonstration, see {@link #test()}.
     *
     * @param strArr command line arguments, ignored
     */
    public static void main(String[] strArr) {
        new TFIDFUtils().test();
    }

    /**
     * Learns TF/IDF statistics from a single stored field of the given index.
     *
     * @param indexReader the index to read documents from
     * @param str         the name of the field whose values are used as training strings
     * @throws IllegalArgumentException if a visited document does not have the field
     * @throws GeneExpRuntimeException  if reading a document from the index fails
     */
    public void learnFromLuceneIndex(IndexReader indexReader, String str) {
        learnFromLuceneIndex(indexReader, new String[]{str});
    }

    /**
     * Learns TF/IDF statistics from the given index and installs them as the model used by the
     * {@code score} methods. The model is only replaced once training has finished successfully.
     *
     * <p>Only the first element of {@code strArr} is read from each document; further field
     * names are accepted for compatibility but ignored (a warning is logged).
     *
     * @param indexReader the index to read documents from
     * @param strArr      the field names; must contain at least one element
     * @throws IllegalArgumentException if {@code strArr} is {@code null} or empty, or if a
     *                                  visited document does not have the field
     * @throws GeneExpRuntimeException  if reading a document from the index fails
     */
    public void learnFromLuceneIndex(IndexReader indexReader, String[] strArr) {
        if (strArr == null || strArr.length == 0) {
            // Without a field name there is nothing to train on; previously this fed null
            // strings into the trainer.
            throw new IllegalArgumentException("At least one field name is required to learn TF/IDF statistics.");
        }
        long startMillis = System.currentTimeMillis();
        log.info("Learning TF/IDF statistic from Lucene index, field names: {}", Arrays.toString(strArr));
        String fieldName = strArr[0];
        if (strArr.length > 1) {
            log.warn("Only the first field '{}' is used for TF/IDF training, the remaining fields are ignored.", fieldName);
        }
        if (TEST_MODE) {
            log.warn("Test mode is active. TFIDF is only estimated on a very small subset of documents.");
        }

        // NOTE(review): document ids are taken from 0..numDocs()-1. If the index contains
        // deletions, numDocs() is smaller than maxDoc() and trailing documents are not
        // visited - confirm that the indexes used here never contain deletions.
        int numDocs = indexReader.numDocs();
        int docLimit = TEST_MODE ? Math.min(TEST_MODE_MAX_DOCS, numDocs) : numDocs;

        // The stream is lazy: documents are read one by one while the trainer pulls from the iterator.
        Iterator<StringWrapper> fieldValues = IntStream.range(0, docLimit)
                .mapToObj(docId -> readFieldValue(indexReader, docId, fieldName))
                .iterator();

        log.info("Computing TF/IDF statistics...");
        // Train a local instance first so that a failure does not leave an untrained model installed.
        TFIDF trained = new SerializableTFIDF(SerializableSimpleTokenizer.DEFAULT_TOKENIZER);
        trained.train(new BasicStringWrapperIterator(fieldValues));
        this.tfidf = trained;

        long elapsedSeconds = (System.currentTimeMillis() - startMillis) / 1000;
        log.info("Done learning TF/IDF for fields {} in {} seconds.", Arrays.toString(strArr), elapsedSeconds);
    }

    /**
     * Scores two strings with the learned TF/IDF model.
     *
     * @param str  the first string
     * @param str2 the second string
     * @return the score computed by the underlying {@link TFIDF} instance
     * @throws IllegalStateException if no model has been learned yet
     */
    public double score(String str, String str2) {
        TFIDF model = trainedModel();
        // Scoring is serialized on the model instance - presumably because the SecondString
        // implementation is not thread-safe; verify before removing the lock.
        synchronized (model) {
            return model.score(str, str2);
        }
    }

    /**
     * Scores two already wrapped strings with the learned TF/IDF model.
     *
     * @param stringWrapper  the first string
     * @param stringWrapper2 the second string
     * @return the score computed by the underlying {@link TFIDF} instance
     * @throws IllegalStateException if no model has been learned yet
     */
    public double score(StringWrapper stringWrapper, StringWrapper stringWrapper2) {
        TFIDF model = trainedModel();
        synchronized (model) {
            return model.score(stringWrapper, stringWrapper2);
        }
    }

    /**
     * Trains a fresh {@link TFIDF} instance on four tiny strings and prints the score
     * explanations for two string pairs to standard output. Does not touch this instance's model.
     */
    public void test() {
        TFIDF demoModel = new TFIDF();
        Iterator<StringWrapper> trainingData = Arrays.asList(
                getbsw("1 2 3 4 "),
                getbsw("1 4"),
                getbsw("5 5 5 "),
                getbsw("5 6 2 1")).iterator();
        demoModel.train(new BasicStringWrapperIterator(trainingData));
        System.out.println(demoModel.explainScore("2 4 6", "7, 4, 9"));
        System.out.println(demoModel.explainScore("1 2 3", "1 7 19"));
    }

    /** Reads the value of {@code fieldName} from document {@code docId} and wraps it for training. */
    private static StringWrapper readFieldValue(IndexReader indexReader, int docId, String fieldName) {
        try {
            Document document = indexReader.document(docId);
            IndexableField field = document.getField(fieldName);
            if (field == null) {
                throw new IllegalArgumentException("Field " + fieldName + " was not found in the given index.");
            }
            return new BasicStringWrapper(field.stringValue());
        } catch (IOException e) {
            log.error("Exception while iterating over index", e);
            throw new GeneExpRuntimeException(e);
        }
    }

    /** Returns the trained model or fails with a descriptive message if training has not happened. */
    private TFIDF trainedModel() {
        TFIDF model = this.tfidf;
        if (model == null) {
            throw new IllegalStateException("No TF/IDF statistics available, call learnFromLuceneIndex first.");
        }
        return model;
    }

    /** Wraps a plain string into a SecondString {@link StringWrapper}. */
    private StringWrapper getbsw(String str) {
        return new BasicStringWrapper(str);
    }
}
