package pitt.search.semanticvectors;

import cern.colt.matrix.AbstractFormatter;
import java.io.IOException;
import java.util.Enumeration;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import pitt.search.semanticvectors.utils.VerbatimLogger;
import pitt.search.semanticvectors.vectors.Vector;
import pitt.search.semanticvectors.vectors.VectorFactory;

/* loaded from: input_file:pitt/search/semanticvectors/TermVectorsFromLucene.class */
/**
 * Builds term vectors from a Lucene index.
 *
 * <p>Two construction paths are offered through static factories:
 * <ul>
 *   <li>{@link #createTermVectorsFromLucene(FlagConfig, VectorStore)} trains a term vector for each
 *       accepted term by superposing the elemental vectors of the documents in which the term
 *       occurs, weighted by the term's frequency in each document, and then normalizing.</li>
 *   <li>{@link #createTermBasedRRIVectors(FlagConfig)} produces elemental term vectors, either
 *       copied from the vector store named by {@code flagConfig.initialtermvectors()} or freshly
 *       generated through an {@code ElementalVectorStore}.</li>
 * </ul>
 */
public class TermVectorsFromLucene {
    private static final Logger logger = Logger.getLogger(TermVectorsFromLucene.class.getCanonicalName());

    /** Progress is reported every this many terms. */
    private static final int PROGRESS_INTERVAL = 10000;

    /** Finer-grained progress interval used while fewer than {@link #PROGRESS_INTERVAL} terms have been seen. */
    private static final int EARLY_PROGRESS_INTERVAL = 1000;

    private final FlagConfig flagConfig;
    private final LuceneUtils luceneUtils;
    private VectorStore termVectors;
    private VectorStore elementalDocVectors;

    /**
     * Private constructor; instances are obtained through the static factory methods.
     *
     * @param flagConfig configuration used for this instance and for the {@code LuceneUtils} it creates
     * @throws IOException if constructing {@code LuceneUtils} fails
     */
    private TermVectorsFromLucene(FlagConfig flagConfig) throws IOException {
        this.flagConfig = flagConfig;
        this.luceneUtils = new LuceneUtils(flagConfig);
    }

    /**
     * @return the elemental document vectors used for training; {@code null} if this instance was
     *     created by {@link #createTermBasedRRIVectors(FlagConfig)}, which never sets them
     */
    public VectorStore getElementalDocVectors() {
        return this.elementalDocVectors;
    }

    /** @return the {@code LuceneUtils} instance created for this object's {@code FlagConfig} */
    public LuceneUtils getLuceneUtils() {
        return this.luceneUtils;
    }

    /** @return the term vectors produced by whichever factory method created this instance */
    public VectorStore getSemanticTermVectors() {
        return this.termVectors;
    }

    /**
     * Creates an instance and trains term vectors from the index.
     *
     * @param flagConfig configuration (content fields, vector type, dimension, ...)
     * @param vectorStore elemental document vectors to reuse, or {@code null} to have a new
     *     {@code ElementalVectorStore} created
     * @return the populated instance
     * @throws IOException if reading the index fails
     * @throws RuntimeException if {@code vectorStore} is non-null and its vector count differs from
     *     the number of documents reported by {@code LuceneUtils}
     */
    public static TermVectorsFromLucene createTermVectorsFromLucene(FlagConfig flagConfig, VectorStore vectorStore) throws IOException, RuntimeException {
        TermVectorsFromLucene termVectorsFromLucene = new TermVectorsFromLucene(flagConfig);
        termVectorsFromLucene.elementalDocVectors = vectorStore;
        termVectorsFromLucene.createTermVectorsFromLuceneImpl();
        return termVectorsFromLucene;
    }

    /** Validates or creates the elemental document vectors, then trains the term vectors. */
    private void createTermVectorsFromLuceneImpl() throws IOException {
        if (this.elementalDocVectors != null) {
            logger.info("Reusing basic doc vectors; number of documents: " + this.elementalDocVectors.getNumVectors());
            if (this.elementalDocVectors.getNumVectors() != this.luceneUtils.getNumDocs()) {
                throw new RuntimeException("Wrong number of basicDocVectors passed into constructor ...");
            }
        } else {
            this.elementalDocVectors = new ElementalVectorStore(this.flagConfig);
        }
        trainTermVectors();
    }

    /**
     * Trains one vector per accepted term in every content field and stores the result in
     * {@link #termVectors}.
     */
    private void trainTermVectors() throws IOException {
        // Keep a typed reference so vectors can be added without casting the field back.
        VectorStoreRAM trainedVectors = new VectorStoreRAM(this.flagConfig);
        this.termVectors = trainedVectors;
        VerbatimLogger.log(Level.INFO, "Creating semantic term vectors ...\n");

        for (String fieldName : this.flagConfig.contentsfields()) {
            int numTerms = countTerms(fieldName);
            VerbatimLogger.info("There are " + numTerms + " terms (and " + this.luceneUtils.getNumDocs() + " docs).\n");
        }

        for (String fieldName : this.flagConfig.contentsfields()) {
            VerbatimLogger.info("Training term vectors for field " + fieldName + "\n");
            int processed = 0;
            TermsEnum terms = this.luceneUtils.getTermsForField(fieldName).iterator(null);
            BytesRef termBytes;
            // next() returns null once the enumeration is exhausted; that is the loop's exit.
            while ((termBytes = terms.next()) != null) {
                if (processed % PROGRESS_INTERVAL == 0
                        || (processed < PROGRESS_INTERVAL && processed % EARLY_PROGRESS_INTERVAL == 0)) {
                    VerbatimLogger.info("Processed " + processed + " terms ... ");
                }
                processed++;
                Term term = new Term(fieldName, termBytes);
                if (this.luceneUtils.termFilter(term)) {
                    trainedVectors.putVector(term.text(), buildTermVector(term));
                }
            }
        }
        VerbatimLogger.info("\nCreated " + this.termVectors.getNumVectors() + " term vectors.\n");
    }

    /** Counts the terms of {@code fieldName} by walking a single enumeration to its end. */
    private int countTerms(String fieldName) throws IOException {
        // The enumeration must be obtained once; re-creating it per step would restart at the first term.
        TermsEnum terms = this.luceneUtils.getTermsForField(fieldName).iterator(null);
        int count = 0;
        while (terms.next() != null) {
            count++;
        }
        return count;
    }

    /**
     * Superposes the elemental vector of every document containing {@code term}, weighted by the
     * term's frequency in that document, and normalizes the sum.
     */
    private Vector buildTermVector(Term term) throws IOException {
        Vector termVector = VectorFactory.createZeroVector(this.flagConfig.vectortype(), this.flagConfig.dimension());
        DocsEnum docs = this.luceneUtils.getDocsForTerm(term);
        while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
            // Elemental document vectors are looked up by the document id rendered as a string.
            Vector docVector = this.elementalDocVectors.getVector(Integer.toString(docs.docID()));
            termVector.superpose(docVector, docs.freq(), null);
        }
        termVector.normalize();
        return termVector;
    }

    /**
     * Creates an instance holding elemental term vectors, either read from the store named by
     * {@code flagConfig.initialtermvectors()} or newly generated.
     *
     * @param flagConfig configuration (initial term vectors setting, content fields, ...)
     * @return the populated instance
     * @throws IOException if reading the index or the initial vector store fails
     */
    public static TermVectorsFromLucene createTermBasedRRIVectors(FlagConfig flagConfig) throws IOException, RuntimeException {
        // The constructor already creates the LuceneUtils for this flagConfig.
        TermVectorsFromLucene termVectorsFromLucene = new TermVectorsFromLucene(flagConfig);
        termVectorsFromLucene.createTermBasedRRIVectorsImpl();
        return termVectorsFromLucene;
    }

    /**
     * Populates {@link #termVectors}: from a file when {@code initialtermvectors} is set to anything
     * other than the empty string or {@code "random"}, otherwise through a new
     * {@code ElementalVectorStore}.
     */
    private void createTermBasedRRIVectorsImpl() throws IOException, RuntimeException {
        String initialTermVectors = this.flagConfig.initialtermvectors();
        if (!initialTermVectors.isEmpty() && !initialTermVectors.equals("random")) {
            loadElementalTermVectors(initialTermVectors);
            return;
        }

        logger.info("Generating new elemental term vectors");
        this.termVectors = new ElementalVectorStore(this.flagConfig);
        for (String fieldName : this.flagConfig.contentsfields()) {
            TermsEnum terms = this.luceneUtils.getTermsForField(fieldName).iterator(null);
            BytesRef termBytes;
            while ((termBytes = terms.next()) != null) {
                Term term = new Term(fieldName, termBytes);
                if (this.luceneUtils.termFilter(term)) {
                    // The result is discarded; the lookup is presumably made so the store
                    // creates an entry for this term -- confirm against ElementalVectorStore.
                    this.termVectors.getVector(term.text());
                }
            }
        }
    }

    /** Copies every vector of the store at {@code path} into a new in-memory store. */
    private void loadElementalTermVectors(String path) throws IOException {
        VerbatimLogger.info("Using elemental term vectors from file " + path);
        CloseableVectorStore source = VectorStoreReader.openVectorStore(path, this.flagConfig);
        int numRead = 0;
        try {
            VectorStoreRAM loadedVectors = new VectorStoreRAM(this.flagConfig);
            this.termVectors = loadedVectors;
            Enumeration<ObjectVector> allVectors = source.getAllVectors();
            while (allVectors.hasMoreElements()) {
                ObjectVector entry = allVectors.nextElement();
                loadedVectors.putVector(entry.getObject(), entry.getVector());
                numRead++;
            }
        } finally {
            // Release the underlying store even if copying fails part-way.
            source.close();
        }
        logger.info("Read in " + numRead + " vectors");
    }
}
