package pitt.search.semanticvectors;

import java.io.IOException;
import java.util.Enumeration;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import pitt.search.semanticvectors.utils.VerbatimLogger;
import pitt.search.semanticvectors.vectors.Vector;
import pitt.search.semanticvectors.vectors.VectorFactory;
import pitt.search.semanticvectors.vectors.VectorType;

/**
 * Builds and holds one vector per Lucene document by superposing the vectors of the
 * terms that occur in that document.
 *
 * <p>Document vectors are stored in an in-memory {@link VectorStoreRAM} keyed by the
 * decimal string of the Lucene document number. Use {@link #makeWriteableVectorStore()}
 * to obtain a copy keyed by the value of the configured {@code docidfield} instead.
 */
public class DocVectors implements VectorStore {
    private static final Logger logger = Logger.getLogger(DocVectors.class.getCanonicalName());

    /** Progress is reported every time this many terms have been processed. */
    private static final int PROGRESS_LOG_INTERVAL = 10000;

    /** Finer reporting interval used while fewer than {@link #PROGRESS_LOG_INTERVAL} terms are done. */
    private static final int EARLY_PROGRESS_LOG_INTERVAL = 1000;

    private final FlagConfig flagConfig;
    private final VectorStoreRAM docVectors;
    private final VectorStore termVectors;
    private final LuceneUtils luceneUtils;

    /**
     * Document indexing strategies. The constants are not referenced anywhere else in
     * this class.
     */
    public enum DocIndexingStrategy {
        INMEMORY,
        INCREMENTAL,
        NONE
    }

    /** Returns the vector type taken from the flag configuration. */
    public VectorType getVectorType() {
        return this.flagConfig.vectortype();
    }

    /** Returns the vector dimension taken from the flag configuration. */
    public int getDimension() {
        return this.flagConfig.dimension();
    }

    /**
     * Creates the document vector store and immediately trains it: one zero vector is
     * allocated per document, then every term vector is superposed onto the documents
     * containing that term, and finally all document vectors are normalized.
     *
     * @param vectorStore term vectors used as the basis for the document vectors
     * @param flagConfig  configuration supplying vector type, dimension, content fields,
     *                    the field-weight switch and the document id field
     * @param luceneUtils accessor for the Lucene index
     * @throws IOException declared for callers; I/O errors raised while processing an
     *                     individual term are logged and that term is skipped
     */
    public DocVectors(VectorStore vectorStore, FlagConfig flagConfig, LuceneUtils luceneUtils) throws IOException {
        this.flagConfig = flagConfig;
        this.luceneUtils = luceneUtils;
        this.termVectors = vectorStore;
        this.docVectors = new VectorStoreRAM(flagConfig);
        initializeDocVectors();
        trainDocVectors();
    }

    /**
     * Superposes every term vector onto the vectors of the documents containing the term,
     * then normalizes all document vectors.
     */
    private void trainDocVectors() {
        VerbatimLogger.info("Building document vectors ... ");
        Enumeration<ObjectVector> termEnum = this.termVectors.getAllVectors();
        int termCount = 0;
        while (termEnum.hasMoreElements()) {
            if (termCount % PROGRESS_LOG_INTERVAL == 0
                    || (termCount < PROGRESS_LOG_INTERVAL && termCount % EARLY_PROGRESS_LOG_INTERVAL == 0)) {
                VerbatimLogger.info("Processed " + termCount + " terms ... ");
            }
            termCount++;
            ObjectVector termEntry = termEnum.nextElement();
            try {
                addTermToDocVectors(termEntry);
            } catch (IOException e) {
                // Best effort: a failure on one term must not abort the whole build.
                logger.log(Level.WARNING,
                        "I/O error while adding term #" + termCount + " to document vectors; continuing with next term.",
                        e);
            }
        }
        VerbatimLogger.info("\nNormalizing doc vectors ...\n");
        for (int docIndex = 0; docIndex < this.luceneUtils.getNumDocs(); docIndex++) {
            this.docVectors.getVector(Integer.toString(docIndex)).normalize();
        }
    }

    /**
     * Adds one term's vector to every document that contains the term, in each configured
     * content field. The contribution is weighted by
     * {@code termFrequency * globalTermWeight * fieldWeight}, where {@code fieldWeight}
     * stays at 1 unless the field-weight flag is set.
     *
     * @param termEntry the term (as the entry's object) together with its vector
     * @throws IOException if reading postings or term vectors from the index fails
     */
    private void addTermToDocVectors(ObjectVector termEntry) throws IOException {
        Vector termVector = termEntry.getVector();
        String termText = (String) termEntry.getObject();
        for (String field : this.flagConfig.contentsfields()) {
            Term term = new Term(field, termText);
            float globalWeight = this.luceneUtils.getGlobalTermWeight(term);
            DocsEnum postings = this.luceneUtils.getDocsForTerm(term);
            if (postings == null) {
                // No postings returned for this term in this field.
                continue;
            }
            float fieldWeight = 1.0f;
            while (postings.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                int docID = postings.docID();
                Vector docVector = this.docVectors.getVector(Integer.toString(docID));
                float localFrequency = postings.freq();
                if (this.flagConfig.fieldweight()) {
                    fieldWeight = computeFieldWeight(docID, field);
                }
                docVector.superpose(termVector, localFrequency * globalWeight * fieldWeight, null);
            }
        }
    }

    /**
     * Computes the field-length weight {@code 1 / sqrt(n)}, where {@code n} is the number
     * of entries enumerated from the document's term vector for the given field.
     *
     * @param docID Lucene document number
     * @param field name of the field whose term vector is counted
     * @return the weight as a float
     * @throws IOException if the term vector cannot be read
     */
    private float computeFieldWeight(int docID, String field) throws IOException {
        // The iterator must be obtained exactly once: requesting a new one on every pass
        // restarts the enumeration at the first term and the count never terminates.
        // NOTE(review): getTermVector presumably returns null when the field has no stored
        // term vector, which would fail here — confirm against LuceneUtils.
        TermsEnum fieldTerms = this.luceneUtils.getTermVector(docID, field).iterator(null);
        int numTerms = 0;
        while (fieldTerms.next() != null) {
            numTerms++;
        }
        return (float) (1.0d / Math.sqrt(numTerms));
    }

    /** Allocates a zero vector for every document number from 0 to {@code getNumDocs() - 1}. */
    private void initializeDocVectors() {
        VerbatimLogger.info("Initializing document vector store ... \n");
        for (int docIndex = 0; docIndex < this.luceneUtils.getNumDocs(); docIndex++) {
            this.docVectors.putVector(
                    Integer.toString(docIndex),
                    VectorFactory.createZeroVector(this.flagConfig.vectortype(), this.flagConfig.dimension()));
        }
    }

    /**
     * Creates a new in-memory store holding the same vectors, keyed by the value of the
     * configured {@code docidfield} of each document instead of by document number.
     *
     * <p>A document without that field is stored under the empty string. A document whose
     * field is present but empty is also stored under the empty string, after a warning.
     * If reading a document fails, the error is logged and that document is left out.
     *
     * @return the re-keyed vector store
     */
    public VectorStore makeWriteableVectorStore() {
        VectorStoreRAM outputVectors = new VectorStoreRAM(this.flagConfig);
        for (int docIndex = 0; docIndex < this.luceneUtils.getNumDocs(); docIndex++) {
            String docName = "";
            try {
                if (this.luceneUtils.getDoc(docIndex).getField(this.flagConfig.docidfield()) != null) {
                    docName = this.luceneUtils.getDoc(docIndex).getField(this.flagConfig.docidfield()).stringValue();
                    if (docName.length() == 0) {
                        logger.warning("Empty document name!!! This will cause problems ...");
                        logger.warning("Please set -docidfield to a nonempty field in your Lucene index.");
                    }
                }
                outputVectors.putVector(docName, this.docVectors.getVector(Integer.toString(docIndex)));
            } catch (IOException e) {
                // Also covers CorruptIndexException, which is an IOException subclass.
                logger.log(Level.WARNING,
                        "Could not read document " + docIndex + " from the index; its vector is omitted.",
                        e);
            }
        }
        return outputVectors;
    }

    /** Returns the document vector stored under the given key, as reported by the backing store. */
    @Override // pitt.search.semanticvectors.VectorStore
    public Vector getVector(Object obj) {
        return this.docVectors.getVector(obj);
    }

    /** Returns an enumeration over all document vectors in the backing store. */
    @Override // pitt.search.semanticvectors.VectorStore
    public Enumeration<ObjectVector> getAllVectors() {
        return this.docVectors.getAllVectors();
    }

    /** Returns the number of document vectors in the backing store. */
    @Override // pitt.search.semanticvectors.VectorStore
    public int getNumVectors() {
        return this.docVectors.getNumVectors();
    }

    /** Returns {@code true} when {@link #getVector(Object)} yields a non-null vector for the key. */
    @Override // pitt.search.semanticvectors.VectorStore
    public boolean containsVector(Object obj) {
        return getVector(obj) != null;
    }
}
