package pitt.search.semanticvectors;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Enumeration;
import java.util.Random;
import java.util.logging.Logger;
import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import pitt.search.semanticvectors.utils.VerbatimLogger;
import pitt.search.semanticvectors.vectors.Vector;
import pitt.search.semanticvectors.vectors.VectorFactory;

/* loaded from: input_file:pitt/search/semanticvectors/IncrementalTermVectors.class */
public class IncrementalTermVectors implements VectorStore {
    public static String usageMessage = "\nIncrementalTermVectors class in package pitt.search.semanticvectors\nUsage: java pitt.search.semanticvectors.IncrementalTermVectors [-docvectorsfile ...] [-luceneindexpath ...]\nIncrementalTermVectors creates termvectors files in local directory from docvectors file.";
    private static final Logger logger = Logger.getLogger(IncrementalTermVectors.class.getCanonicalName());
    private FlagConfig flagConfig;
    private VectorStoreRAM termVectorData;
    private LuceneUtils luceneUtils;

    public IncrementalTermVectors(FlagConfig flagConfig, LuceneUtils luceneUtils) throws IOException {
        this.luceneUtils = null;
        this.flagConfig = flagConfig;
        this.luceneUtils = luceneUtils;
        createIncrementalTermVectorsFromLucene();
    }

    private void initializeVectorStore() throws IOException {
        this.termVectorData = new VectorStoreRAM(this.flagConfig);
        for (String str : this.flagConfig.contentsfields()) {
            TermsEnum it = this.luceneUtils.getTermsForField(str).iterator(null);
            int i = 0;
            while (true) {
                BytesRef next = it.next();
                if (next != null) {
                    Term term = new Term(str, next);
                    if (this.termVectorData.getVector(term.text()) == null && this.luceneUtils.termFilter(term)) {
                        i++;
                        this.termVectorData.putVector(term.text(), VectorFactory.createZeroVector(this.flagConfig.vectortype(), this.flagConfig.dimension()));
                    }
                }
            }
            VerbatimLogger.info(String.format("There are %d terms (and %d docs)", Integer.valueOf(i), Integer.valueOf(this.luceneUtils.getNumDocs())));
        }
    }

    private void createIncrementalTermVectorsFromLucene() throws IOException {
        IndexInput indexInput;
        int numDocs = this.luceneUtils.getNumDocs();
        File file = new File(this.flagConfig.docvectorsfile());
        String parent = file.getParent();
        if (parent == null) {
            parent = "";
        }
        FSDirectory open = FSDirectory.open(new File(parent));
        new Random();
        try {
            indexInput = open.openInput(VectorStoreUtils.getStoreFileName(this.flagConfig.docvectorsfile(), this.flagConfig), IOContext.DEFAULT);
            logger.info("Reading vectors incrementally from file " + file);
            FlagConfig.mergeWriteableFlagsFromString(indexInput.readString(), this.flagConfig);
        } catch (FileNotFoundException e) {
            logger.info("No file '" + file + "' so will use random elemental vectors instead.");
            indexInput = null;
        }
        initializeVectorStore();
        for (int i = 0; i < numDocs; i++) {
            if (i % PatternReplaceCharFilter.DEFAULT_MAX_BLOCK_CHARS == 0 || (i < 10000 && i % LogDocMergePolicy.DEFAULT_MIN_MERGE_DOCS == 0)) {
                VerbatimLogger.info(i + " ... ");
            }
            if (indexInput.getFilePointer() < indexInput.length() - 1) {
                Vector createZeroVector = VectorFactory.createZeroVector(this.flagConfig.vectortype(), this.flagConfig.dimension());
                indexInput.readString();
                createZeroVector.readFromLuceneStream(indexInput);
                for (String str : this.flagConfig.contentsfields()) {
                    Terms termVector = this.luceneUtils.getTermVector(i, str);
                    if (termVector == null) {
                        logger.severe("No term vector for document " + i);
                    } else {
                        TermsEnum it = termVector.iterator(null);
                        while (true) {
                            BytesRef next = it.next();
                            if (next != null) {
                                Vector vector = null;
                                try {
                                    vector = this.termVectorData.getVector(next.utf8ToString());
                                } catch (NullPointerException e2) {
                                    logger.finest(String.format("term %s not represented", next.utf8ToString()));
                                }
                                if (vector != null && vector.getDimension() > 0) {
                                    it.docs(null, null).nextDoc();
                                    vector.superpose(createZeroVector, this.luceneUtils.getLocalTermWeight(r0.freq()), null);
                                }
                            }
                        }
                    }
                }
            }
        }
        Enumeration<ObjectVector> allVectors = this.termVectorData.getAllVectors();
        while (allVectors.hasMoreElements()) {
            ObjectVector nextElement = allVectors.nextElement();
            Vector vector2 = nextElement.getVector();
            vector2.normalize();
            nextElement.setVector(vector2);
        }
        if (indexInput != null) {
            indexInput.close();
        }
    }

    @Override // pitt.search.semanticvectors.VectorStore
    public Vector getVector(Object obj) {
        return this.termVectorData.getVector(obj);
    }

    @Override // pitt.search.semanticvectors.VectorStore
    public Enumeration<ObjectVector> getAllVectors() {
        return this.termVectorData.getAllVectors();
    }

    @Override // pitt.search.semanticvectors.VectorStore
    public int getNumVectors() {
        return this.termVectorData.getNumVectors();
    }

    public static void main(String[] strArr) throws IOException {
        try {
            FlagConfig flagConfig = FlagConfig.getFlagConfig(strArr);
            VectorStoreWriter.writeVectors(flagConfig.termvectorsfile(), flagConfig, new IncrementalTermVectors(flagConfig, new LuceneUtils(flagConfig)));
        } catch (IllegalArgumentException e) {
            System.err.println(usageMessage);
            throw e;
        }
    }

    @Override // pitt.search.semanticvectors.VectorStore
    public boolean containsVector(Object obj) {
        return getVector(obj) != null;
    }
}
