package pitt.search.semanticvectors.orthography;

import java.io.File;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.StringTokenizer;
import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;
import pitt.search.semanticvectors.FlagConfig;
import pitt.search.semanticvectors.LuceneUtils;
import pitt.search.semanticvectors.ObjectVector;
import pitt.search.semanticvectors.VectorStoreRAM;
import pitt.search.semanticvectors.VectorStoreWriter;
import pitt.search.semanticvectors.utils.Bobcat;
import pitt.search.semanticvectors.vectors.Vector;
import pitt.search.semanticvectors.vectors.VectorFactory;

/* loaded from: input_file:pitt/search/semanticvectors/orthography/SentenceVectors.class */
/**
 * Builds order-sensitive vectors for the text stored in a Lucene index.
 *
 * <p>Each token's term vector is bound to a vector looked up by the token's 1-based position,
 * and the bound vectors are superposed (weighted by a global term weight when a
 * {@link LuceneUtils} instance is available) into a single normalized vector per document.
 */
public class SentenceVectors {
    /** Number of documents between progress messages written to stderr. */
    private static final int PROGRESS_INTERVAL = 10000;

    /**
     * Source of global term weights; when {@code null}, every term is weighted {@code 1.0}.
     * Assigned by {@link #main(String[])}.
     */
    static LuceneUtils lUtils;

    /**
     * Computes the vector for a space-delimited phrase.
     *
     * <p>For every token, the term vector is fetched from {@code vectorStoreRAM2}; if absent, a
     * random vector seeded from the token text is generated and a copy is stored so later
     * lookups return the same vector. The term vector is bound to the position vector, added
     * to the running sum, and then released again so the stored term vector is not left bound.
     *
     * @param str phrase to encode; tokens are split on single spaces
     * @param vectorStoreRAM store of position vectors, queried with {@code Integer} keys
     *     {@code 1..tokenCount}
     * @param vectorStoreRAM2 store of term vectors; may gain entries for unseen tokens
     * @param flagConfig supplies vector type, dimension, seed length and the contents field
     * @return the normalized superposition of the position-bound term vectors
     * @throws IllegalStateException if a position vector is missing or binding fails
     */
    public static Vector getPhraseVector(String str, VectorStoreRAM vectorStoreRAM, VectorStoreRAM vectorStoreRAM2, FlagConfig flagConfig) {
        Vector phraseVector = VectorFactory.createZeroVector(flagConfig.vectortype(), flagConfig.dimension());
        Random random = new Random();
        StringTokenizer tokenizer = new StringTokenizer(str, " ");
        int tokenCount = tokenizer.countTokens();
        for (int i = 0; i < tokenCount; i++) {
            String token = tokenizer.nextToken();
            int position = i + 1;
            float weight = lUtils != null
                    ? lUtils.getGlobalTermWeight(new Term(flagConfig.contentsfields()[0], token))
                    : 1.0f;

            Vector termVector = vectorStoreRAM2.getVector(token);
            if (termVector == null) {
                // Seed from the token so the same token always yields the same random vector.
                random.setSeed(Bobcat.asLong(token));
                termVector = VectorFactory.generateRandomVector(flagConfig.vectortype(), flagConfig.dimension(), flagConfig.seedlength, random);
                vectorStoreRAM2.putVector(token, termVector.copy());
            }

            Vector positionVector = vectorStoreRAM.getVector(Integer.valueOf(position));
            if (positionVector == null) {
                // Previously this printed diagnostics and called System.exit(0), which
                // terminated the JVM with a success status from inside a library method.
                throw new IllegalStateException(
                        "No position vector for position " + position + " of " + tokenCount + " tokens");
            }

            try {
                termVector.bind(positionVector);
            } catch (RuntimeException e) {
                throw new IllegalStateException(
                        "Failed to bind term vector for token '" + token + "' to position " + position, e);
            }
            phraseVector.superpose(termVector, weight, null);
            // Undo the bind: termVector may be the instance held by the term store.
            termVector.release(positionVector);
        }
        phraseVector.normalize();
        return phraseVector;
    }

    /**
     * Reads every document from the Lucene index named by the {@code luceneindexpath} flag,
     * builds a vector for each document whose first contents field holds at least two tokens,
     * and writes three stores: {@code sentencevectors.bin}, {@code sentencenumbervectors.bin}
     * and {@code sentencetermvectors.bin}.
     *
     * @param strArr command-line flags understood by {@link FlagConfig#getFlagConfig}
     * @throws IllegalArgumentException if the flags are invalid or no index path is given
     * @throws Exception if reading the index or writing the vector stores fails
     */
    public static void main(String[] strArr) throws Exception {
        FlagConfig flagConfig = FlagConfig.getFlagConfig(strArr);
        String indexPath = flagConfig.luceneindexpath();
        if (indexPath.isEmpty()) {
            // The index is read unconditionally below, so an empty path can never work.
            throw new IllegalArgumentException("luceneindexpath must be set to a Lucene index directory");
        }
        lUtils = new LuceneUtils(flagConfig);

        String contentsField = flagConfig.contentsfields()[0];
        VectorStoreRAM sentenceVectors = new VectorStoreRAM(flagConfig);
        VectorStoreRAM termVectors = new VectorStoreRAM(flagConfig);
        if (!flagConfig.initialtermvectors().equals("random")) {
            termVectors.initFromFile(flagConfig.initialtermvectors());
        }
        VectorStoreRAM numberVectors = new VectorStoreRAM(flagConfig);
        // Position-vector stores, cached per sentence length.
        Map<Integer, VectorStoreRAM> positionStoresByLength = new HashMap<Integer, VectorStoreRAM>();
        NumberRepresentation numberRepresentation = new NumberRepresentation(flagConfig, "*STARTSENTENCE*", "*ENDSENTENCE*");

        FSDirectory directory = FSDirectory.open(new File(indexPath));
        try {
            DirectoryReader reader = DirectoryReader.open(directory);
            try {
                int numDocs = reader.numDocs();
                System.err.println("Numdocs " + numDocs);
                // NOTE(review): ids 0..numDocs-1 are visited; if the index contains deleted
                // documents the valid id range may differ -- confirm against the index used.
                for (int i = 0; i < numDocs; i++) {
                    if (i % PROGRESS_INTERVAL == 0) {
                        System.err.print(i + "...");
                    }
                    String contents = reader.document(i).get(contentsField);
                    if (contents == null) {
                        // Field not stored for this document; nothing to encode.
                        continue;
                    }
                    // Locale.ROOT keeps lower-casing of ASCII letters locale-independent.
                    String sentence = contents.replaceAll("[^A-Za-z]", " ").toLowerCase(Locale.ROOT);
                    int tokenCount = new StringTokenizer(sentence, " ").countTokens();
                    if (tokenCount < 2) {
                        continue;
                    }

                    VectorStoreRAM positionVectors = positionStoresByLength.get(tokenCount);
                    if (positionVectors == null) {
                        positionVectors = numberRepresentation.getNumberVectors(0, tokenCount + 1);
                        positionStoresByLength.put(tokenCount, positionVectors);
                        // Record each position vector under "<length>:<key>" for output.
                        Enumeration<ObjectVector> allVectors = positionVectors.getAllVectors();
                        while (allVectors.hasMoreElements()) {
                            ObjectVector entry = allVectors.nextElement();
                            String key = tokenCount + ":" + entry.getObject();
                            if (numberVectors.getVector(key) == null) {
                                numberVectors.putVector(key, entry.getVector());
                            }
                        }
                    }
                    sentenceVectors.putVector(sentence, getPhraseVector(sentence, positionVectors, termVectors, flagConfig));
                }
            } finally {
                reader.close();
            }
        } finally {
            directory.close();
        }

        VectorStoreWriter.writeVectorsInLuceneFormat("sentencevectors.bin", flagConfig, sentenceVectors);
        VectorStoreWriter.writeVectorsInLuceneFormat("sentencenumbervectors.bin", flagConfig, numberVectors);
        VectorStoreWriter.writeVectorsInLuceneFormat("sentencetermvectors.bin", flagConfig, termVectors);
    }
}
