package pitt.search.semanticvectors.orthography;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import org.jsoup.Jsoup;
import pitt.search.semanticvectors.FlagConfig;
import pitt.search.semanticvectors.VectorStoreDeterministic;
import pitt.search.semanticvectors.VectorStoreOrthographical;
import pitt.search.semanticvectors.VectorStoreRAM;
import pitt.search.semanticvectors.VectorStoreWriter;
import pitt.search.semanticvectors.vectors.VectorFactory;

/* loaded from: input_file:pitt/search/semanticvectors/orthography/CharRepresentation.class */
/**
 * Command-line tool that builds character-level and term-level vector stores
 * from a file or directory tree of documents.
 *
 * <p>For every character seen in the input, a "semantic" character vector is
 * accumulated by superposing the elemental vectors of the characters that
 * immediately precede and follow it. Term vectors are then generated through
 * {@link VectorStoreOrthographical}, once on top of the semantic character
 * vectors and once on top of the elemental character vectors, and each store
 * is written out with {@link VectorStoreWriter}.
 */
public class CharRepresentation {
    /** Character encoding used for every file read by this class. */
    public static final String ENCODING = "utf8";

    private final FlagConfig flagConfig;
    /** Source of the per-character vectors that get superposed onto neighbours. */
    private final VectorStoreDeterministic elementalCharVectors;
    /** Per-character vectors accumulated from neighbouring characters. */
    private final VectorStoreRAM semanticCharVectors;
    /** Term vector store; replaced in {@link #main} for each output variant. */
    private VectorStoreOrthographical semanticTermVectors;

    /**
     * Creates an empty representation whose stores are configured by the given flags.
     *
     * @param flagConfig configuration supplying (at least) vector type and dimension
     */
    public CharRepresentation(FlagConfig flagConfig) {
        this.flagConfig = flagConfig;
        this.elementalCharVectors = new VectorStoreDeterministic(this.flagConfig);
        this.semanticCharVectors = new VectorStoreRAM(this.flagConfig);
    }

    /** Returns the character at {@code i} of {@code str} as a one-character string. */
    private static String letterAt(String str, int i) {
        return String.valueOf(str.charAt(i));
    }

    /**
     * Updates the semantic character vectors with one string: each character's
     * vector receives the elemental vectors of its left and right neighbours
     * (where they exist) with weight 1.0.
     */
    private void addStringToCharRep(String str) {
        int lastIndex = str.length() - 1;
        for (int i = 0; i <= lastIndex; i++) {
            String current = letterAt(str, i);
            // First sighting of this character: start it from a zero vector.
            if (this.semanticCharVectors.getVector(current) == null) {
                this.semanticCharVectors.putVector(current, VectorFactory.createZeroVector(this.flagConfig.vectortype(), this.flagConfig.dimension()));
            }
            if (i > 0) {
                this.semanticCharVectors.getVector(current).superpose(this.elementalCharVectors.getVector(letterAt(str, i - 1)), 1.0d, null);
            }
            if (i < lastIndex) {
                this.semanticCharVectors.getVector(current).superpose(this.elementalCharVectors.getVector(letterAt(str, i + 1)), 1.0d, null);
            }
        }
    }

    /**
     * Parses {@code file} with Jsoup, extracts its text, and feeds every
     * space-separated token to {@link #addStringToCharRep(String)}.
     *
     * @throws IOException if the file cannot be read or parsed
     */
    private void addHtmlFileToCharRep(File file) throws IOException {
        StringTokenizer tokens = new StringTokenizer(Jsoup.parse(file, ENCODING).text(), " ");
        while (tokens.hasMoreTokens()) {
            addStringToCharRep(tokens.nextToken());
        }
    }

    /**
     * Opens {@code file} for line-oriented reading, decoding with {@link #ENCODING}.
     * The underlying stream is closed if the reader cannot be constructed.
     */
    private static BufferedReader openReader(File file) throws IOException {
        FileInputStream stream = new FileInputStream(file);
        try {
            return new BufferedReader(new InputStreamReader(stream, ENCODING));
        } catch (IOException e) {
            stream.close();
            throw e;
        }
    }

    /**
     * Reads {@code file} as plain text and feeds each whole line (not split
     * into tokens) to {@link #addStringToCharRep(String)}.
     *
     * @throws IOException if the file cannot be opened or read
     */
    private void addFileToCharRep(File file) throws IOException {
        // Decode with ENCODING rather than the platform default so results do not
        // depend on the machine's locale and match addFileToTermRep.
        BufferedReader reader = openReader(file);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                addStringToCharRep(line);
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Requests a vector from {@link #semanticTermVectors} for every
     * space-separated token of {@code str} that the store does not yet contain.
     */
    private void addStringToTermRep(String str) {
        StringTokenizer tokens = new StringTokenizer(str, " ");
        while (tokens.hasMoreTokens()) {
            String term = tokens.nextToken();
            if (!this.semanticTermVectors.containsVector(term)) {
                // Return value intentionally unused; presumably the store generates
                // and retains the vector on first lookup -- TODO confirm against
                // VectorStoreOrthographical.
                this.semanticTermVectors.getVector(term);
            }
        }
    }

    /**
     * Parses {@code file} with Jsoup and passes its text to
     * {@link #addStringToTermRep(String)}.
     *
     * @throws IOException if the file cannot be read or parsed
     */
    private void addHtmlFileToTermRep(File file) throws IOException {
        addStringToTermRep(Jsoup.parse(file, ENCODING).text());
    }

    /**
     * Reads {@code file} as plain text and passes each line to
     * {@link #addStringToTermRep(String)}.
     *
     * @throws IOException if the file cannot be opened or read
     */
    private void addFileToTermRep(File file) throws IOException {
        BufferedReader reader = openReader(file);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                addStringToTermRep(line);
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Collects every regular file at or below {@code file} into {@code list}.
     *
     * <p>If {@code file} is itself a regular file it is added directly. If it is
     * a directory its entries are visited recursively. Paths that are neither,
     * or directories whose contents cannot be listed, contribute nothing.
     *
     * @param file starting file or directory
     * @param list receives the files found, in traversal order
     */
    public static void listDirRecursive(File file, List<File> list) {
        if (file.isFile()) {
            list.add(file);
            return;
        }
        // listFiles() returns null when the path is not a directory or an I/O
        // error occurs; treat that as "nothing to add" instead of crashing.
        File[] entries = file.listFiles();
        if (entries == null) {
            return;
        }
        for (File entry : entries) {
            if (entry.isFile()) {
                list.add(entry);
            } else if (entry.isDirectory()) {
                listDirRecursive(entry, list);
            }
        }
    }

    /** Indexes the terms of every file in {@code files} into the current term store. */
    private void indexTerms(List<File> files) throws IOException {
        for (File source : files) {
            System.out.println("Indexing words from: " + source.getAbsolutePath());
            addHtmlFileToTermRep(source);
        }
    }

    /**
     * Entry point. Parses flags, takes the first remaining argument as the input
     * file or directory, and writes three vector stores: {@code charvectors},
     * {@code termvectors_semchar} and {@code termvectors_elemchar}.
     *
     * @param strArr command-line flags followed by the input path
     * @throws IOException if reading an input file or writing a store fails
     * @throws IllegalArgumentException if no input path is given or it does not exist
     */
    public static void main(String[] strArr) throws IOException {
        FlagConfig flagConfig = FlagConfig.getFlagConfig(strArr);
        if (flagConfig.remainingArgs == null || flagConfig.remainingArgs.length == 0) {
            throw new IllegalArgumentException("Missing argument: path to a file or directory to index.");
        }
        String inputPath = flagConfig.remainingArgs[0];
        File root = new File(inputPath);
        if (!root.exists()) {
            // Report the path that was actually checked, not the first raw
            // command-line token (which may be a flag).
            throw new IllegalArgumentException("Not a file or directory: '" + inputPath + "'.");
        }
        List<File> files = new ArrayList<File>();
        listDirRecursive(root, files);

        CharRepresentation charRepresentation = new CharRepresentation(flagConfig);
        for (File source : files) {
            System.out.println("Indexing chars from: " + source.getAbsolutePath());
            charRepresentation.addHtmlFileToCharRep(source);
        }
        VectorStoreWriter.writeVectors("charvectors", flagConfig, charRepresentation.semanticCharVectors);

        // Term vectors built on the accumulated (semantic) character vectors.
        charRepresentation.semanticTermVectors = new VectorStoreOrthographical(flagConfig, charRepresentation.semanticCharVectors);
        charRepresentation.indexTerms(files);
        VectorStoreWriter.writeVectors("termvectors_semchar", flagConfig, charRepresentation.semanticTermVectors);

        // Term vectors built on the elemental character vectors.
        charRepresentation.semanticTermVectors = new VectorStoreOrthographical(flagConfig, charRepresentation.elementalCharVectors);
        charRepresentation.indexTerms(files);
        VectorStoreWriter.writeVectors("termvectors_elemchar", flagConfig, charRepresentation.semanticTermVectors);
    }
}
