package cc.mallet.topics.tui;

import cc.mallet.pipe.iterator.DBInstanceIterator;
import cc.mallet.topics.ParallelTopicModel;
import cc.mallet.topics.TopicModelDiagnostics;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.InstanceList;
import cc.mallet.util.CommandOption;
import cc.mallet.util.MalletLogger;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.PrintWriter;
import java.util.logging.Logger;

/* loaded from: input_file:cc/mallet/topics/tui/TopicTrainer.class */
/**
 * Command-line entry point that builds (or restores) a {@link ParallelTopicModel}, optionally
 * runs estimation and topic maximization on it, and writes whichever reports and serialized
 * artifacts were requested through the command-line options declared below.
 *
 * <p>Every option is registered against this class through its {@link CommandOption}
 * constructor; the field initializers run in textual order, so the declaration order below is
 * also the registration order.
 */
public class TopicTrainer {
    static CommandOption.String inputFile = new CommandOption.String(TopicTrainer.class, "input", "FILENAME", true, null, "The filename from which to read the list of training instances.  Use - for stdin.  The instances must be FeatureSequence or FeatureSequenceWithBigrams, not FeatureVector", null);
    static CommandOption.String inputModelFilename = new CommandOption.String(TopicTrainer.class, "input-model", "FILENAME", true, null, "The filename from which to read the binary topic model. The --input option is ignored. By default this is null, indicating that no file will be read.", null);
    static CommandOption.String inputStateFilename = new CommandOption.String(TopicTrainer.class, "input-state", "FILENAME", true, null, "The filename from which to read the gzipped Gibbs sampling state created by --output-state. The original input file must be included, using --input. By default this is null, indicating that no file will be read.", null);
    static CommandOption.String outputModelFilename = new CommandOption.String(TopicTrainer.class, "output-model", "FILENAME", true, null, "The filename in which to write the binary topic model at the end of the iterations.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String stateFile = new CommandOption.String(TopicTrainer.class, "output-state", "FILENAME", true, null, "The filename in which to write the Gibbs sampling state after at the end of the iterations.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.Integer outputModelInterval = new CommandOption.Integer(TopicTrainer.class, "output-model-interval", "INTEGER", true, 0, "The number of iterations between writing the model (and its Gibbs sampling state) to a binary file.  You must also set the --output-model to use this option, whose argument will be the prefix of the filenames.", null);
    static CommandOption.Integer outputStateInterval = new CommandOption.Integer(TopicTrainer.class, "output-state-interval", "INTEGER", true, 0, "The number of iterations between writing the sampling state to a text file.  You must also set the --output-state to use this option, whose argument will be the prefix of the filenames.", null);
    static CommandOption.String inferencerFilename = new CommandOption.String(TopicTrainer.class, "inferencer-filename", "FILENAME", true, null, "A topic inferencer applies a previously trained topic model to new documents.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String evaluatorFilename = new CommandOption.String(TopicTrainer.class, "evaluator-filename", "FILENAME", true, null, "A held-out likelihood evaluator for new documents.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String topicKeysFile = new CommandOption.String(TopicTrainer.class, "output-topic-keys", "FILENAME", true, null, "The filename in which to write the top words for each topic and any Dirichlet parameters.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.Integer topWords = new CommandOption.Integer(TopicTrainer.class, "num-top-words", "INTEGER", true, 20, "The number of most probable words to print for each topic after model estimation.", null);
    static CommandOption.Integer showTopicsInterval = new CommandOption.Integer(TopicTrainer.class, "show-topics-interval", "INTEGER", true, 50, "The number of iterations between printing a brief summary of the topics so far.", null);
    static CommandOption.String topicWordWeightsFile = new CommandOption.String(TopicTrainer.class, "topic-word-weights-file", "FILENAME", true, null, "The filename in which to write unnormalized weights for every topic and word type.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String wordTopicCountsFile = new CommandOption.String(TopicTrainer.class, "word-topic-counts-file", "FILENAME", true, null, "The filename in which to write a sparse representation of topic-word assignments.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String diagnosticsFile = new CommandOption.String(TopicTrainer.class, "diagnostics-file", "FILENAME", true, null, "The filename in which to write measures of topic quality, in XML format.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String topicReportXMLFile = new CommandOption.String(TopicTrainer.class, "xml-topic-report", "FILENAME", true, null, "The filename in which to write the top words for each topic and any Dirichlet parameters in XML format.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String topicPhraseReportXMLFile = new CommandOption.String(TopicTrainer.class, "xml-topic-phrase-report", "FILENAME", true, null, "The filename in which to write the top words and phrases for each topic and any Dirichlet parameters in XML format.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String topicDocsFile = new CommandOption.String(TopicTrainer.class, "output-topic-docs", "FILENAME", true, null, "The filename in which to write the most prominent documents for each topic, at the end of the iterations.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.Integer numTopDocs = new CommandOption.Integer(TopicTrainer.class, "num-top-docs", "INTEGER", true, 100, "When writing topic documents with --output-topic-docs, report this number of top documents.", null);
    static CommandOption.String docTopicsFile = new CommandOption.String(TopicTrainer.class, "output-doc-topics", "FILENAME", true, null, "The filename in which to write the topic proportions per document, at the end of the iterations.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.Double docTopicsThreshold = new CommandOption.Double(TopicTrainer.class, "doc-topics-threshold", "DECIMAL", true, 0.0d, "When writing topic proportions per document with --output-doc-topics, do not print topics with proportions less than this threshold value.", null);
    static CommandOption.Integer docTopicsMax = new CommandOption.Integer(TopicTrainer.class, "doc-topics-max", "INTEGER", true, -1, "When writing topic proportions per document with --output-doc-topics, do not print more than INTEGER number of topics.  A negative value indicates that all topics should be printed.", null);
    static CommandOption.Integer numTopics = new CommandOption.Integer(TopicTrainer.class, "num-topics", "INTEGER", true, 10, "The number of topics to fit.", null);
    static CommandOption.Integer numThreads = new CommandOption.Integer(TopicTrainer.class, "num-threads", "INTEGER", true, 1, "The number of threads for parallel training.", null);
    static CommandOption.Integer numIterations = new CommandOption.Integer(TopicTrainer.class, "num-iterations", "INTEGER", true, 1000, "The number of iterations of Gibbs sampling.", null);
    static CommandOption.Integer numMaximizationIterations = new CommandOption.Integer(TopicTrainer.class, "num-icm-iterations", "INTEGER", true, 0, "The number of iterations of iterated conditional modes (topic maximization).", null);
    static CommandOption.Boolean noInference = new CommandOption.Boolean(TopicTrainer.class, "no-inference", "true|false", false, false, "Do not perform inference, just load a saved model and create a report. Equivalent to --num-iterations 0.", null);
    static CommandOption.Integer randomSeed = new CommandOption.Integer(TopicTrainer.class, "random-seed", "INTEGER", true, 0, "The random seed for the Gibbs sampler.  Default is 0, which will use the clock.", null);
    static CommandOption.Integer optimizeInterval = new CommandOption.Integer(TopicTrainer.class, "optimize-interval", "INTEGER", true, 0, "The number of iterations between reestimating dirichlet hyperparameters.", null);
    static CommandOption.Integer optimizeBurnIn = new CommandOption.Integer(TopicTrainer.class, "optimize-burn-in", "INTEGER", true, 200, "The number of iterations to run before first estimating dirichlet hyperparameters.", null);
    static CommandOption.Boolean useSymmetricAlpha = new CommandOption.Boolean(TopicTrainer.class, "use-symmetric-alpha", "true|false", false, false, "Only optimize the concentration parameter of the prior over document-topic distributions. This may reduce the number of very small, poorly estimated topics, but may disperse common words over several topics.", null);
    static CommandOption.Double alpha = new CommandOption.Double(TopicTrainer.class, "alpha", "DECIMAL", true, 5.0d, "SumAlpha parameter: sum over topics of smoothing over doc-topic distributions. alpha_k = [this value] / [num topics]", null);
    static CommandOption.Double beta = new CommandOption.Double(TopicTrainer.class, "beta", "DECIMAL", true, 0.01d, "Beta parameter: smoothing parameter for each topic-word. beta_w = [this value]", null);

    private static final Logger logger = MalletLogger.getLogger(TopicTrainer.class.getName());

    /**
     * Parses the command line, prepares the topic model, optionally trains it, and writes every
     * requested output.
     *
     * <p>If option parsing throws {@link IllegalArgumentException}, the message is logged and
     * the JVM exits with status 0 (kept as-is for backward compatibility). Failure to read the
     * saved model, failure to load the instance list, or instance data that is not a
     * {@link FeatureSequence} ends the JVM with status 1.
     *
     * @param strArr the raw command-line arguments
     * @throws IOException if reading the saved state, training, or writing one of the
     *     non-serialized reports fails
     */
    public static void main(String[] strArr) throws IOException {
        CommandOption.setSummary(TopicTrainer.class, "A tool for estimating, saving and printing diagnostics for topic models, such as LDA.");
        try {
            CommandOption.process(TopicTrainer.class, strArr);
        } catch (IllegalArgumentException e) {
            logger.warning("");
            logger.warning(e.getMessage());
            System.exit(0);
        }

        ParallelTopicModel model = createOrRestoreModel();
        if (randomSeed.value != 0) {
            model.setRandomSeed(randomSeed.value);
        }
        if (inputFile.value != null) {
            model.addInstances(loadTrainingInstances());
        }
        if (inputStateFilename.value != null) {
            logger.info("Initializing from saved state.");
            model.initializeFromState(new File(inputStateFilename.value));
        }

        configureModel(model);

        if (!noInference.value()) {
            model.estimate();
        }
        if (numMaximizationIterations.value > 0) {
            model.maximize(numMaximizationIterations.value);
        }

        writeReports(model);
        writeSerializedOutputs(model);
    }

    /**
     * Restores the model named by {@code --input-model}, or builds a fresh one from
     * {@code --num-topics}, {@code --alpha} and {@code --beta} when no input model is given.
     * Logs a warning and exits with status 1 if the saved model cannot be read.
     */
    private static ParallelTopicModel createOrRestoreModel() {
        if (inputModelFilename.value == null) {
            return new ParallelTopicModel(numTopics.value, alpha.value, beta.value);
        }
        try {
            return ParallelTopicModel.read(new File(inputModelFilename.value));
        } catch (Exception e) {
            logger.warning("Unable to restore saved topic model " + inputModelFilename.value + ": " + e);
            System.exit(1);
            // Only reached if System.exit itself does not terminate the JVM.
            return null;
        }
    }

    /**
     * Loads the instance list named by {@code --input}. A value starting with {@code "db:"} is
     * read through {@link DBInstanceIterator}; anything else is loaded as a file. Exits with
     * status 1 if loading fails or the first instance does not hold a {@link FeatureSequence}.
     */
    private static InstanceList loadTrainingInstances() {
        InstanceList instances = null;
        try {
            if (inputFile.value.startsWith("db:")) {
                instances = DBInstanceIterator.getInstances(inputFile.value.substring(3));
            } else {
                instances = InstanceList.load(new File(inputFile.value));
            }
        } catch (Exception e) {
            logger.warning("Unable to restore instance list " + inputFile.value + ": " + e);
            System.exit(1);
        }
        logger.info("Data loaded.");

        // Only the first instance is inspected; an empty list or a null first entry is accepted.
        boolean firstInstancePresent = instances.size() > 0 && instances.get(0) != null;
        if (firstInstancePresent && !(instances.get(0).getData() instanceof FeatureSequence)) {
            logger.warning("Topic modeling currently only supports feature sequences: use --keep-sequence option when importing data.");
            System.exit(1);
        }
        return instances;
    }

    /** Copies the training-related command-line settings onto the model. */
    private static void configureModel(ParallelTopicModel model) {
        model.setTopicDisplay(showTopicsInterval.value, topWords.value);
        model.setNumIterations(numIterations.value);
        model.setOptimizeInterval(optimizeInterval.value);
        model.setBurninPeriod(optimizeBurnIn.value);
        model.setSymmetricAlpha(useSymmetricAlpha.value);
        if (outputStateInterval.value != 0) {
            model.setSaveState(outputStateInterval.value, stateFile.value);
        }
        if (outputModelInterval.value != 0) {
            model.setSaveSerializedModel(outputModelInterval.value, outputModelFilename.value);
        }
        model.setNumThreads(numThreads.value);
    }

    /**
     * Writes each text/XML report whose output filename option is set. Writers are opened with
     * try-with-resources so they are closed even when report generation throws.
     */
    private static void writeReports(ParallelTopicModel model) throws IOException {
        if (topicKeysFile.value != null) {
            model.printTopWords(new File(topicKeysFile.value), topWords.value, false);
        }
        if (diagnosticsFile.value != null) {
            try (PrintWriter out = new PrintWriter(diagnosticsFile.value)) {
                TopicModelDiagnostics diagnostics = new TopicModelDiagnostics(model, topWords.value);
                out.println(diagnostics.toXML());
            }
        }
        if (topicReportXMLFile.value != null) {
            try (PrintWriter out = new PrintWriter(topicReportXMLFile.value)) {
                model.topicXMLReport(out, topWords.value);
            }
        }
        if (topicPhraseReportXMLFile.value != null) {
            try (PrintWriter out = new PrintWriter(topicPhraseReportXMLFile.value)) {
                model.topicPhraseXMLReport(out, topWords.value);
            }
        }
        // The final state is written here only when no periodic state interval was requested.
        if (stateFile.value != null && outputStateInterval.value == 0) {
            model.printState(new File(stateFile.value));
        }
        if (topicDocsFile.value != null) {
            try (PrintWriter out = new PrintWriter(new FileWriter(new File(topicDocsFile.value)))) {
                model.printTopicDocuments(out, numTopDocs.value);
            }
        }
        if (docTopicsFile.value != null) {
            try (PrintWriter out = new PrintWriter(new FileWriter(new File(docTopicsFile.value)))) {
                // NOTE: with a threshold of exactly 0.0 the dense writer is used, so the
                // --doc-topics-max value is not consulted in that case.
                if (docTopicsThreshold.value == 0.0d) {
                    model.printDenseDocumentTopics(out);
                } else {
                    model.printDocumentTopics(out, docTopicsThreshold.value, docTopicsMax.value);
                }
            }
        }
        if (topicWordWeightsFile.value != null) {
            model.printTopicWordWeights(new File(topicWordWeightsFile.value));
        }
        if (wordTopicCountsFile.value != null) {
            model.printTypeTopicCounts(new File(wordTopicCountsFile.value));
        }
    }

    /**
     * Serializes the model, its inferencer and its evaluator to whichever of the corresponding
     * output files were requested. Each write is best-effort: a failure is logged as a warning
     * and the remaining outputs are still attempted.
     */
    private static void writeSerializedOutputs(ParallelTopicModel model) {
        if (outputModelFilename.value != null) {
            assert model != null;
            try (FileOutputStream fileOut = new FileOutputStream(outputModelFilename.value);
                    ObjectOutputStream objectOut = new ObjectOutputStream(fileOut)) {
                objectOut.writeObject(model);
            } catch (Exception e) {
                // Include the exception so the reason for the failure is not lost.
                logger.warning("Couldn't write topic model to filename " + outputModelFilename.value + ": " + e);
            }
        }
        if (inferencerFilename.value != null) {
            try (FileOutputStream fileOut = new FileOutputStream(inferencerFilename.value);
                    ObjectOutputStream objectOut = new ObjectOutputStream(fileOut)) {
                objectOut.writeObject(model.getInferencer());
            } catch (Exception e) {
                logger.warning("Couldn't create inferencer: " + e.getMessage());
            }
        }
        if (evaluatorFilename.value != null) {
            try (FileOutputStream fileOut = new FileOutputStream(evaluatorFilename.value);
                    ObjectOutputStream objectOut = new ObjectOutputStream(fileOut)) {
                objectOut.writeObject(model.getProbEstimator());
            } catch (Exception e) {
                logger.warning("Couldn't create evaluator: " + e.getMessage());
            }
        }
    }
}
