package de.julielab.topicmodeling;

import cc.mallet.topics.ParallelTopicModel;
import cc.mallet.types.InstanceList;
import cc.mallet.types.TokenSequence;
import de.julielab.topicmodeling.businessobjects.Document;
import de.julielab.topicmodeling.businessobjects.Model;
import de.julielab.topicmodeling.services.MalletTopicModeling;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.configuration2.XMLConfiguration;
import org.apache.commons.configuration2.ex.ConfigurationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/julielab/topicmodeling/MalletTopicModelGenerator.class */
/**
 * Command line front end that trains a topic model through {@link MalletTopicModeling},
 * saves it to a file and optionally verifies it and/or dumps its topics to a text file.
 *
 * <p>Documents are read either from a file or folder, or from the source used by
 * {@code MalletTopicModeling.readXmiDb} when the data location argument is {@code "none"}.
 */
public class MalletTopicModelGenerator {
    private static final Logger LOGGER = LoggerFactory.getLogger(MalletTopicModelGenerator.class);

    /** Usage text printed when arguments are missing or a run fails. */
    private static final String USAGE = "Usage: \nObligatory arguments: [0]<configuration file path>, [1]<file or folder path to PUBMED documents to be modelled> type 'none' for DB connection from dbcConnection file, [2]<newly generated model file path> \nOptional arguments: [3]verify (verifies the model after generating), [3]/[4]<filename for monitoring topics> (prints the topics from the new model in a file)";

    /**
     * Trains and saves a model, then runs the optional verification / topic dump steps.
     *
     * <p>Arguments: {@code [0]} configuration file, {@code [1]} data file or folder (or
     * {@code "none"} to read from the database), {@code [2]} model output file. With four
     * arguments, {@code [3]} is either the literal {@code "verify"} or a topics output file.
     * With five arguments, {@code [3]} may be {@code "verify"} and {@code [4]} is the topics
     * output file.
     *
     * @param strArr the command line arguments described above
     * @throws ConfigurationException declared for callers; failures during the run are
     *         caught, logged and followed by the usage text
     */
    public static void main(String[] strArr) throws ConfigurationException {
        // NOTE(review): LOGGER is created during class initialization, i.e. before this
        // line runs, so the logging backend may already be configured by now - confirm
        // that this property still takes effect.
        System.setProperty("logback.configurationFile", "src/main/resources/logback-complex.xml");

        // Check the argument count explicitly instead of relying on an
        // ArrayIndexOutOfBoundsException further down.
        if (strArr == null || strArr.length < 3) {
            System.out.println(USAGE);
            return;
        }

        try {
            MalletTopicModelGenerator generator = new MalletTopicModelGenerator();
            String configFile = strArr[0];
            String dataLocation = strArr[1];
            String modelFile = strArr[2];
            LOGGER.info("Started with config {} with data file location {}, and model will be written in file {}",
                    configFile, dataLocation, modelFile);

            Model model;
            if ("none".equals(dataLocation)) {
                model = generator.generateTopicModelFromDatabase(configFile, modelFile);
            } else {
                model = generator.generateTopicModel(configFile, dataLocation, modelFile);
            }

            if (strArr.length == 4) {
                // The single optional argument is either the verify switch or a topics file.
                if ("verify".equals(strArr[3])) {
                    generator.verifyModel(modelFile, configFile);
                } else {
                    generator.printTopicsToFile(new File(strArr[3]), model);
                }
            }
            if (strArr.length == 5) {
                if ("verify".equals(strArr[3])) {
                    generator.verifyModel(modelFile, configFile);
                }
                generator.printTopicsToFile(new File(strArr[4]), model);
            }
        } catch (Exception e) {
            // Top-level boundary: report through the logger, then show the usage text.
            LOGGER.error("Topic model generation failed", e);
            System.out.println(USAGE);
        }
    }

    /**
     * Trains a model on the documents read from a file or folder and saves it.
     *
     * @param str  path of the configuration file
     * @param str2 path of the file or folder the documents are read from
     * @param str3 path the trained model is saved to
     * @return the trained model
     * @throws ConfigurationException declared by the underlying configuration handling
     */
    public Model generateTopicModel(String str, String str2, String str3) throws ConfigurationException {
        MalletTopicModeling topicModeling = new MalletTopicModeling();
        XMLConfiguration config = topicModeling.loadConfig(str);
        Model model = topicModeling.train(topicModeling.readDocuments(new File(str2), config), config);
        topicModeling.saveModel(model, str3);
        return model;
    }

    /**
     * Trains a model on the documents returned by {@code MalletTopicModeling.readXmiDb},
     * maps the Mallet IDs to PubMed IDs and saves the model.
     *
     * @param str  path of the configuration file
     * @param str2 path the trained model is saved to
     * @return the trained model
     * @throws ConfigurationException declared by the underlying configuration handling
     */
    public Model generateTopicModelFromDatabase(String str, String str2) throws ConfigurationException {
        MalletTopicModeling topicModeling = new MalletTopicModeling();
        XMLConfiguration config = topicModeling.loadConfig(str);
        List<Document> documents = topicModeling.readXmiDb(topicModeling, config);

        // Declared as ArrayList because the parameter type of malletPreprocess is not
        // visible from this class.
        ArrayList<TokenSequence> tokenSequences = new ArrayList<>(documents.size());
        for (Document document : documents) {
            tokenSequences.add((TokenSequence) document.preprocessedData);
        }

        LOGGER.info("Start preprocessing with Mallet pipes");
        InstanceList instances = topicModeling.malletPreprocess(tokenSequences);
        LOGGER.info("Start training with Mallet");
        Model model = topicModeling.train(instances, config);
        LOGGER.info("Start mapping Mallet IDs to PMIDs");
        topicModeling.mapMalletIdToPubmedId(documents, model);
        topicModeling.saveModel(model, str2);
        LOGGER.info("Model is saved in file: {}", str2);
        return model;
    }

    /**
     * Reads a saved model and checks that the number of topic rows it reports equals the
     * {@code train.parameters.parameter.numTopics} value of the configuration. The outcome
     * is only logged; nothing is returned or thrown on a mismatch.
     *
     * @param str  path of the saved model file
     * @param str2 path of the configuration file
     * @throws ConfigurationException declared by the underlying configuration handling
     */
    public void verifyModel(String str, String str2) throws ConfigurationException {
        MalletTopicModeling topicModeling = new MalletTopicModeling();
        XMLConfiguration config = topicModeling.loadConfig(str2);
        ParallelTopicModel malletModel = topicModeling.readModel(str).malletModel;
        int actualTopics = malletModel.getTopWords(malletModel.numTypes).length;
        int expectedTopics = config.getInt("train.parameters.parameter.numTopics");
        if (actualTopics == expectedTopics) {
            LOGGER.info("Topic model verified.");
        } else {
            // A failed verification is logged as a warning so it stands out from normal progress output.
            LOGGER.warn("Topic model verification failed.");
        }
    }

    /**
     * Writes the top words of every topic of the model to a text file: a
     * {@code "Topic <i>"} line, one {@code "Word <j>: <word>"} line per word and a blank
     * line after each topic.
     *
     * @param file  the file to write; an existing file is overwritten
     * @param model the model whose {@code malletModel} supplies the topics
     * @throws IOException if the file cannot be opened or written
     */
    public void printTopicsToFile(File file, Model model) throws IOException {
        // Collect the words before opening the file so that a failure here does not
        // leave an empty or truncated output file behind.
        ParallelTopicModel malletModel = model.malletModel;
        Object[][] topWords = malletModel.getTopWords(malletModel.numTypes);

        // try-with-resources closes (and flushes) the writer even when a write fails.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
            for (int topic = 0; topic < topWords.length; topic++) {
                writer.write("Topic " + topic + "\n");
                Object[] words = topWords[topic];
                for (int rank = 0; rank < words.length; rank++) {
                    writer.write("Word " + rank + ": " + words[rank] + "\n");
                }
                writer.write("\n");
            }
        }
        LOGGER.info("Topics written in {}.", file.getAbsolutePath());
    }
}
