package cc.mallet.util;

import cc.mallet.pipe.NGramPreprocessor;
import cc.mallet.pipe.iterator.CsvIterator;
import cc.mallet.types.Instance;
import cc.mallet.util.CommandOption;
import java.io.File;
import java.io.FileReader;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.logging.Logger;
import org.apache.uima.pear.tools.InstallationController;
import org.apache.uima.pear.tools.InstallationDescriptorHandler;
import org.apache.uima.util.Level;

/* loaded from: input_file:cc/mallet/util/Replacer.class */
/**
 * Command-line tool that applies n-gram replacements and deletions to text.
 *
 * <p>Each input file is read line by line; every line is matched against the
 * {@code line-regex} option to extract the data, label and name groups. The
 * resulting instances are passed through an {@link NGramPreprocessor} loaded
 * with the configured replacement and deletion files, and each processed
 * instance is written to the output file as
 * {@code name <TAB> target <TAB> data}.
 */
public class Replacer {
    /** Number of written instances between two progress log messages. */
    private static final int PROGRESS_INTERVAL = 10000;

    protected static Logger logger = MalletLogger.getLogger(Replacer.class.getName());
    static CommandOption.SpacedStrings inputFiles = new CommandOption.SpacedStrings(Replacer.class, "input", "FILE [FILE ...]", true, null, "The file(s) containing data, one instance per line", null);
    static CommandOption.File outputFile = new CommandOption.File(Replacer.class, "output", "FILE", true, new File("mallet.data"), "Write the strings with replacements applied to this file", null);
    static CommandOption.SpacedStrings replacementFiles = new CommandOption.SpacedStrings(Replacer.class, "replacement-files", "FILE [FILE ...]", true, null, "files containing string replacements, one per line:\n    'A B [tab] C' replaces A B with C,\n    'A B' replaces A B with A_B", null);
    static CommandOption.SpacedStrings deletionFiles = new CommandOption.SpacedStrings(Replacer.class, "deletion-files", "FILE [FILE ...]", true, null, "files containing strings to delete after replacements but before tokenization (ie multiword stop terms)", null);
    static CommandOption.String lineRegex = new CommandOption.String(Replacer.class, "line-regex", "REGEX", true, "^([^\\t]*)\\t([^\\t]*)\\t(.*)", "Regular expression containing regex-groups for label, name and data.", null);
    static CommandOption.Integer nameGroup = new CommandOption.Integer(Replacer.class, "name", "INTEGER", true, 1, "The index of the group containing the instance name.\n   Use 0 to indicate that this field is not used.", null);
    static CommandOption.Integer labelGroup = new CommandOption.Integer(Replacer.class, "label", "INTEGER", true, 2, "The index of the group containing the label string.\n   Use 0 to indicate that this field is not used.", null);
    static CommandOption.Integer dataGroup = new CommandOption.Integer(Replacer.class, "data", "INTEGER", true, 3, "The index of the group containing the data.", null);

    /**
     * Parses the command-line options, loads the replacement and deletion
     * lists, and writes the preprocessed instances of every input file to the
     * output file.
     *
     * @param strArr the command-line arguments
     * @throws IllegalArgumentException if no input files were specified
     * @throws Exception if option processing, loading a replacement/deletion
     *         file, or reading/writing the data fails
     */
    public static void main(String[] strArr) throws Exception {
        CommandOption.setSummary(Replacer.class, "Tool for modifying text with n-gram preprocessing");
        CommandOption.process(Replacer.class, strArr);

        // The input option defaults to null; fail with a clear message before
        // the output file is created instead of a NullPointerException after.
        if (inputFiles.value == null) {
            throw new IllegalArgumentException("No input files were specified (option \"input\")");
        }

        NGramPreprocessor nGramPreprocessor = new NGramPreprocessor();
        if (replacementFiles.value != null) {
            for (String replacementFile : replacementFiles.value) {
                System.out.println("including replacements from " + replacementFile);
                nGramPreprocessor.loadReplacements(replacementFile);
            }
        }
        if (deletionFiles.value != null) {
            for (String deletionFile : deletionFiles.value) {
                System.out.println("including deletions from " + deletionFile);
                nGramPreprocessor.loadDeletions(deletionFile);
            }
        }

        PrintWriter printWriter = new PrintWriter(outputFile.value);
        try {
            for (String inputFile : inputFiles.value) {
                writeInstances(nGramPreprocessor, inputFile, printWriter);
            }
        } finally {
            // Always flush and release the output file, even if an input fails.
            printWriter.close();
        }
    }

    /**
     * Runs every instance of one input file through the preprocessor and
     * writes it to {@code out} as a tab-separated name/target/data line.
     *
     * @param preprocessor the preprocessor holding the loaded replacements and deletions
     * @param filename path of the input file to read
     * @param out writer receiving one line per instance
     * @throws Exception if the file cannot be opened or read
     */
    private static void writeInstances(NGramPreprocessor preprocessor, String filename, PrintWriter out) throws Exception {
        logger.info("Loading " + filename);
        FileReader reader = new FileReader(filename);
        try {
            Iterator<Instance> instances = preprocessor.newIteratorFrom(new CsvIterator(reader, lineRegex.value, dataGroup.value, labelGroup.value, nameGroup.value));
            int count = 0;
            while (instances.hasNext()) {
                // Exactly one next() per hasNext(): advancing a second time
                // would drop every other instance from the output.
                Instance instance = instances.next();
                out.println(instance.getName() + "\t" + instance.getTarget() + "\t" + instance.getData());
                count++;
                if (count % PROGRESS_INTERVAL == 0) {
                    logger.info("instance " + count);
                }
            }
        } finally {
            reader.close();
        }
    }
}
