package cc.mallet.fst.semi_supervised.tui;

import cc.mallet.fst.CRF;
import cc.mallet.fst.MaxLatticeDefault;
import cc.mallet.fst.MultiSegmentationEvaluator;
import cc.mallet.fst.NoopTransducerTrainer;
import cc.mallet.fst.SimpleTagger;
import cc.mallet.fst.TokenAccuracyEvaluator;
import cc.mallet.fst.Transducer;
import cc.mallet.fst.TransducerEvaluator;
import cc.mallet.fst.TransducerTrainer;
import cc.mallet.fst.semi_supervised.CRFTrainerByGE;
import cc.mallet.fst.semi_supervised.FSTConstraintUtil;
import cc.mallet.fst.semi_supervised.constraints.GEConstraint;
import cc.mallet.fst.semi_supervised.constraints.OneLabelKLGEConstraints;
import cc.mallet.fst.semi_supervised.constraints.OneLabelL2RangeGEConstraints;
import cc.mallet.fst.semi_supervised.pr.CRFTrainerByPR;
import cc.mallet.fst.semi_supervised.pr.constraints.OneLabelL2IndPRConstraints;
import cc.mallet.fst.semi_supervised.pr.constraints.PRConstraint;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.iterator.LineGroupIterator;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.InstanceList;
import cc.mallet.types.Sequence;
import cc.mallet.util.CommandOption;
import cc.mallet.util.MalletLogger;
import cc.mallet.util.Maths;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Random;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.springframework.beans.propertyeditors.StringArrayPropertyEditor;

/* loaded from: input_file:cc/mallet/fst/semi_supervised/tui/SimpleTaggerWithConstraints.class */
public class SimpleTaggerWithConstraints {
    // All fields below are assigned exactly once, in the static initializer at the bottom of the class.

    /** Class logger used for progress and diagnostic messages. */
    private static Logger logger;
    /** --gaussian-variance: passed to trainGE/trainPR by main as the prior variance. */
    private static final CommandOption.Double gaussianVarianceOption;
    /** --q-gaussian-variance: used by main as the weight argument of each PR constraint. */
    private static final CommandOption.Double qGaussianVarianceOption;
    /** --train: selects training mode in main. */
    private static final CommandOption.Boolean trainOption;
    /** --test: "lab..." or "seg=start.continue,..."; selects the evaluator built by main. */
    private static final CommandOption.String testOption;
    /** --model-file: file the CRF is read from and/or written to. */
    private static final CommandOption.File modelOption;
    /** --training-proportion: fraction used by the random train/test split in main. */
    private static final CommandOption.Double trainingFractionOption;
    /** --random-seed: seed for the random train/test split in main. */
    private static final CommandOption.Integer randomSeedOption;
    /** --orders: label Markov orders handed to getCRF. */
    private static final CommandOption.IntegerArray ordersOption;
    /** --forbidden: regular expression handed to getCRF for forbidden transitions. */
    private static final CommandOption.String forbiddenOption;
    /** --allowed: regular expression handed to getCRF for allowed transitions. */
    private static final CommandOption.String allowedOption;
    /** --default-label: label registered first in the target alphabet and handed to getCRF. */
    private static final CommandOption.String defaultOption;
    /** --penalty: "kl" or "l2"; selects the constraint class built in main. */
    private static final CommandOption.String penaltyOption;
    /** --learning: "ge" or "pr"; selects trainGE or trainPR in main. */
    private static final CommandOption.String learningOption;
    /** --iterations: iteration count passed to the trainers. */
    private static final CommandOption.Integer iterationsOption;
    /** --viterbi-output: registered as an option but not read anywhere in this class. */
    private static final CommandOption.Boolean viterbiOutputOption;
    /** --continue-training: when set, main loads the CRF from --model-file before training. */
    private static final CommandOption.Boolean continueTrainingOption;
    /** --n-best: number of answers requested from apply when decoding. */
    private static final CommandOption.Integer nBestOption;
    /** --cache-size: cache size given to MaxLatticeDefault in apply. */
    private static final CommandOption.Integer cacheSizeOption;
    /** --include-input: whether decoding output also prints the input feature vector. */
    private static final CommandOption.Boolean includeInputOption;
    /** --threads: thread count given to the GE and PR trainers. */
    private static final CommandOption.Integer numThreads;
    /** --resets: value passed to CRFTrainerByGE.setNumResets. */
    private static final CommandOption.Integer numResets;
    /** The full option list used by main to parse the command line and print usage. */
    private static final CommandOption.List commandOptions;
    // Set in the static initializer to the negation of desiredAssertionStatus();
    // guards the assertion-style checks in trainGE and trainPR.
    static final /* synthetic */ boolean $assertionsDisabled;

    /** Private constructor: this class only exposes static members and is not meant to be instantiated. */
    private SimpleTaggerWithConstraints() {
    }

    /**
     * Trains the given CRF with a {@link CRFTrainerByGE} driven by the supplied GE constraints.
     *
     * @param instanceList        training instances; its size is logged and it is passed to the trainer
     * @param instanceList2       test instances, may be {@code null}; only its size is logged here
     * @param arrayList           GE constraints handed to the trainer
     * @param crf                 model to train; trained in place
     * @param transducerEvaluator evaluator registered on the trainer, skipped when {@code null}
     * @param i                   second argument of the trainer's {@code train} call
     *                            (main passes the --iterations value)
     * @param d                   Gaussian prior variance set on the trainer
     * @param i2                  value passed to {@code setNumResets}
     * @return the same {@code crf} instance that was passed in
     */
    public static CRF trainGE(InstanceList instanceList, InstanceList instanceList2, ArrayList<GEConstraint> arrayList, CRF crf, TransducerEvaluator transducerEvaluator, int i, double d, int i2) {
        logger.info("Training on " + instanceList.size() + " instances");
        if (null != instanceList2) {
            logger.info("Testing on " + instanceList2.size() + " instances");
        }

        // Assertion-style sanity check: only active when assertions are enabled for this class.
        final int threadCount = numThreads.value;
        if (threadCount <= 0 && !$assertionsDisabled) {
            throw new AssertionError();
        }

        final CRFTrainerByGE geTrainer = new CRFTrainerByGE(crf, arrayList, threadCount);
        if (null != transducerEvaluator) {
            geTrainer.addEvaluator(transducerEvaluator);
        }
        geTrainer.setGaussianPriorVariance(d);
        geTrainer.setNumResets(i2);
        geTrainer.train(instanceList, i);

        return crf;
    }

    /**
     * Trains the given CRF with a {@link CRFTrainerByPR} driven by the supplied PR constraints.
     *
     * @param instanceList        training instances; its size is logged and it is passed to the trainer
     * @param instanceList2       test instances, may be {@code null}; only its size is logged here
     * @param arrayList           PR constraints handed to the trainer
     * @param crf                 model to train; trained in place
     * @param transducerEvaluator evaluator registered on the trainer; may be {@code null}, in which
     *                            case no evaluator is registered
     * @param i                   passed as both the second and third argument of the trainer's
     *                            {@code train} call (main passes the --iterations value)
     * @param d                   value passed to {@code setPGaussianPriorVariance}
     * @return the same {@code crf} instance that was passed in
     */
    public static CRF trainPR(InstanceList instanceList, InstanceList instanceList2, ArrayList<PRConstraint> arrayList, CRF crf, TransducerEvaluator transducerEvaluator, int i, double d) {
        logger.info("Training on " + instanceList.size() + " instances");
        if (instanceList2 != null) {
            logger.info("Testing on " + instanceList2.size() + " instances");
        }
        // Assertion-style sanity check: only active when assertions are enabled for this class.
        if (!$assertionsDisabled && numThreads.value <= 0) {
            throw new AssertionError();
        }
        CRFTrainerByPR cRFTrainerByPR = new CRFTrainerByPR(crf, arrayList, numThreads.value);
        // main passes a null evaluator when --test is not given; never register null
        // (same guard as trainGE).
        if (transducerEvaluator != null) {
            cRFTrainerByPR.addEvaluator(transducerEvaluator);
        }
        cRFTrainerByPR.setPGaussianPriorVariance(d);
        cRFTrainerByPR.train(instanceList, i, i);
        return crf;
    }

    /**
     * Builds a new CRF over the pipe of the given instances and adds order-N states to it.
     *
     * <p>After the states are added, every state gets an initial weight of negative infinity
     * except the state whose name {@code addOrderNStates} returned, which gets 0.
     *
     * @param instanceList instances whose pipe becomes the CRF input pipe and which are
     *                     passed to {@code addOrderNStates}
     * @param iArr         orders passed to {@code addOrderNStates}
     * @param str          default label passed to {@code addOrderNStates}
     * @param str2         regular expression compiled into the "forbidden" pattern
     * @param str3         regular expression compiled into the "allowed" pattern
     * @param z            final boolean argument of {@code addOrderNStates}
     * @return the newly built CRF, with its weights dimension set densely
     */
    public static CRF getCRF(InstanceList instanceList, int[] iArr, String str, String str2, String str3, boolean z) {
        final Pattern forbidden = Pattern.compile(str2);
        final Pattern allowed = Pattern.compile(str3);

        final CRF model = new CRF(instanceList.getPipe(), (Pipe) null);
        final String startStateName = model.addOrderNStates(instanceList, iArr, null, str, forbidden, allowed, z);

        // Rule out every state as an initial state, then re-enable only the returned one.
        int stateIndex = 0;
        while (stateIndex < model.numStates()) {
            model.getState(stateIndex).setInitialWeight(Double.NEGATIVE_INFINITY);
            stateIndex++;
        }
        model.getState(startStateName).setInitialWeight(0.0d);

        model.setWeightsDimensionDensely();
        return model;
    }

    /**
     * Runs the evaluator over the given instances under the description "Testing".
     *
     * @param transducerTrainer   trainer handed to the evaluator
     * @param transducerEvaluator evaluator to run
     * @param instanceList        instances to evaluate
     */
    public static void test(TransducerTrainer transducerTrainer, TransducerEvaluator transducerEvaluator, InstanceList instanceList) {
        final String description = "Testing";
        transducerEvaluator.evaluateInstanceList(transducerTrainer, instanceList, description);
    }

    /**
     * Decodes an input sequence with the given transducer.
     *
     * @param transducer model used for decoding
     * @param sequence   input sequence
     * @param i          number of answers requested; exactly 1 uses {@code transducer.transduce},
     *                   any other value asks a {@link MaxLatticeDefault} for the best {@code i}
     *                   output sequences
     * @return the decoded output sequences
     */
    public static Sequence[] apply(Transducer transducer, Sequence sequence, int i) {
        if (i == 1) {
            final Sequence[] single = new Sequence[1];
            single[0] = transducer.transduce(sequence);
            return single;
        }
        final MaxLatticeDefault lattice = new MaxLatticeDefault(transducer, sequence, null, cacheSizeOption.value());
        return (Sequence[]) lattice.bestOutputSequences(i).toArray(new Sequence[0]);
    }

    /**
     * Command-line entry point.
     *
     * <p>Positional arguments after the options:
     * <ul>
     *   <li>training with --test: training file, test file, constraints file;</li>
     *   <li>training without --test: training file, constraints file;</li>
     *   <li>no training: a single file to evaluate (with --test) or to label.</li>
     * </ul>
     * When training, the trained model is written to --model-file if that option is set.
     * When not training, the model is read from --model-file and is either evaluated or used
     * to print one line of labels per input token to standard output.
     *
     * @param strArr command-line arguments
     * @throws IllegalArgumentException if required files or options are missing, or if the
     *                                  --test value is malformed
     * @throws RuntimeException         if the --learning / --penalty combination is unknown or
     *                                  unsupported, or the constraint targets are invalid for it
     * @throws Exception                on I/O or deserialization failure
     */
    public static void main(String[] strArr) throws Exception {
        long startMillis = System.currentTimeMillis();

        int firstFileArg = commandOptions.processOptions(strArr);
        // Validate the number of positional files up front instead of failing later with an
        // ArrayIndexOutOfBoundsException.
        int filesRequired;
        if (!trainOption.value) {
            filesRequired = 1;
        } else if (testOption.value != null) {
            filesRequired = 3;
        } else {
            filesRequired = 2;
        }
        if (strArr.length - firstFileArg < filesRequired) {
            commandOptions.printUsage(true);
            throw new IllegalArgumentException("Missing data file(s)");
        }

        FileReader trainingReader = null;
        FileReader testReader = null;
        FileReader constraintsReader = null;
        try {
            if (trainOption.value) {
                trainingReader = new FileReader(new File(strArr[firstFileArg]));
                if (testOption.value != null) {
                    testReader = new FileReader(new File(strArr[firstFileArg + 1]));
                    constraintsReader = new FileReader(new File(strArr[firstFileArg + 2]));
                } else {
                    constraintsReader = new FileReader(new File(strArr[firstFileArg + 1]));
                }
            } else {
                testReader = new FileReader(new File(strArr[firstFileArg]));
            }

            // A fresh pipe is only created for training from scratch; otherwise the pipe
            // comes from the stored model.
            CRF crf = null;
            Pipe pipe;
            if (trainOption.value && !continueTrainingOption.value) {
                pipe = new SimpleTagger.SimpleTaggerSentence2FeatureVectorSequence();
                pipe.getTargetAlphabet().lookupIndex(defaultOption.value);
            } else {
                crf = loadModel();
                pipe = crf.getInputPipe();
            }

            InstanceList trainingData = null;
            InstanceList testData = null;
            if (trainOption.value) {
                pipe.setTargetProcessing(true);
                trainingData = readInstances(pipe, trainingReader);
                logger.info("Number of features in training data: " + pipe.getDataAlphabet().size());
                if (testOption.value != null) {
                    if (testReader != null) {
                        testData = readInstances(pipe, testReader);
                    } else {
                        // NOTE: currently not reachable, because a test file is always opened
                        // when --test is given; kept so the random-split options stay wired.
                        InstanceList[] parts = trainingData.split(new Random(randomSeedOption.value), new double[]{trainingFractionOption.value, 1.0d - trainingFractionOption.value});
                        trainingData = parts[0];
                        testData = parts[1];
                    }
                }
            } else {
                // Targets are only processed when the data is going to be evaluated.
                pipe.setTargetProcessing(testOption.value != null);
                testData = readInstances(pipe, testReader);
            }
            logger.info("Number of predicates: " + pipe.getDataAlphabet().size());

            TransducerEvaluator evaluator = null;
            if (testOption.value != null) {
                evaluator = buildEvaluator(testOption.value, trainingData, testData);
            }

            if (pipe.isTargetProcessing()) {
                Alphabet labels = pipe.getTargetAlphabet();
                StringBuilder labelLine = new StringBuilder("Labels:");
                for (int labelIndex = 0; labelIndex < labels.size(); labelIndex++) {
                    labelLine.append(" ").append(labels.lookupObject(labelIndex).toString());
                }
                logger.info(labelLine.toString());
            }

            if (trainOption.value) {
                if (crf == null) {
                    crf = getCRF(trainingData, ordersOption.value, defaultOption.value, forbiddenOption.value, allowedOption.value, true);
                }
                HashMap<Integer, double[][]> targets = FSTConstraintUtil.loadGEConstraints(constraintsReader, trainingData);
                CRF trained;
                if (learningOption.value.equalsIgnoreCase("ge")) {
                    trained = trainGE(trainingData, testData, buildGEConstraints(targets), crf, evaluator, iterationsOption.value, gaussianVarianceOption.value, numResets.value);
                } else if (learningOption.value.equalsIgnoreCase("pr")) {
                    trained = trainPR(trainingData, testData, buildPRConstraints(targets), crf, evaluator, iterationsOption.value, gaussianVarianceOption.value);
                } else {
                    throw new RuntimeException("Unknown learning algorithm " + learningOption.value);
                }
                if (modelOption.value != null) {
                    saveModel(trained, modelOption.value);
                }
            } else if (evaluator != null) {
                // The model was already loaded above (not training implies a stored model).
                test(new NoopTransducerTrainer(crf), evaluator, testData);
            } else {
                printDecodings(crf, testData);
            }
        } finally {
            closeQuietly(trainingReader);
            closeQuietly(testReader);
            closeQuietly(constraintsReader);
        }

        System.err.println("took " + ((System.currentTimeMillis() - startMillis) / 1000) + " seconds");
    }

    /** Reads the CRF stored in --model-file; fails with usage output when the option is missing. */
    private static CRF loadModel() throws Exception {
        if (modelOption.value == null) {
            commandOptions.printUsage(true);
            throw new IllegalArgumentException("Missing model file option");
        }
        // SECURITY NOTE: this uses Java native deserialization; only load model files from
        // trusted sources.
        FileInputStream fileIn = new FileInputStream(modelOption.value);
        try {
            ObjectInputStream objectIn = new ObjectInputStream(fileIn);
            return (CRF) objectIn.readObject();
        } finally {
            // Closing the underlying stream is sufficient for input and also covers the case
            // where the ObjectInputStream constructor itself fails.
            fileIn.close();
        }
    }

    /** Serializes the trained model to the given file, closing the file even on failure. */
    private static void saveModel(CRF model, File target) throws Exception {
        FileOutputStream fileOut = new FileOutputStream(target);
        try {
            ObjectOutputStream objectOut = new ObjectOutputStream(fileOut);
            objectOut.writeObject(model);
            objectOut.close();
        } finally {
            fileOut.close();
        }
    }

    /** Reads blank-line-separated instance groups from the reader through the given pipe. */
    private static InstanceList readInstances(Pipe pipe, FileReader reader) {
        InstanceList instances = new InstanceList(pipe);
        instances.addThruPipe(new LineGroupIterator(reader, Pattern.compile("^\\s*$"), true));
        return instances;
    }

    /** Builds the evaluator selected by the --test value ("lab..." or "seg=start.continue,..."). */
    private static TransducerEvaluator buildEvaluator(String testSpec, InstanceList trainingData, InstanceList testData) {
        InstanceList[] evaluated = new InstanceList[]{trainingData, testData};
        String[] descriptions = new String[]{"Training", "Testing"};
        if (testSpec.startsWith("lab")) {
            return new TokenAccuracyEvaluator(evaluated, descriptions);
        }
        if (!testSpec.startsWith("seg=")) {
            commandOptions.printUsage(true);
            throw new IllegalArgumentException("Invalid test option: " + testSpec);
        }
        // Segment specifications are comma-separated "start.continue" pairs.
        String[] pairs = testSpec.substring(4).split(",");
        if (pairs.length < 1) {
            commandOptions.printUsage(true);
            throw new IllegalArgumentException("Missing segment start/continue labels: " + testSpec);
        }
        String[] startTags = new String[pairs.length];
        String[] continueTags = new String[pairs.length];
        for (int pairIndex = 0; pairIndex < pairs.length; pairIndex++) {
            String[] tags = pairs[pairIndex].split("\\.");
            if (tags.length != 2) {
                commandOptions.printUsage(true);
                throw new IllegalArgumentException("Incorrectly-specified segment start and end labels: " + pairs[pairIndex]);
            }
            startTags[pairIndex] = tags[0];
            continueTags[pairIndex] = tags[1];
        }
        return new MultiSegmentationEvaluator(evaluated, descriptions, startTags, continueTags);
    }

    /** Turns the loaded per-feature targets into the GE constraint selected by --penalty. */
    private static ArrayList<GEConstraint> buildGEConstraints(HashMap<Integer, double[][]> targets) {
        ArrayList<GEConstraint> constraints = new ArrayList<GEConstraint>();
        if (penaltyOption.value.equalsIgnoreCase("kl")) {
            OneLabelKLGEConstraints klConstraints = new OneLabelKLGEConstraints();
            for (Integer featureIndex : targets.keySet()) {
                double[][] ranges = targets.get(featureIndex);
                double[] target = new double[ranges.length];
                double sum = 0.0d;
                for (int label = 0; label < ranges.length; label++) {
                    // KL needs point targets: lower and upper bound must coincide.
                    if (!Maths.almostEquals(ranges[label][0], ranges[label][1])) {
                        throw new RuntimeException("A KL divergence penalty cannot be used with target ranges!");
                    }
                    target[label] = Double.isInfinite(ranges[label][0]) ? 0.0d : ranges[label][0];
                    sum += target[label];
                }
                if (!Maths.almostEquals(sum, 1.0d)) {
                    throw new RuntimeException("Targets must sum to 1 when using a KL divergence penalty!");
                }
                klConstraints.addConstraint(featureIndex.intValue(), target, 1.0d);
            }
            constraints.add(klConstraints);
        } else if (penaltyOption.value.equalsIgnoreCase("l2")) {
            OneLabelL2RangeGEConstraints l2Constraints = new OneLabelL2RangeGEConstraints();
            for (Integer featureIndex : targets.keySet()) {
                double[][] ranges = targets.get(featureIndex);
                for (int label = 0; label < ranges.length; label++) {
                    // Labels with an infinite lower bound are skipped.
                    if (!Double.isInfinite(ranges[label][0])) {
                        l2Constraints.addConstraint(featureIndex.intValue(), label, ranges[label][0], ranges[label][1], 1.0d);
                    }
                }
            }
            constraints.add(l2Constraints);
        } else {
            throw new RuntimeException("Unknown penalty " + penaltyOption.value);
        }
        return constraints;
    }

    /** Turns the loaded per-feature targets into the PR constraint; only the l2 penalty is supported. */
    private static ArrayList<PRConstraint> buildPRConstraints(HashMap<Integer, double[][]> targets) {
        if (!penaltyOption.value.equalsIgnoreCase("l2")) {
            if (!penaltyOption.value.equalsIgnoreCase("kl")) {
                throw new RuntimeException("Unknown penalty " + penaltyOption.value);
            }
            throw new RuntimeException("KL divergence not supported for PR.");
        }
        OneLabelL2IndPRConstraints prConstraints = new OneLabelL2IndPRConstraints(true);
        for (Integer featureIndex : targets.keySet()) {
            double[][] ranges = targets.get(featureIndex);
            for (int label = 0; label < ranges.length; label++) {
                // Labels with an infinite lower bound are skipped.
                if (Double.isInfinite(ranges[label][0])) {
                    continue;
                }
                if (!Maths.almostEquals(ranges[label][0], ranges[label][1])) {
                    throw new RuntimeException("Support for range constraints in PR in development. " + penaltyOption.value);
                }
                prConstraints.addConstraint(featureIndex.intValue(), label, ranges[label][0], qGaussianVarianceOption.value);
            }
        }
        ArrayList<PRConstraint> constraints = new ArrayList<PRConstraint>();
        constraints.add(prConstraints);
        return constraints;
    }

    /**
     * Decodes every instance and prints, per token, the n-best labels (optionally followed by
     * the input features), with an empty line after each instance. Instances for which any
     * answer has a different length than the input are logged and skipped.
     */
    private static void printDecodings(CRF crf, InstanceList instances) {
        boolean includeInput = includeInputOption.value();
        for (int instanceIndex = 0; instanceIndex < instances.size(); instanceIndex++) {
            Sequence input = (Sequence) instances.get(instanceIndex).getData();
            Sequence[] answers = apply(crf, input, nBestOption.value);
            boolean failed = false;
            for (int answerIndex = 0; answerIndex < answers.length; answerIndex++) {
                if (answers[answerIndex].size() != input.size()) {
                    logger.info("Failed to decode input sequence " + instanceIndex + ", answer " + answerIndex);
                    failed = true;
                }
            }
            if (failed) {
                continue;
            }
            for (int position = 0; position < input.size(); position++) {
                StringBuilder line = new StringBuilder();
                for (Sequence answer : answers) {
                    line.append(answer.get(position).toString()).append(" ");
                }
                if (includeInput) {
                    line.append(((FeatureVector) input.get(position)).toString(true));
                }
                System.out.println(line.toString());
            }
            System.out.println();
        }
    }

    /** Closes an input resource if it was opened; a close failure is logged, not propagated. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (java.io.IOException e) {
            // Input-only resource: a failed close cannot lose data, so do not mask the
            // outcome of the run with it.
            logger.warning("Failed to close input file: " + e);
        }
    }

    static {
        $assertionsDisabled = !SimpleTaggerWithConstraints.class.desiredAssertionStatus();
        logger = MalletLogger.getLogger(SimpleTaggerWithConstraints.class.getName());
        gaussianVarianceOption = new CommandOption.Double(SimpleTaggerWithConstraints.class, "gaussian-variance", "DECIMAL", true, 10.0d, "The gaussian prior variance used for training.", null);
        qGaussianVarianceOption = new CommandOption.Double(SimpleTaggerWithConstraints.class, "q-gaussian-variance", "DECIMAL", true, 10.0d, "The gaussian prior variance used in the E-step for PR training.", null);
        trainOption = new CommandOption.Boolean(SimpleTaggerWithConstraints.class, "train", "true|false", true, false, "Whether to train", null);
        testOption = new CommandOption.String(SimpleTaggerWithConstraints.class, "test", "lab or seg=start-1.continue-1,...,start-n.continue-n", true, null, "Test measuring labeling or segmentation (start-i, continue-i) accuracy", null);
        modelOption = new CommandOption.File(SimpleTaggerWithConstraints.class, "model-file", "FILENAME", true, null, "The filename for reading (train/run) or saving (train) the model.", null);
        trainingFractionOption = new CommandOption.Double(SimpleTaggerWithConstraints.class, "training-proportion", "DECIMAL", true, 0.5d, "Fraction of data to use for training in a random split.", null);
        randomSeedOption = new CommandOption.Integer(SimpleTaggerWithConstraints.class, "random-seed", "INTEGER", true, 0, "The random seed for randomly selecting a proportion of the instance list for training", null);
        ordersOption = new CommandOption.IntegerArray(SimpleTaggerWithConstraints.class, "orders", "COMMA-SEP-DECIMALS", true, new int[]{1}, "List of label Markov orders (main and backoff) ", null);
        forbiddenOption = new CommandOption.String(SimpleTaggerWithConstraints.class, "forbidden", "REGEXP", true, "\\s", "label1,label2 transition forbidden if it matches this", null);
        allowedOption = new CommandOption.String(SimpleTaggerWithConstraints.class, "allowed", "REGEXP", true, ".*", "label1,label2 transition allowed only if it matches this", null);
        defaultOption = new CommandOption.String(SimpleTaggerWithConstraints.class, "default-label", "STRING", true, "O", "Label for initial context and uninteresting tokens", null);
        penaltyOption = new CommandOption.String(SimpleTaggerWithConstraints.class, "penalty", "kl|l2", true, "l2", "penalty function for constraint violation.", null);
        learningOption = new CommandOption.String(SimpleTaggerWithConstraints.class, "learning", "ge|pr", true, "ge", "Learning method to use.", null);
        iterationsOption = new CommandOption.Integer(SimpleTaggerWithConstraints.class, "iterations", "INTEGER", true, 500, "Number of training iterations", null);
        viterbiOutputOption = new CommandOption.Boolean(SimpleTaggerWithConstraints.class, "viterbi-output", "true|false", true, false, "Print Viterbi periodically during training", null);
        continueTrainingOption = new CommandOption.Boolean(SimpleTaggerWithConstraints.class, "continue-training", "true|false", false, false, "Continue training from model specified by --model-file", null);
        nBestOption = new CommandOption.Integer(SimpleTaggerWithConstraints.class, "n-best", "INTEGER", true, 1, "How many answers to output", null);
        cacheSizeOption = new CommandOption.Integer(SimpleTaggerWithConstraints.class, "cache-size", "INTEGER", true, 100000, "How much state information to memoize in n-best decoding", null);
        includeInputOption = new CommandOption.Boolean(SimpleTaggerWithConstraints.class, "include-input", "true|false", true, false, "Whether to include the input features when printing decoding output", null);
        numThreads = new CommandOption.Integer(SimpleTaggerWithConstraints.class, "threads", "INTEGER", true, 1, "Number of threads to use for CRF training.", null);
        numResets = new CommandOption.Integer(SimpleTaggerWithConstraints.class, "resets", "INTEGER", true, 4, "Number of L-BFGS resets to use.", null);
        commandOptions = new CommandOption.List("Training, testing and running a generic tagger.", new CommandOption[]{gaussianVarianceOption, qGaussianVarianceOption, trainOption, iterationsOption, testOption, trainingFractionOption, modelOption, randomSeedOption, ordersOption, forbiddenOption, allowedOption, defaultOption, viterbiOutputOption, penaltyOption, learningOption, continueTrainingOption, nBestOption, cacheSizeOption, includeInputOption, numThreads, numResets});
    }
}
