package de.julielab.jcore.ae.jsbd;

import cc.mallet.fst.CRF;
import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.LabelSequence;
import de.julielab.jcore.ae.jsbd.postprocessingfilters.PostprocessingFilter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Random;
import java.util.TreeSet;
import java.util.stream.Stream;
import java.util.zip.GZIPInputStream;
import org.springframework.util.AntPathMatcher;

/* loaded from: input_file:de/julielab/jcore/ae/jsbd/SentenceSplitterApplication.class */
public class SentenceSplitterApplication {
    /**
     * Name of the postprocessing filter handed to {@code SentenceSplitter.predict}. Starts out as
     * {@code PostprocessingFilter.BIOMED_POSTPROC}; several run modes in this class overwrite it with
     * a command-line argument when that argument matches an entry of
     * {@code PostprocessingFilter.POSTPROC_STREAM}.
     */
    private static String doPostprocessing = PostprocessingFilter.BIOMED_POSTPROC;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:de/julielab/jcore/ae/jsbd/SentenceSplitterApplication$EvalResult.class */
    /**
     * Mutable holder for the counts gathered by one evaluation run, together with the
     * recall, precision and F-score derived from them.
     */
    public static class EvalResult {
        /** Number of decisions that were evaluated. */
        int nrDecisions;
        /** Accuracy as stored by the evaluation that filled this result. */
        double ACC;
        /** False-positive count. */
        double fp;
        /** False-negative count. */
        double fn;
        /** Count of correct decisions. */
        double corrDecisions;

        private EvalResult() {
        }

        /**
         * @return the F-score, {@code 2 * R * P / (R + P)}; the result is NaN when
         *         {@code R + P} is zero or either operand is NaN
         */
        double getF() {
            double recall = getR();
            double precision = getP();
            return ((2.0d * recall) * precision) / (recall + precision);
        }

        /** @return recall, {@code corrDecisions / (corrDecisions + fn)} */
        double getR() {
            double denominator = this.corrDecisions + this.fn;
            return this.corrDecisions / denominator;
        }

        /** @return precision, {@code corrDecisions / (corrDecisions + fp)} */
        double getP() {
            double denominator = this.corrDecisions + this.fp;
            return this.corrDecisions / denominator;
        }
    }

    /**
     * Command-line entry point. The first argument selects the run mode; the full argument
     * array is forwarded to the handler of that mode. Without arguments a usage summary is
     * printed and the JVM exits with status -1; an unrecognised mode also exits with -1.
     *
     * @param strArr command-line arguments, mode letter first
     */
    public static void main(String[] strArr) {
        if (strArr.length < 1) {
            System.err.println("usage: JSBD <mode> {mode_specific_parameters}");
            System.err.println("different modes:");
            System.err.println("c: check texts");
            System.err.println("t: train a sentence splitting model");
            System.err.println("p: do the sentence splitting");
            System.err.println("s: evaluation with 90-10 split");
            System.err.println("x: evaluation with cross-validation");
            System.err.println("e: evaluation on previously trained model");
            System.exit(-1);
        }
        switch (strArr[0]) {
            case "c":
                startCheckMode(strArr);
                break;
            case "t":
                startTrainingMode(strArr);
                break;
            case "p":
                startPredictionMode(strArr);
                break;
            case "x":
                startXValidationMode(strArr);
                break;
            case "s":
                start9010ValidationMode(strArr);
                break;
            case "e":
                startCompareValidationMode(strArr);
                break;
            default:
                System.err.println("Unknown run mode.");
                System.exit(-1);
        }
    }

    /**
     * Run mode "e": evaluates a previously trained, gzip-compressed serialized model on the files
     * of a directory, writes the collected error lines to the error file and prints the accuracy.
     * Exits the JVM with status 0 on success and -1 on bad arguments, an unreadable model or an
     * unusable input directory.
     *
     * @param strArr {@code e <modelFile> <predictInDir> <errorFile> [<postprocessing>]}
     */
    private static void startCompareValidationMode(String[] strArr) {
        System.out.println("performing evaluation previously trained model.");
        if (strArr.length < 4) {
            System.err.println("usage: JSBD e <modelFile> <predictInDir> <errorFile> [<postprocessing>]");
            System.exit(-1);
        }
        // Only accept the optional postprocessing argument when it names a known filter.
        if (strArr.length > 4 && PostprocessingFilter.POSTPROC_STREAM.anyMatch(name -> strArr[4].equals(name))) {
            doPostprocessing = strArr[4];
        }
        CRF crf = null;
        // NOTE(review): this is native Java deserialization; only load model files from trusted sources.
        // Each stream is declared separately so all of them are closed even if a later one fails to open.
        try (FileInputStream fileIn = new FileInputStream(strArr[1]);
                GZIPInputStream gzipIn = new GZIPInputStream(fileIn);
                ObjectInputStream objectIn = new ObjectInputStream(gzipIn)) {
            crf = (CRF) objectIn.readObject();
        } catch (IOException | ClassNotFoundException e) {
            e.printStackTrace();
        }
        if (crf == null) {
            // Without a model the evaluation below would only fail with a NullPointerException.
            System.err.println("Error: could not load a model from " + strArr[1]);
            System.exit(-1);
        }
        File file = new File(strArr[2]);
        if (!file.isDirectory()) {
            System.err.println("Error: the specified directory does not exist.");
            System.exit(-1);
        }
        File[] listFiles = file.listFiles();
        if (listFiles == null) {
            // File.listFiles() returns null when the directory cannot be read.
            System.err.println("Error: could not list the files of the specified directory.");
            System.exit(-1);
        }
        TreeSet<String> errors = new TreeSet<>();
        EvalResult result = doEvaluation(crf, listFiles, errors);
        writeFile(errors, new File(strArr[3]));
        System.out.println("\n\nAccuracy on pretrained model: " + result.ACC);
        System.exit(0);
    }

    /**
     * Run mode "s": evaluates on a 90/10 split of the files in a directory, writes the collected
     * error lines to the error file and prints the accuracy. Exits the JVM with status 0 when done
     * and -1 on bad arguments or a missing directory.
     *
     * @param strArr {@code s <textDir> <errorFile> <allowSplitOnAllPunctuation> [<postprocessing>]}
     */
    private static void start9010ValidationMode(String[] strArr) {
        System.out.println("performing evaluation on 90/10 split");
        if (strArr.length < 4) {
            System.err.println("usage: JSBD s <textDir> <errorFile> <allow split on all punctuation (false: splits only occur before whitespaces)> [<postprocessing>]");
            System.exit(-1);
        }
        if (strArr.length > 4) {
            // The optional fifth argument only takes effect when it names a known filter.
            boolean knownFilter = PostprocessingFilter.POSTPROC_STREAM.anyMatch(name -> strArr[4].equals(name));
            if (knownFilter) {
                doPostprocessing = strArr[4];
            }
        }
        File textDir = new File(strArr[1]);
        if (!textDir.isDirectory()) {
            System.err.println("Error: the specified directory does not exist.");
            System.exit(-1);
        }
        File[] textFiles = textDir.listFiles();
        TreeSet<String> errors = new TreeSet<>();
        boolean splitAfterAllPunctuation = Boolean.parseBoolean(strArr[3]);
        System.out.println("Allow sentence split after all punctuation: " + splitAfterAllPunctuation);
        EvalResult result = do9010Evaluation(textFiles, errors, splitAfterAllPunctuation);
        File errorFile = new File(strArr[2]);
        writeFile(errors, errorFile);
        System.out.println("\n\nAccuracy on 90/10 split: " + result.ACC);
        System.exit(0);
    }

    /**
     * Run mode "x": cross-validates on the files of a directory, writes the collected error lines
     * to the error file and prints the average accuracy. The number of rounds must be an integer
     * in [2, 10] and at most half the number of files. Exits the JVM with status 0 when done and -1
     * on bad arguments, an unusable directory or an invalid round count.
     *
     * @param strArr {@code x <textDir> <cross-val-rounds> <errorFile> <allowSplitOnAllPunctuation> [<postprocessing>]}
     */
    private static void startXValidationMode(String[] strArr) {
        System.out.println("performing cross-validation");
        if (strArr.length < 5) {
            System.err.println("usage: JSBD x <textDir> <cross-val-rounds> <errorFile> <allow split on all punctuation (false: splits only occur before whitespaces)> [<postprocessing>]");
            System.exit(-1);
        }
        // Only accept the optional postprocessing argument when it names a known filter.
        if (strArr.length > 5 && PostprocessingFilter.POSTPROC_STREAM.anyMatch(name -> strArr[5].equals(name))) {
            doPostprocessing = strArr[5];
        }
        File file = new File(strArr[1]);
        if (!file.isDirectory()) {
            System.err.println("Error: the specified directory does not exist.");
            System.exit(-1);
        }
        File[] listFiles = file.listFiles();
        if (listFiles == null) {
            // File.listFiles() returns null when the directory cannot be read.
            System.err.println("Error: could not list the files of the specified directory.");
            System.exit(-1);
        }
        int rounds;
        try {
            rounds = Integer.parseInt(strArr[2]);
        } catch (NumberFormatException e) {
            System.err.println("Error: <cross-val-rounds> must be an integer, got: " + strArr[2]);
            System.exit(-1);
            return; // unreachable; lets the compiler see that rounds is assigned below
        }
        if (rounds > listFiles.length / 2 || rounds > 10 || rounds < 2) {
            System.err.println("Error: cannot perform " + rounds + " cross-validation rounds. Choose n in [2:10].");
            System.exit(-1);
        }
        TreeSet<String> errors = new TreeSet<>();
        boolean parseBoolean = Boolean.parseBoolean(strArr[4]);
        System.out.println("Allowing sentence split after all punctuation: " + parseBoolean);
        double accuracy = doCrossEvaluation(listFiles, rounds, errors, parseBoolean);
        writeFile(errors, new File(strArr[3]));
        System.out.println("\n\nAccuracy on cross validation: " + accuracy);
        System.exit(0);
    }

    /**
     * Run mode "p": splits every file of the input directory into sentences with the given model
     * and writes the results into the output directory. Exits the JVM with status -1 on bad
     * arguments or when either directory is unusable.
     *
     * @param strArr {@code p <inDir> <outDir> <modelFilename> [<postprocessing>]}
     */
    private static void startPredictionMode(String[] strArr) {
        System.out.println("doing the sentence splitting...");
        if (strArr.length < 4) {
            System.err.println("usage: JSBD p <inDir> <outDir> <modelFilename> [<postprocessing>]");
            System.exit(-1);
        }
        if (strArr.length > 4) {
            // The optional fifth argument only takes effect when it names a known filter.
            String requested = strArr[4];
            if (PostprocessingFilter.POSTPROC_STREAM.anyMatch(requested::equals)) {
                doPostprocessing = requested;
            }
        }
        File inputDir = new File(strArr[1]);
        if (!inputDir.isDirectory()) {
            System.err.println("Error: the specified input directory does not exist.");
            System.exit(-1);
        }
        File[] inputFiles = inputDir.listFiles();
        if (inputFiles == null) {
            // File.listFiles() returns null when the directory cannot be read.
            System.err.println("Error: could not list the files of the specified input directory.");
            System.exit(-1);
        }
        File outputDir = new File(strArr[2]);
        if (!outputDir.isDirectory()) {
            System.err.println("Error: the specified output directory does not exist.");
            System.exit(-1);
        }
        doPrediction(inputFiles, outputDir, strArr[3]);
    }

    /**
     * Run mode "t": trains a model on the files of a directory and saves it under the given file
     * name. Exits the JVM with status -1 on a wrong argument count or a missing directory.
     *
     * @param strArr {@code t <trainDir> <allowSplitOnAllPunctuation> <modelFilename>}
     */
    private static void startTrainingMode(String[] strArr) {
        System.out.println("training the model...");
        if (strArr.length != 4) {
            System.err.println("usage: JSBD t <trainDir> <allow split on all punctuation (false: splits only occur before whitespaces)> <modelFilename>");
            System.exit(-1);
        }
        File trainDir = new File(strArr[1]);
        if (!trainDir.isDirectory()) {
            System.err.println("Error: the specified directory does not exist.");
            System.exit(-1);
        }
        File[] trainFiles = trainDir.listFiles();
        System.out.println("number of files to train on: " + trainFiles.length);
        boolean splitAfterAllPunctuation = Boolean.parseBoolean(strArr[2]);
        System.out.println("Allow sentence split after all punctuation: " + splitAfterAllPunctuation);
        String modelFilename = strArr[3];
        doTraining(trainFiles, splitAfterAllPunctuation, modelFilename);
        System.out.println("Saved model to: " + modelFilename);
    }

    /**
     * Run mode "c": runs the training-data construction over the files of a directory as a check,
     * then exits the JVM with status 0. Exits with -1 on a wrong argument count or a missing
     * directory.
     *
     * @param strArr {@code c <textDir>}
     */
    private static void startCheckMode(String[] strArr) {
        System.out.println("checking abstracts...");
        if (strArr.length != 2) {
            System.err.println("usage: JSBD c <textDir>");
            System.exit(-1);
        }
        File textDir = new File(strArr[1]);
        if (!textDir.isDirectory()) {
            System.err.println("Error: the specified directory does not exist.");
            System.exit(-1);
        }
        File[] texts = textDir.listFiles();
        doCheckAbstracts(texts, false);
        System.exit(0);
    }

    /**
     * Builds training data from the given files with a fresh {@code SentenceSplitter} and discards
     * the result; prints "done." afterwards.
     *
     * @param fileArr files to check
     * @param z       flag forwarded as the third argument of {@code makeTrainingData}
     */
    private static void doCheckAbstracts(File[] fileArr, boolean z) {
        SentenceSplitter splitter = new SentenceSplitter();
        splitter.makeTrainingData(fileArr, false, z);
        System.out.println("done.");
    }

    /**
     * Shuffles the files with a fixed seed, holds out 10% (rounded down) as test set, trains on the
     * rest and evaluates on the held-out files. Exits the JVM with status -1 when the 10% share
     * rounds down to zero files.
     *
     * @param fileArr all available files
     * @param treeSet receives one line per wrong decision
     * @param z       flag forwarded to the training/evaluation step
     * @return the evaluation result on the held-out files
     */
    private static EvalResult do9010Evaluation(File[] fileArr, TreeSet<String> treeSet, boolean z) {
        List<File> shuffled = new ArrayList<>();
        Collections.addAll(shuffled, fileArr);
        // Fixed seed keeps the split reproducible between runs.
        Collections.shuffle(shuffled, new Random(1L));
        int total = shuffled.size();
        int testCount = (int) (total * 0.1d);
        int trainCount = total - testCount;
        if (testCount == 0) {
            System.err.println("Error: no test files for this split. Number of files in directory might be too small.");
            System.exit(-1);
        }
        System.out.println("all: " + total + "\ttrain: " + trainCount + "\ttest: " + testCount);
        // The first trainCount shuffled files train the model, the remainder is the test set.
        File[] trainFiles = shuffled.subList(0, trainCount).toArray(new File[0]);
        File[] testFiles = shuffled.subList(trainCount, total).toArray(new File[0]);
        return doEvaluation(trainFiles, testFiles, treeSet, z);
    }

    /**
     * Runs an {@code i}-round cross-validation over the files. The files are shuffled with a fixed
     * seed and cut into consecutive test folds of {@code fileArr.length / i} files; the last fold
     * additionally takes the remainder. Each round trains on all files outside its fold and
     * evaluates on the fold. Per-round accuracy plus the average accuracy and F-score are printed.
     *
     * @param fileArr all available files
     * @param i       number of rounds
     * @param treeSet receives one line per wrong decision, across all rounds
     * @param z       flag forwarded to the training/evaluation step
     * @return the accuracy averaged over all rounds
     */
    private static double doCrossEvaluation(File[] fileArr, int i, TreeSet<String> treeSet, boolean z) {
        List<File> shuffled = new ArrayList<>();
        Collections.addAll(shuffled, fileArr);
        // Fixed seed keeps the fold assignment reproducible between runs.
        Collections.shuffle(shuffled, new Random(1L));
        int foldSize = fileArr.length / i;
        int total = fileArr.length;
        int lastFoldSize = foldSize + (total % i);
        System.out.println("number of files in directory: " + total);
        System.out.println("size of each/last round: " + foldSize + "/" + lastFoldSize);
        System.out.println();
        EvalResult[] results = new EvalResult[i];
        for (int round = 0; round < i; round++) {
            int testStart = round * foldSize;
            // The final round's fold runs to the end so the leftover files are not dropped.
            int testEnd = (round == i - 1) ? total : testStart + foldSize;
            List<File> trainList = new ArrayList<>(shuffled.subList(0, testStart));
            trainList.addAll(shuffled.subList(testEnd, total));
            File[] trainFiles = trainList.toArray(new File[0]);
            File[] testFiles = shuffled.subList(testStart, testEnd).toArray(new File[0]);
            System.out.println("training size: " + trainFiles.length);
            System.out.println("prediction size: " + testFiles.length);
            results[round] = doEvaluation(trainFiles, testFiles, treeSet, z);
        }
        DecimalFormat threeDecimals = new DecimalFormat("0.000");
        double accuracySum = 0.0d;
        double fScoreSum = 0.0d;
        for (int round = 0; round < results.length; round++) {
            EvalResult result = results[round];
            accuracySum += result.ACC;
            fScoreSum += result.getF();
            System.out.println(round + ": " + threeDecimals.format(result.ACC));
        }
        double averageAccuracy = accuracySum / i;
        System.out.println("avg accuracy: " + threeDecimals.format(averageAccuracy));
        System.out.println("avg f-score: " + threeDecimals.format(fScoreSum / i));
        return averageAccuracy;
    }

    /**
     * Trains a fresh model on the first file set and evaluates it on the second.
     *
     * @param fileArr  training files
     * @param fileArr2 files to evaluate on
     * @param treeSet  receives one line per wrong decision
     * @param z        flag forwarded as the second argument of {@code makeTrainingData}
     * @return the evaluation result on {@code fileArr2}
     */
    private static EvalResult doEvaluation(File[] fileArr, File[] fileArr2, TreeSet<String> treeSet, boolean z) {
        SentenceSplitter splitter = new SentenceSplitter();
        // The instance is discarded; kept because constructor side effects cannot be ruled out from here.
        new EOSSymbols();
        // NOTE(review): doTraining and doCheckAbstracts pass (files, false, flag) while this call
        // passes (files, flag, false) - confirm against SentenceSplitter.makeTrainingData which
        // position the flag belongs in.
        InstanceList trainingData = splitter.makeTrainingData(fileArr, z, false);
        Pipe trainingPipe = trainingData.getPipe();
        System.out.println("training...");
        splitter.train(trainingData, trainingPipe);
        CRF trainedModel = splitter.getModel();
        return doEvaluation(trainedModel, fileArr2, treeSet);
    }

    /**
     * Evaluates a model on the given files. Only units whose text ends with an end-of-sentence
     * symbol count as decisions; for each of them the predicted label is compared with the gold
     * label. Every wrong decision is added to {@code treeSet} as
     * {@code source \t gold \t predicted \t unit  (index)}. A predicted "EOS" against gold "IS"
     * counts as false positive, the reverse as false negative. The summary is printed to stdout.
     *
     * <p>Instances whose prediction fails are reported on stderr and skipped. The accuracy is
     * computed in floating point and is 0.0 when no decision was evaluated.
     *
     * @param crf     the model to evaluate
     * @param fileArr files to evaluate on
     * @param treeSet receives one line per wrong decision
     * @return counts and accuracy of this evaluation
     */
    private static EvalResult doEvaluation(CRF crf, File[] fileArr, TreeSet<String> treeSet) {
        SentenceSplitter sentenceSplitter = new SentenceSplitter();
        sentenceSplitter.setModel(crf);
        EOSSymbols eosSymbols = new EOSSymbols();
        InstanceList predictionData = sentenceSplitter.makePredictionData(fileArr, crf.getInputPipe());
        System.out.println("predicting...");
        int correct = 0;
        int decisions = 0;
        int falseNegatives = 0;
        int falsePositives = 0;
        for (int instanceIdx = 0; instanceIdx < predictionData.size(); instanceIdx++) {
            Instance instance = predictionData.get(instanceIdx);
            String source = (String) instance.getSource();
            List<Unit> units = null;
            try {
                units = sentenceSplitter.predict(instance, doPostprocessing);
            } catch (IllegalStateException e) {
                e.printStackTrace();
            }
            if (units == null) {
                // Nothing to compare for this instance; going on would only dereference null.
                System.err.println("Error: prediction failed for " + source + ", skipping it.");
                continue;
            }
            ArrayList<String> goldLabels = getLabelsFromLabelSequence((LabelSequence) instance.getTarget());
            for (int unitIdx = 0; unitIdx < units.size(); unitIdx++) {
                String rep = units.get(unitIdx).rep;
                String predicted = units.get(unitIdx).label;
                String gold = goldLabels.get(unitIdx);
                if (!eosSymbols.tokenEndsWithEOSSymbol(rep)) {
                    continue;
                }
                decisions++;
                if (predicted.equals(gold)) {
                    correct++;
                    continue;
                }
                treeSet.add(source + "\t" + gold + "\t" + predicted + "\t" + rep + "  (" + unitIdx + ")");
                if (predicted.equals("EOS") && gold.equals("IS")) {
                    falsePositives++;
                } else if (predicted.equals("IS") && gold.equals("EOS")) {
                    falseNegatives++;
                }
            }
        }
        // Cast before dividing: int / int would truncate the ratio to 0 or 1 and throw on zero decisions.
        double accuracy = decisions == 0 ? 0.0d : (double) correct / decisions;
        EvalResult evalResult = new EvalResult();
        evalResult.corrDecisions = correct;
        evalResult.nrDecisions = decisions;
        evalResult.fn = falseNegatives;
        evalResult.fp = falsePositives;
        evalResult.ACC = accuracy;
        System.out.println("all : " + decisions);
        System.out.println("corr: " + correct);
        System.out.println("fp :" + falsePositives);
        System.out.println("fn :" + falseNegatives);
        System.out.println("R :" + evalResult.getR());
        System.out.println("P :" + evalResult.getP());
        System.out.println("F :" + evalResult.getF());
        System.out.println("ACC : " + accuracy);
        return evalResult;
    }

    /**
     * Builds training data from the given files, trains a model on it and writes the model out.
     *
     * @param fileArr training files
     * @param z       flag forwarded as the third argument of {@code makeTrainingData}
     * @param str     name of the file the model is written to
     */
    private static void doTraining(File[] fileArr, boolean z, String str) {
        SentenceSplitter splitter = new SentenceSplitter();
        System.out.println("making training data...");
        InstanceList trainingData = splitter.makeTrainingData(fileArr, false, z);
        Pipe trainingPipe = trainingData.getPipe();
        System.out.println("training model...");
        splitter.train(trainingData, trainingPipe);
        splitter.writeModel(str);
    }

    /**
     * Splits each input file into sentences and writes them, one sentence per line, to a file of
     * the same name inside the output directory. A sentence is the concatenation of the unit texts
     * (with a blank after units flagged {@code afterWs}) up to and including a unit labelled "EOS".
     * Progress is printed every 100 files.
     *
     * <p>If the model cannot be read the JVM exits with status -1. A file whose prediction fails is
     * reported on stderr and skipped.
     *
     * @param fileArr files to split
     * @param file    output directory
     * @param str     name of the model file
     */
    private static void doPrediction(File[] fileArr, File file, String str) {
        SentenceSplitter sentenceSplitter = new SentenceSplitter();
        System.out.println("reading model...");
        try {
            sentenceSplitter.readModel(new File(str));
        } catch (IOException | ClassNotFoundException e) {
            // Without a model nothing below can work, so stop instead of failing later on null.
            e.printStackTrace();
            System.err.println("Error: could not read the model file " + str);
            System.exit(-1);
        }
        System.out.println("starting sentence splitting...");
        Pipe inputPipe = sentenceSplitter.getModel().getInputPipe();
        for (int fileIdx = 0; fileIdx < fileArr.length; fileIdx++) {
            if (fileIdx % 100 == 0 && fileIdx > 0) {
                System.out.println(fileIdx + " files done...");
            }
            File inputFile = fileArr[fileIdx];
            List<Unit> units = null;
            try {
                Instance rawInstance = new Instance(sentenceSplitter.readFile(inputFile), "", "", inputFile.getName());
                units = sentenceSplitter.predict(inputPipe.instanceFrom(rawInstance), doPostprocessing);
            } catch (IllegalStateException e) {
                e.printStackTrace();
            }
            if (units == null) {
                System.err.println("Error: prediction failed for " + inputFile + ", skipping it.");
                continue;
            }
            // File.getName() and the two-argument File constructor use the platform separator.
            File outputFile = new File(file, inputFile.getName());
            ArrayList<String> sentences = new ArrayList<>();
            StringBuilder sentence = new StringBuilder();
            for (Unit unit : units) {
                String label = unit.label;
                sentence.append(unit.rep);
                if (unit.afterWs) {
                    sentence.append(' ');
                }
                if (label.equals("EOS")) {
                    sentences.add(sentence.toString());
                    sentence.setLength(0);
                }
            }
            writeFile(sentences, outputFile);
        }
    }

    /**
     * Copies the entries of a label sequence into a list, casting each entry to {@code String}.
     *
     * @param labelSequence the sequence to copy
     * @return the entries in sequence order
     */
    private static ArrayList<String> getLabelsFromLabelSequence(LabelSequence labelSequence) {
        int length = labelSequence.size();
        ArrayList<String> labels = new ArrayList<>(length);
        for (int position = 0; position < length; position++) {
            String label = (String) labelSequence.get(position);
            labels.add(label);
        }
        return labels;
    }

    /**
     * Writes the entries of the set to the file in iteration order, each followed by a newline.
     * The writer is closed even when a write fails; an {@link IOException} is reported via its
     * stack trace and otherwise ignored.
     *
     * @param treeSet lines to write
     * @param file    destination file
     */
    private static void writeFile(TreeSet<String> treeSet, File file) {
        try (FileWriter writer = new FileWriter(file)) {
            for (String line : treeSet) {
                writer.write(line + "\n");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the entries of the list to the file in list order, each followed by a newline.
     * The writer is closed even when a write fails; an {@link IOException} is reported via its
     * stack trace and otherwise ignored.
     *
     * @param arrayList lines to write
     * @param file      destination file
     */
    private static void writeFile(ArrayList<String> arrayList, File file) {
        try (FileWriter writer = new FileWriter(file)) {
            for (String line : arrayList) {
                writer.write(line + "\n");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
