package de.julielab.jsbd;

import bsh.ParserConstants;
import cc.mallet.fst.CRF;
import cc.mallet.fst.CRFTrainerByLabelLikelihood;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence;
import cc.mallet.pipe.tsf.OffsetConjunctions;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.LabelSequence;
import cc.mallet.types.Sequence;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.TreeSet;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.aop.framework.autoproxy.target.QuickTargetSourceCreator;

/* loaded from: input_file:de/julielab/jsbd/SentenceSplitter.class */
/**
 * Sentence splitter backed by a MALLET linear-chain {@link CRF}.
 *
 * <p>Each unit of the input receives one of two labels: {@code "EOS"} or {@code "IS"}.
 * A model must be made available through {@link #train(InstanceList, Pipe)},
 * {@link #readModel(File)}, {@link #readModel(InputStream)} or {@link #setModel(CRF)}
 * before any prediction method is called.
 */
public class SentenceSplitter {
    private static final Logger LOGGER = LoggerFactory.getLogger(SentenceSplitter.class);

    /** Label written for a unit that ends a sentence. */
    private static final String LABEL_END_OF_SENTENCE = "EOS";

    /** Label written for a unit that does not end a sentence. */
    private static final String LABEL_INSIDE_SENTENCE = "IS";

    /** Number of bracket-forced "IS" labels after which the bracket depths are cleared. */
    private static final int MAX_BRACKET_FORCED_UNITS = 50;

    private static final String NO_MODEL_MESSAGE = "No model available. Train or load trained model first.";

    CRF model;
    boolean trained;

    /** Creates a splitter without a model. */
    public SentenceSplitter() {
        this.model = null;
        this.trained = false;
    }

    /**
     * Converts the given lines into a prediction instance using the model's input pipe.
     *
     * @param arrayList the lines to convert
     * @param pipe      unused; the input pipe of the current model is always used
     * @return the piped instance
     * @throws IllegalStateException if no model is available
     */
    public Instance makePredictionData(ArrayList<String> arrayList, Pipe pipe) {
        requireModel();
        return this.model.getInputPipe().instanceFrom(new Instance(arrayList, "", "", ""));
    }

    /**
     * Reads a file and converts its lines into a prediction instance using the model's
     * input pipe. The file name is passed as the last argument of the raw instance.
     *
     * @param file the file to read
     * @param pipe unused; the input pipe of the current model is always used
     * @return the piped instance
     * @throws IllegalStateException if no model is available
     * @throws UncheckedIOException  if the file cannot be read
     */
    public Instance makePredictionData(File file, Pipe pipe) {
        requireModel();
        return this.model.getInputPipe().instanceFrom(new Instance(readFile(file), "", "", file.getName()));
    }

    /**
     * Reads several files and converts each one into a prediction instance.
     *
     * @param fileArr the files to read
     * @param pipe    the pipe the returned {@link InstanceList} is created with; the
     *                instances themselves are produced by the model's input pipe
     * @return a list holding one instance per file, in input order
     * @throws IllegalStateException if no model is available
     * @throws UncheckedIOException  if a file cannot be read
     */
    public InstanceList makePredictionData(File[] fileArr, Pipe pipe) {
        requireModel();
        InstanceList instances = new InstanceList(pipe);
        for (File file : fileArr) {
            Instance raw = new Instance(readFile(file), "", "", file.getName());
            instances.add(this.model.getInputPipe().instanceFrom(raw));
        }
        return instances;
    }

    /**
     * Builds the training data by sending the lines of every file through a pipe chain of
     * {@code Abstract2UnitPipe}, {@link OffsetConjunctions} (offsets -1, 0 and +1) and
     * {@link TokenSequence2FeatureVectorSequence}. Progress is printed to standard output.
     *
     * @param fileArr the training files
     * @param z       unused by this method
     * @return the instance list holding one piped instance per file
     * @throws UncheckedIOException if a file cannot be read
     */
    public InstanceList makeTrainingData(File[] fileArr, boolean z) {
        Pipe trainingPipe = new SerialPipes(new Pipe[]{
            new Abstract2UnitPipe(),
            new OffsetConjunctions(new int[][]{{-1}, {0}, {1}}),
            new TokenSequence2FeatureVectorSequence(true, true)
        });
        InstanceList instances = new InstanceList(trainingPipe);
        System.out.print("preparing training data...");
        // A progress message is printed every (fileArr.length / 20) files, counting up in steps of 5.
        // NOTE(review): when the file count is not a multiple of 20 the printed value can pass 100.
        int filesPerStep = fileArr.length / 20;
        int percent = 0;
        for (int index = 0; index < fileArr.length; index++) {
            ArrayList<String> lines = readFile(fileArr[index]);
            if (filesPerStep > 0 && index > 0 && index % filesPerStep == 0) {
                percent += 5;
                System.out.print(percent + "%...");
            }
            instances.addThruPipe(new Instance(lines, "", "", fileArr[index].getName()));
        }
        return instances;
    }

    /**
     * Trains a new CRF on the given instances and stores it as the current model.
     *
     * @param instanceList the training instances; their pipe becomes the model's input pipe
     * @param pipe         unused by this method
     */
    public void train(InstanceList instanceList, Pipe pipe) {
        long startMillis = System.currentTimeMillis();
        this.model = new CRF(instanceList.getPipe(), (Pipe) null);
        this.model.addStatesForLabelsConnectedAsIn(instanceList);
        LOGGER.info("SentencesSplitter training: model converged: {}",
                new CRFTrainerByLabelLikelihood(this.model).trainOptimized(instanceList));
        long endMillis = System.currentTimeMillis();
        this.model.getInputPipe().getDataAlphabet().stopGrowth();
        this.trained = true;
        LOGGER.info("training time: {} sec", (endMillis - startMillis) / 1000);
    }

    /**
     * Labels the given lines with the current model.
     *
     * @param arrayList the lines to label
     * @param z         when {@code true}, the predicted labels are passed through
     *                  {@link #postprocessingFilter(ArrayList, ArrayList)}
     * @return the units of the input with their {@code label} field set
     * @throws IllegalStateException if no model is available
     */
    public ArrayList<Unit> predict(ArrayList<String> arrayList, boolean z) {
        requireModel();
        return predict(this.model.getInputPipe().instanceFrom(new Instance(arrayList, "", "", "")), z);
    }

    /**
     * Labels an already piped instance with the current model.
     *
     * @param instance an instance whose data is a {@link Sequence} and whose name is cast
     *                 to the list of units to label
     * @param z        when {@code true}, the predicted labels are passed through
     *                 {@link #postprocessingFilter(ArrayList, ArrayList)}
     * @return the instance's unit list with the {@code label} field of each unit set
     * @throws IllegalStateException if no model is available
     */
    public ArrayList<Unit> predict(Instance instance, boolean z) {
        requireModel();
        Sequence input = (Sequence) instance.getData();
        // The instance name is read back as the unit list; the cast cannot be checked at runtime.
        @SuppressWarnings("unchecked")
        ArrayList<Unit> units = (ArrayList<Unit>) instance.getName();

        Sequence output = this.model.transduce(input);
        ArrayList<String> labels = new ArrayList<>(output.size());
        for (int i = 0; i < output.size(); i++) {
            labels.add((String) output.get(i));
        }
        if (z) {
            labels = postprocessingFilter(labels, units);
        }
        for (int i = 0; i < labels.size(); i++) {
            units.get(i).label = labels.get(i);
        }
        return units;
    }

    /**
     * Applies rule-based corrections to predicted labels.
     *
     * <p>First pass: while a round or square bracket is open, units are forced to
     * {@code "IS"}. Second pass: units whose text is a known abbreviation become
     * {@code "IS"}; afterwards units ending in {@code ."}, {@code ?} or {@code !} become
     * {@code "EOS"}, which overrides the abbreviation rule.
     *
     * @param arrayList  the predicted labels, one per unit
     * @param arrayList2 the units the labels belong to; must hold at least as many
     *                   elements as {@code arrayList}
     * @return a new list with the corrected labels
     */
    public ArrayList<String> postprocessingFilter(ArrayList<String> arrayList, ArrayList<Unit> arrayList2) {
        TreeSet<String> abbreviations = new Abbreviations().getSet();
        String[] labels = arrayList.toArray(new String[arrayList.size()]);

        int openRound = 0;
        int openSquare = 0;
        int bracketForcedUnits = 0;
        for (int i = 0; i < labels.length; i++) {
            for (char c : arrayList2.get(i).rep.toCharArray()) {
                switch (c) {
                    case '(':
                        openRound++;
                        break;
                    case ')':
                        openRound--;
                        break;
                    case '[':
                        openSquare++;
                        break;
                    case ']':
                        openSquare--;
                        break;
                    default:
                        break;
                }
            }
            if (openSquare > 0 || openRound > 0) {
                labels[i] = LABEL_INSIDE_SENTENCE;
                bracketForcedUnits++;
            }
            // NOTE(review): the counter is never reset, so once the limit is reached the
            // depths are cleared at the end of every later iteration - confirm this is intended.
            if (bracketForcedUnits >= MAX_BRACKET_FORCED_UNITS) {
                openSquare = 0;
                openRound = 0;
            }
            // A closing bracket without a matching opener must not produce a negative depth.
            if (openSquare < 0) {
                openSquare = 0;
            }
            if (openRound < 0) {
                openRound = 0;
            }
        }

        ArrayList<String> filtered = new ArrayList<>(labels.length);
        for (int i = 0; i < labels.length; i++) {
            String text = arrayList2.get(i).rep;
            if (abbreviations.contains(text)) {
                labels[i] = LABEL_INSIDE_SENTENCE;
            }
            if (text.endsWith(".\"") || text.endsWith("?") || text.endsWith("!")) {
                labels[i] = LABEL_END_OF_SENTENCE;
            }
            filtered.add(labels[i]);
        }
        return filtered;
    }

    /**
     * Copies the entries of a label sequence into a list of strings.
     *
     * @param labelSequence the sequence to copy; every entry is cast to {@link String}
     * @return the entries in sequence order
     */
    public ArrayList<String> getLabelsFromLabelSequence(LabelSequence labelSequence) {
        ArrayList<String> labels = new ArrayList<>(labelSequence.size());
        for (int i = 0; i < labelSequence.size(); i++) {
            labels.add((String) labelSequence.get(i));
        }
        return labels;
    }

    /**
     * Serializes the current model, GZIP-compressed, to the file {@code str + ".gz"}.
     *
     * @param str the target path without the {@code .gz} suffix
     * @throws IllegalStateException if no model is available
     * @throws UncheckedIOException  if the file cannot be written
     */
    public void writeModel(String str) {
        if (!this.trained || this.model == null) {
            IllegalStateException failure = new IllegalStateException("train or load trained model first.");
            LOGGER.error("train or load trained model first.", failure);
            throw failure;
        }
        // Each stream is declared separately so all of them are closed even when a
        // later constructor or the write itself fails.
        try (FileOutputStream fileOut = new FileOutputStream(str + ".gz");
             GZIPOutputStream gzipOut = new GZIPOutputStream(fileOut);
             ObjectOutputStream objectOut = new ObjectOutputStream(gzipOut)) {
            objectOut.writeObject(this.model);
        } catch (IOException e) {
            throw new UncheckedIOException("Could not write model to " + str + ".gz", e);
        }
    }

    /**
     * Loads a GZIP-compressed, serialized model from a file.
     *
     * @param file the model file
     * @throws FileNotFoundException  if the file cannot be opened
     * @throws IOException            if reading fails
     * @throws ClassNotFoundException if a class of the serialized object cannot be found
     */
    public void readModel(File file) throws IOException, FileNotFoundException, ClassNotFoundException {
        // Owning the file stream here guarantees it is closed even if the GZIP or object
        // stream header cannot be read.
        try (FileInputStream fileIn = new FileInputStream(file)) {
            readModel(fileIn);
        }
    }

    /**
     * Loads a GZIP-compressed, serialized model from a stream and closes the stream.
     * The current model is only replaced after the new one was read and prepared.
     *
     * <p>NOTE(review): this uses Java native deserialization; only pass streams from a
     * trusted source.
     *
     * @param inputStream the stream to read the model from
     * @throws IOException            if reading fails
     * @throws ClassNotFoundException if a class of the serialized object cannot be found
     */
    public void readModel(InputStream inputStream) throws IOException, ClassNotFoundException {
        try (ObjectInputStream objectIn = new ObjectInputStream(new GZIPInputStream(inputStream))) {
            CRF loaded = (CRF) objectIn.readObject();
            loaded.getInputPipe().getDataAlphabet().stopGrowth();
            this.model = loaded;
            this.trained = true;
        }
    }

    /**
     * Reads all lines of a file. The file is decoded with the platform default charset
     * used by {@link FileReader}.
     *
     * @param file the file to read
     * @return the lines of the file in order, without line terminators
     * @throws UncheckedIOException if the file cannot be opened or read
     */
    public ArrayList<String> readFile(File file) {
        ArrayList<String> lines = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        } catch (IOException e) {
            throw new UncheckedIOException("Could not read file " + file, e);
        }
        return lines;
    }

    /**
     * Returns the current model.
     *
     * @return the model, or {@code null} if none is available
     */
    public CRF getModel() {
        return this.model;
    }

    /**
     * Replaces the current model.
     *
     * @param crf the model to use; passing {@code null} leaves the splitter without a model
     */
    public void setModel(CRF crf) {
        this.trained = crf != null;
        this.model = crf;
    }

    /** Throws an {@link IllegalStateException} unless a trained model is available. */
    private void requireModel() {
        if (!this.trained || this.model == null) {
            throw new IllegalStateException(NO_MODEL_MESSAGE);
        }
    }
}
