package gate.plugin.learningframework.data;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import gate.Annotation;
import gate.AnnotationSet;
import gate.Utils;
import gate.plugin.learningframework.LFUtils;
import gate.plugin.learningframework.features.FeatureExtractionBase;
import gate.plugin.learningframework.features.FeatureExtractionDense;
import gate.plugin.learningframework.features.FeatureInfo;
import gate.plugin.learningframework.features.FeatureSpecAttribute;
import gate.plugin.learningframework.features.SeqEncoder;
import gate.plugin.learningframework.features.TargetType;
import gate.plugin.learningframework.stats.Stats;
import gate.plugin.learningframework.stats.StatsForFeatures;
import gate.util.GateRuntimeException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.log4j.Logger;

/**
 * Corpus representation that serializes dense instances as JSON, one line per
 * instance (or per sequence of instances), into a data file.
 *
 * <p>The data file is {@value #DATA_FILE_NAME} and the accompanying metadata file is
 * {@value #META_FILE_NAME}; both are created inside the output directory passed to
 * the constructor. The metadata file is written when adding starts and again when
 * adding finishes.
 *
 * <p>Writes to the data file and the serialization of the metadata happen while
 * holding an internal lock. The other state updated by
 * {@link #add(AnnotationSet, AnnotationSet, AnnotationSet, AnnotationSet, String, TargetType, String, String, SeqEncoder)}
 * (the sequence flag and the statistics) is not guarded by that lock.
 */
public class CorpusRepresentationVolatileDense2JsonStream extends CorpusRepresentationVolatileBase {
    /** Name of the file, inside the output directory, that receives the instance data. */
    public static final String DATA_FILE_NAME = "crvd.data.json";
    /** Name of the file, inside the output directory, that receives the metadata. */
    public static final String META_FILE_NAME = "crvd.meta.json";

    private static final Logger LOGGER = Logger.getLogger(CorpusRepresentationVolatileDense2JsonStream.class);
    // A single mapper is reused for all serialization; it is never reconfigured after creation.
    private static final ObjectMapper JSON_MAPPER = new ObjectMapper();

    private FileOutputStream outStream;
    private final File outDir;
    private File outDataFile;
    private File outMetaFile;
    private final FeatureInfo featureInfo;
    private final List<String> fnames;
    private final StatsForFeatures stats = new StatsForFeatures();
    private final SummaryStatistics seqLenStats = new SummaryStatistics();
    /** {@code null} until the first call to add, then fixed to sequence / non-sequence mode. */
    protected Boolean isSequence = null;
    private final Object LOCKING_OBJECT = new Object();
    private int linesWritten = 0;

    /**
     * Returns the string values recorded so far for the target.
     *
     * @return the recorded target values, or an empty list if no target statistics
     *         exist or the target statistics are not string-valued
     */
    public List<String> getTargetLabels() {
        Stats targetStats = this.stats.getStatistics(StatsForFeatures.KEY_FOR_TARGET);
        if (targetStats == null || !targetStats.isString()) {
            return new ArrayList<>();
        }
        return targetStats.stringValues();
    }

    /**
     * @return the number of feature names derived from the feature specification
     */
    public int getNrFeatures() {
        return this.fnames.size();
    }

    /**
     * @return the data file, or {@code null} if {@link #startAdding()} has not been called yet
     */
    public File getDataFile() {
        return this.outDataFile;
    }

    /**
     * @return the metadata file; the {@link File} object is created on demand if
     *         no metadata has been saved yet
     */
    public File getMetaFile() {
        if (this.outMetaFile == null) {
            this.outMetaFile = new File(this.outDir, META_FILE_NAME);
        }
        return this.outMetaFile;
    }

    /**
     * @return the number of lines written to the data file so far
     */
    @Override // gate.plugin.learningframework.data.CorpusRepresentation
    public int nrInstances() {
        return this.linesWritten;
    }

    /**
     * Creates a representation that writes into the given directory.
     *
     * @param file directory in which the data and metadata files are created
     * @param featureInfo feature specification; its attributes determine the feature names
     */
    public CorpusRepresentationVolatileDense2JsonStream(File file, FeatureInfo featureInfo) {
        this.outDir = file;
        this.featureInfo = featureInfo;
        this.fnames = FeatureExtractionBase.featureSpecAttributes2FeatureNames(featureInfo.getAttributes());
    }

    /** Does nothing for this representation. */
    @Override // gate.plugin.learningframework.data.CorpusRepresentationVolatileBase
    public void stopGrowth() {
    }

    /** Does nothing for this representation. */
    @Override // gate.plugin.learningframework.data.CorpusRepresentationVolatileBase
    public void startGrowth() {
    }

    /**
     * Converts annotations to instances and appends them to the data file.
     *
     * <p>If {@code annotationSet2} is {@code null}, every annotation of {@code annotationSet}
     * becomes one instance and one output line. Otherwise every annotation of
     * {@code annotationSet2} becomes one sequence (one output line) made of the
     * annotations of {@code annotationSet} contained in it. The two modes must not be
     * mixed on the same object.
     *
     * @param annotationSet the instance annotations
     * @param annotationSet2 the sequence annotations, or {@code null} for non-sequence data
     * @param annotationSet3 annotation set handed to the feature and target extraction
     * @param annotationSet4 if non-null, the target is extracted for sequence tagging from this set
     * @param str handed to the nominal/numeric target extraction (presumably the target feature name)
     * @param targetType selects nominal or numeric target extraction when {@code annotationSet4} is null
     * @param str2 name of the annotation feature holding the instance weight, may be null or empty;
     *        only used in non-sequence mode
     * @param str3 not used by this implementation
     * @param seqEncoder handed to the sequence-tagging target extraction
     * @throws GateRuntimeException if sequence and non-sequence data are mixed or writing fails
     */
    @Override // gate.plugin.learningframework.data.CorpusRepresentationVolatileBase, gate.plugin.learningframework.data.CorpusRepresentation
    public void add(AnnotationSet annotationSet, AnnotationSet annotationSet2, AnnotationSet annotationSet3, AnnotationSet annotationSet4, String str, TargetType targetType, String str2, String str3, SeqEncoder seqEncoder) {
        if (annotationSet2 == null) {
            if (this.isSequence == null) {
                this.isSequence = false;
            } else if (this.isSequence.booleanValue()) {
                throw new GateRuntimeException("Trying to add non-sequence after sequence has already been added");
            }
            for (Annotation instanceAnn : annotationSet.inDocumentOrder()) {
                InstanceRepresentation inst = labeledAnnotation2Instance(instanceAnn, annotationSet3, annotationSet4, str, targetType, str2, seqEncoder);
                writeData(internal2Json(inst, false));
            }
            return;
        }
        if (this.isSequence == null) {
            this.isSequence = true;
        } else if (!this.isSequence.booleanValue()) {
            throw new GateRuntimeException("Trying to add sequence after non-sequence has already been added");
        }
        for (Annotation sequenceAnn : annotationSet2.inDocumentOrder()) {
            List<InstanceRepresentation> sequence = instancesForSequence(annotationSet, sequenceAnn, annotationSet3, annotationSet4, str, targetType, seqEncoder);
            this.seqLenStats.addValue(sequence.size());
            writeData(internal2Json(sequence, false));
        }
    }

    /**
     * Appends one line, encoded as UTF-8 and followed by a newline, to the data file
     * and increments the line counter.
     *
     * @param str the text to write (without the trailing newline)
     * @throws GateRuntimeException if the data file has not been opened or the write fails
     */
    public void writeData(String str) {
        // Encode outside the lock so the lock is only held for the actual write.
        byte[] payload = str.getBytes(StandardCharsets.UTF_8);
        try {
            synchronized (this.LOCKING_OBJECT) {
                if (this.outStream == null) {
                    throw new GateRuntimeException("Cannot write data, output stream is not open: startAdding() has not been called");
                }
                this.outStream.write(payload);
                this.outStream.write('\n');
                this.linesWritten++;
            }
        } catch (IOException e) {
            throw new GateRuntimeException("Could not write generated JSON", e);
        }
    }

    /**
     * Builds the labeled instances for one sequence.
     *
     * @param annotationSet the instance annotations
     * @param annotation the sequence annotation; only instance annotations contained in it are used
     * @param annotationSet2 annotation set handed to the feature and target extraction
     * @param annotationSet3 if non-null, the target is extracted for sequence tagging from this set
     * @param str handed to the nominal/numeric target extraction
     * @param targetType selects nominal or numeric target extraction when {@code annotationSet3} is null
     * @param seqEncoder handed to the sequence-tagging target extraction
     * @return the instances in document order; no instance weight is set
     */
    public List<InstanceRepresentation> instancesForSequence(AnnotationSet annotationSet, Annotation annotation, AnnotationSet annotationSet2, AnnotationSet annotationSet3, String str, TargetType targetType, SeqEncoder seqEncoder) {
        List<Annotation> contained = Utils.getContainedAnnotations(annotationSet, annotation).inDocumentOrder();
        List<InstanceRepresentation> instances = new ArrayList<>(contained.size());
        for (Annotation instanceAnn : contained) {
            instances.add(labeledAnnotation2Instance(instanceAnn, annotationSet2, annotationSet3, str, targetType, null, seqEncoder));
        }
        return instances;
    }

    /**
     * Builds the instances for one sequence without extracting a target.
     *
     * @param annotationSet the instance annotations
     * @param annotation the sequence annotation; only instance annotations contained in it are used
     * @param annotationSet2 annotation set handed to the feature extraction
     * @return the instances in document order; no instance weight is set
     */
    public List<InstanceRepresentation> unlabeledInstancesForSequence(AnnotationSet annotationSet, Annotation annotation, AnnotationSet annotationSet2) {
        List<Annotation> contained = Utils.getContainedAnnotations(annotationSet, annotation).inDocumentOrder();
        List<InstanceRepresentation> instances = new ArrayList<>(contained.size());
        for (Annotation instanceAnn : contained) {
            instances.add(unlabeledAnnotation2Instance(instanceAnn, annotationSet2, null));
        }
        return instances;
    }

    /**
     * Builds one instance with features and target, and records both in the statistics.
     *
     * @param annotation the instance annotation
     * @param annotationSet annotation set handed to the feature and nominal/numeric target extraction
     * @param annotationSet2 if non-null, the target is extracted for sequence tagging from this set
     * @param str handed to the nominal/numeric target extraction
     * @param targetType selects nominal or numeric target extraction when {@code annotationSet2} is null
     * @param str2 name of the annotation feature holding the instance weight, may be null or empty
     * @param seqEncoder handed to the sequence-tagging target extraction
     * @return the instance; it carries no target if {@code annotationSet2} is null and
     *         {@code targetType} is neither nominal nor numeric
     */
    public InstanceRepresentation labeledAnnotation2Instance(Annotation annotation, AnnotationSet annotationSet, AnnotationSet annotationSet2, String str, TargetType targetType, String str2, SeqEncoder seqEncoder) {
        InstanceRepresentation inst = unlabeledAnnotation2Instance(annotation, annotationSet, str2);
        addToStatsForFeatures(inst);
        if (annotationSet2 != null) {
            inst = FeatureExtractionDense.extractClassForSeqTagging(inst, annotationSet2, annotation, seqEncoder);
        } else if (targetType == TargetType.NOMINAL) {
            inst = FeatureExtractionDense.extractClassTarget(inst, str, annotation, annotationSet);
        } else if (targetType == TargetType.NUMERIC) {
            inst = FeatureExtractionDense.extractNumericTarget(inst, str, annotation, annotationSet);
        } else {
            // No target was extracted, so there is nothing to add to the target statistics.
            return inst;
        }
        this.stats.addValue(StatsForFeatures.KEY_FOR_TARGET, inst.getTargetValue());
        return inst;
    }

    /**
     * Builds one instance with features but without a target.
     *
     * @param annotation the instance annotation
     * @param annotationSet annotation set handed to the feature extraction
     * @param str name of the annotation feature holding the instance weight; if null or
     *        empty no weight is set, otherwise the feature value is converted to a double
     *        with 1.0 as the fallback
     * @return the new instance
     */
    public InstanceRepresentation unlabeledAnnotation2Instance(Annotation annotation, AnnotationSet annotationSet, String str) {
        InstanceRepresentation inst = new InstanceRepresentationDenseVolatile();
        for (FeatureSpecAttribute attribute : this.featureInfo.getAttributes()) {
            inst = FeatureExtractionDense.extractFeature(inst, attribute, annotationSet, annotation);
        }
        if (str != null && !str.isEmpty()) {
            inst.setInstanceWeight(LFUtils.anyToDoubleOrElse(annotation.getFeatures().get(str), 1.0d));
        }
        return inst;
    }

    /**
     * Records the value of every known feature of the instance in the statistics.
     *
     * @param instanceRepresentation the instance whose feature values are recorded
     */
    public void addToStatsForFeatures(InstanceRepresentation instanceRepresentation) {
        for (String featureName : this.fnames) {
            this.stats.addValue(featureName, instanceRepresentation.getFeature(featureName));
        }
    }

    /**
     * Serializes a single instance to JSON.
     *
     * @param instanceRepresentation the instance to serialize
     * @param z if {@code true} only the array of feature values is written; otherwise a
     *        two-element array of the feature values and the target value
     * @return the JSON string
     * @throws GateRuntimeException if serialization fails
     */
    public String internal2Json(InstanceRepresentation instanceRepresentation, boolean z) {
        List<Object> featureValues = internal2array(instanceRepresentation);
        Object payload = featureValues;
        if (!z) {
            List<Object> featuresAndTarget = new ArrayList<>(2);
            featuresAndTarget.add(featureValues);
            featuresAndTarget.add(instanceRepresentation.getTargetValue());
            payload = featuresAndTarget;
        }
        return toJson(payload, "Could not convert instance to json");
    }

    /**
     * Serializes a sequence of instances to JSON.
     *
     * @param list the instances of the sequence
     * @param z if {@code true} only the array of per-instance feature arrays is written;
     *        otherwise a two-element array of the feature arrays and the array of target values
     * @return the JSON string
     * @throws GateRuntimeException if serialization fails
     */
    public String internal2Json(List<InstanceRepresentation> list, boolean z) {
        List<Object> featureRows = new ArrayList<>(list.size());
        List<Object> targets = new ArrayList<>();
        for (InstanceRepresentation inst : list) {
            featureRows.add(internal2array(inst));
            if (!z) {
                targets.add(inst.getTargetValue());
            }
        }
        Object payload = featureRows;
        if (!z) {
            List<Object> featuresAndTargets = new ArrayList<>(2);
            featuresAndTargets.add(featureRows);
            featuresAndTargets.add(targets);
            payload = featuresAndTargets;
        }
        return toJson(payload, "Could not convert instance sequence to json");
    }

    /** Serializes a value to a JSON string, wrapping failures with the given message. */
    private static String toJson(Object value, String errorMessage) {
        try {
            return JSON_MAPPER.writeValueAsString(value);
        } catch (JsonProcessingException e) {
            throw new GateRuntimeException(errorMessage, e);
        }
    }

    /** Collects the instance's feature values in the order of the feature names. */
    private List<Object> internal2array(InstanceRepresentation instanceRepresentation) {
        List<Object> values = new ArrayList<>(this.fnames.size());
        for (String featureName : this.fnames) {
            values.add(instanceRepresentation.getFeature(featureName));
        }
        return values;
    }

    /**
     * Opens the data file for writing and saves an initial version of the metadata.
     *
     * @throws GateRuntimeException if the data file cannot be opened or the metadata cannot be written
     */
    @Override // gate.plugin.learningframework.data.CorpusRepresentation
    public void startAdding() {
        File dataFile = new File(this.outDir, DATA_FILE_NAME);
        this.outDataFile = dataFile;
        try {
            this.outStream = new FileOutputStream(dataFile);
        } catch (FileNotFoundException e) {
            throw new GateRuntimeException("Cannot open output stream to " + dataFile, e);
        }
        saveMetadata();
    }

    /**
     * Saves the final metadata and closes the data file.
     *
     * <p>The data stream is closed even if saving the metadata fails. If the stream
     * was never opened only the metadata is saved.
     *
     * @throws GateRuntimeException if the metadata cannot be written or the stream cannot be closed
     */
    @Override // gate.plugin.learningframework.data.CorpusRepresentationVolatileBase, gate.plugin.learningframework.data.CorpusRepresentation
    public void finishAdding() {
        // try-with-resources skips a null resource and closes a non-null one on every exit path.
        try (FileOutputStream streamToClose = this.outStream) {
            saveMetadata();
        } catch (IOException e) {
            throw new GateRuntimeException("Error closing output stream for corpus representation", e);
        }
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override // gate.plugin.learningframework.data.CorpusRepresentation
    public Object getRepresentation() {
        throw new UnsupportedOperationException("Not supported by this corpus representation");
    }

    /**
     * Serializes the metadata as a single JSON object to the given writer.
     *
     * <p>The object contains the feature specification, feature names, number of
     * lines written, data file path, sequence flag, per-feature statistics, target
     * statistics (if any), sequence length statistics (in sequence mode) and a timestamp.
     *
     * @param writer destination for the JSON
     * @throws GateRuntimeException if serialization fails
     */
    public void json4metadata(Writer writer) {
        LOGGER.debug("Writing the metadata file");
        try {
            HashMap<String, Object> metadata = new HashMap<>();
            metadata.put("featureInfo", this.featureInfo);
            metadata.put("featureNames", this.fnames);
            metadata.put("linesWritten", Integer.valueOf(this.linesWritten));
            metadata.put("dataFile", this.outDataFile == null ? "" : this.outDataFile.getAbsolutePath());
            metadata.put("isSequence", this.isSequence);
            metadata.put("features", FeatureExtractionBase.featureSpecAttributes2FeatureInfos(this.featureInfo.getAttributes()));
            if (this.isSequence != null && this.isSequence.booleanValue()) {
                metadata.put("sequLengths.mean", Double.valueOf(this.seqLenStats.getMean()));
                metadata.put("sequLengths.min", Double.valueOf(this.seqLenStats.getMin()));
                metadata.put("sequLengths.max", Double.valueOf(this.seqLenStats.getMax()));
                metadata.put("sequLengths.variance", Double.valueOf(this.seqLenStats.getVariance()));
            }
            HashMap<String, Object> featureStats = new HashMap<>();
            for (String featureName : this.fnames) {
                Stats statsForFeature = this.stats.getStatistics(featureName);
                if (statsForFeature != null) {
                    featureStats.put(featureName, statsForFeature.getStatsObject());
                }
            }
            metadata.put("featureStats", featureStats);
            Stats targetStats = this.stats.getStatistics(StatsForFeatures.KEY_FOR_TARGET);
            if (targetStats != null) {
                metadata.put("targetStats", targetStats.getStatsObject());
            }
            // A fresh formatter per call: SimpleDateFormat instances must not be shared between threads.
            metadata.put("savedOn", new SimpleDateFormat("yyyy.MM.dd,HH:mm:ss").format(new Date()));
            JSON_MAPPER.writeValue(writer, metadata);
        } catch (IOException e) {
            throw new GateRuntimeException("Could not serialize metadata", e);
        }
    }

    /**
     * Writes the current metadata, UTF-8 encoded, to the metadata file in the output
     * directory, replacing any previous content.
     *
     * @throws GateRuntimeException if the file cannot be opened or written, or if
     *         building the metadata fails
     */
    public void saveMetadata() {
        this.outMetaFile = new File(this.outDir, META_FILE_NAME);
        try (FileOutputStream fileStream = new FileOutputStream(this.outMetaFile);
                OutputStreamWriter metaWriter = new OutputStreamWriter(fileStream, StandardCharsets.UTF_8)) {
            // Same lock as writeData, so the line counter cannot change while it is serialized.
            synchronized (this.LOCKING_OBJECT) {
                json4metadata(metaWriter);
            }
        } catch (IOException | RuntimeException e) {
            throw new GateRuntimeException("Could not write metadata to file", e);
        }
    }
}
