package gate.plugin.learningframework;

import gate.AnnotationSet;
import gate.Controller;
import gate.Document;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.plugin.learningframework.data.CorpusRepresentation;
import gate.plugin.learningframework.engines.AlgorithmKind;
import gate.plugin.learningframework.export.CorpusExporter;
import gate.plugin.learningframework.export.Exporter;
import gate.plugin.learningframework.features.FeatureSpecification;
import gate.plugin.learningframework.features.SeqEncoder;
import gate.plugin.learningframework.features.SeqEncoderEnum;
import gate.plugin.learningframework.features.TargetType;
import gate.util.Files;
import gate.util.GateRuntimeException;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.log4j.Logger;

/**
 * Processing resource that collects training instances from the processed documents and hands
 * them to a {@link CorpusExporter} once the last document has been seen.
 *
 * <p>Exactly one of {@code targetFeature} (classification/regression export) or
 * {@code classAnnotationTypes} (sequence tagging export) must be configured; this is validated in
 * {@link #beforeFirstDocument(Controller)}.
 */
@CreoleResource(name = "LF_Export", helpURL = "https://gatenlp.github.io/gateplugin-LearningFramework/LF_Export", comment = "Export training instances in various formats for external training and analysis")
public class LF_Export extends LF_ExportBase {
    private static final long serialVersionUID = 606764899130852772L;

    /** Shared logger; static so that it is not part of the instance state. */
    private static final Logger LOGGER = Logger.getLogger(LF_Export.class.getCanonicalName());

    protected URL dataDirectory;
    protected String instanceWeightFeature;
    private URL featureSpecURL;
    protected ScalingMethod scaleFeatures;
    protected String targetFeature;
    protected List<String> classAnnotationTypes;
    /** Set view of {@link #classAnnotationTypes}; only populated for sequence tagging problems. */
    protected Set<String> classAnnotationTypesSet;
    protected TargetType targetType;
    private CorpusRepresentation corpusRepresentation;
    private FeatureSpecification featureSpec;
    protected String sequenceSpan;
    private Exporter exporter;
    private SeqEncoderEnum seqEncoderEnum;
    private SeqEncoder seqEncoder;
    /** True when class annotation types (rather than a target feature) define the target. */
    private boolean haveSequenceProblem;
    /** True when the selected exporter's algorithm kind is {@code SEQUENCE_TAGGER}. */
    private boolean haveSequenceAlg;
    private CorpusExporter corpusExporter;

    /**
     * Creates the resource with its default parameter values; the data directory defaults to the
     * canonical current directory of the Java process.
     *
     * @throws GateRuntimeException if the current directory cannot be converted to a URL
     */
    public LF_Export() {
        try {
            this.dataDirectory = new File(".").getCanonicalFile().toURI().toURL();
        } catch (IOException e) {
            throw new GateRuntimeException("Could not create URL for current directory to use as a default for dataDirectory", e);
        }
        this.instanceWeightFeature = "";
        this.scaleFeatures = ScalingMethod.NONE;
        this.targetType = TargetType.NOMINAL;
        this.corpusRepresentation = null;
        this.featureSpec = null;
        this.seqEncoderEnum = SeqEncoderEnum.BIO;
        this.haveSequenceProblem = false;
        this.haveSequenceAlg = false;
        this.corpusExporter = null;
    }

    /**
     * Sets the directory used for the exported data.
     *
     * @param url the data directory URL
     */
    @CreoleParameter(comment = "The directory where all data will be stored and read from (default is current dir of Java process)")
    @RunTime
    @Optional
    public void setDataDirectory(URL url) {
        this.dataDirectory = url;
    }

    /** @return the data directory URL */
    public URL getDataDirectory() {
        return this.dataDirectory;
    }

    /**
     * Sets the name of the feature holding the instance weight.
     *
     * @param str the feature name; the constructor default is the empty string
     */
    @CreoleParameter(comment = "The feature that contains the instance weight. If empty, no instance weights are used", defaultValue = "")
    @RunTime
    @Optional
    public void setInstanceWeightFeature(String str) {
        this.instanceWeightFeature = str;
    }

    /** @return the name of the instance weight feature */
    public String getInstanceWeightFeature() {
        return this.instanceWeightFeature;
    }

    /**
     * Sets the location of the feature specification file.
     *
     * @param url the feature specification URL
     */
    @CreoleParameter(comment = "The feature specification file.")
    @RunTime
    public void setFeatureSpecURL(URL url) {
        this.featureSpecURL = url;
    }

    /** @return the feature specification URL */
    public URL getFeatureSpecURL() {
        return this.featureSpecURL;
    }

    /**
     * Sets the feature scaling method.
     *
     * @param scalingMethod the scaling method
     */
    @CreoleParameter(defaultValue = "NONE", comment = "If and how to scale features. ")
    @RunTime
    @Optional
    public void setScaleFeatures(ScalingMethod scalingMethod) {
        this.scaleFeatures = scalingMethod;
    }

    /** @return the feature scaling method */
    public ScalingMethod getScaleFeatures() {
        return this.scaleFeatures;
    }

    /**
     * Sets the target feature; mutually exclusive with the class annotation types.
     *
     * @param str the target feature name, may be null or empty
     */
    @CreoleParameter(comment = "If specified, export as classification or regression problem")
    @RunTime
    @Optional
    public void setTargetFeature(String str) {
        this.targetFeature = str;
    }

    /** @return the target feature name, may be null or empty */
    public String getTargetFeature() {
        return this.targetFeature;
    }

    /**
     * Sets the class annotation types; mutually exclusive with the target feature.
     *
     * @param list the annotation type names, may be null or empty
     */
    @CreoleParameter(comment = "If specified, annotation types which indicate the class for sequence tagging")
    @RunTime
    @Optional
    public void setClassAnnotationTypes(List<String> list) {
        this.classAnnotationTypes = list;
    }

    /** @return the class annotation type names, may be null before setup has run */
    public List<String> getClassAnnotationTypes() {
        return this.classAnnotationTypes;
    }

    /**
     * Sets the target type.
     *
     * @param targetType the target type
     */
    @CreoleParameter(comment = "Target type: classification or regression problem?", defaultValue = "NOMINAL")
    @RunTime
    @Optional
    public void setTargetType(TargetType targetType) {
        this.targetType = targetType;
    }

    /** @return the target type */
    public TargetType getTargetType() {
        return this.targetType;
    }

    /**
     * Sets the annotation type of the sequence spans. Setup requires it for sequence tagger
     * exporters and rejects it for all other exporters.
     *
     * @param str the sequence span annotation type, may be null or empty
     */
    @CreoleParameter(comment = "Sequence tagging export is not yet supported")
    @RunTime
    @Optional
    public void setSequenceSpan(String str) {
        this.sequenceSpan = str;
    }

    /** @return the sequence span annotation type, may be null or empty */
    public String getSequenceSpan() {
        return this.sequenceSpan;
    }

    /**
     * Sets the export format.
     *
     * @param exporter the exporter; must not be null when processing starts
     */
    @CreoleParameter(comment = "Export format, some formats allow finer configuration via the algorithmParameters")
    @RunTime
    public void setExporter(Exporter exporter) {
        this.exporter = exporter;
    }

    /** @return the export format */
    public Exporter getExporter() {
        return this.exporter;
    }

    /**
     * Sets the sequence encoder kind.
     *
     * @param seqEncoderEnum the encoder kind; must not be null when processing starts
     */
    @CreoleParameter(comment = "The sequence to classification algorithm to use.")
    @RunTime
    @Optional
    public void setSeqEncoder(SeqEncoderEnum seqEncoderEnum) {
        this.seqEncoderEnum = seqEncoderEnum;
    }

    /** @return the sequence encoder kind */
    public SeqEncoderEnum getSeqEncoder() {
        return this.seqEncoderEnum;
    }

    /**
     * Adds the instances of one document to the corpus representation.
     *
     * <p>The sequence span annotations are only passed on for sequence algorithms. The target is
     * given either by the class annotations (sequence problem) or by the target feature name,
     * never both.
     *
     * @param document the document to take the instances from
     * @return the same document
     */
    @Override // gate.plugin.learningframework.AbstractDocumentProcessor
    public Document process(Document document) {
        AnnotationSet inputAS = document.getAnnotations(getInputASName());
        AnnotationSet instanceAS = inputAS.get(getInstanceType());
        AnnotationSet sequenceAS = this.haveSequenceAlg ? inputAS.get(getSequenceSpan()) : null;
        AnnotationSet classAS = null;
        String targetFeatureName = null;
        if (this.haveSequenceProblem) {
            classAS = inputAS.get(this.classAnnotationTypesSet);
        } else {
            targetFeatureName = getTargetFeature();
        }
        this.corpusRepresentation.add(instanceAS, sequenceAS, inputAS, classAS, targetFeatureName, this.targetType, this.instanceWeightFeature, null, this.seqEncoder);
        return document;
    }

    /**
     * Validates the runtime parameters and prepares the feature specification, the sequence
     * encoder and the corpus exporter before any document is processed.
     *
     * @param controller the controller running this resource (not used here)
     * @throws GateRuntimeException if a required parameter is missing, the parameter combination
     *     is inconsistent, or the sequence encoder cannot be instantiated
     */
    @Override // gate.plugin.learningframework.AbstractDocumentProcessor
    protected void beforeFirstDocument(Controller controller) {
        if (getExporter() == null) {
            throw new GateRuntimeException("Exporter parameter is null");
        }
        LOGGER.debug("Before Documents.");
        SeqEncoderEnum encoderKind = getSeqEncoder();
        // The parameter is optional and may have been reset to null; fail with a clear message.
        if (encoderKind == null) {
            throw new GateRuntimeException("SeqEncoder parameter is null");
        }
        if (encoderKind.getEncoderClass() == null) {
            throw new GateRuntimeException("SeqEncoder class not yet implemented, please choose another one: " + encoderKind);
        }
        this.featureSpec = new FeatureSpecification(this.featureSpecURL);
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Read the feature specification: " + this.featureSpec);
        }
        this.seqEncoder = createSeqEncoder(encoderKind);
        this.haveSequenceProblem = resolveSequenceProblem();

        AlgorithmKind algorithmKind = this.exporter.getAlgorithmKind();
        validateAlgorithmKind(algorithmKind);
        this.haveSequenceAlg = algorithmKind == AlgorithmKind.SEQUENCE_TAGGER;

        this.corpusExporter = CorpusExporter.create(this.exporter, getAlgorithmParameters(), this.featureSpec.getFeatureInfo(), getInstanceType(), this.dataDirectory);
        this.corpusRepresentation = this.corpusExporter.getCorpusRepresentation();
        LOGGER.debug("setup of the export PR complete");
    }

    /**
     * Instantiates the encoder class of the given kind via its no-argument constructor and applies
     * the kind's options to it.
     */
    private static SeqEncoder createSeqEncoder(SeqEncoderEnum encoderKind) {
        try {
            SeqEncoder encoder = (SeqEncoder) encoderKind.getEncoderClass().getDeclaredConstructor(new Class[0]).newInstance(new Object[0]);
            encoder.setOptions(encoderKind.getOptions());
            return encoder;
        } catch (IllegalAccessException | IllegalArgumentException | InstantiationException | NoSuchMethodException | SecurityException | InvocationTargetException e) {
            throw new GateRuntimeException("Could not create SeqEncoder instance", e);
        }
    }

    /**
     * Checks that exactly one of class annotation types and target feature is configured and
     * returns true when the class annotation types are the ones in use.
     */
    private boolean resolveSequenceProblem() {
        if (getClassAnnotationTypes() == null) {
            setClassAnnotationTypes(new ArrayList<String>());
        }
        boolean haveTargetFeature = getTargetFeature() != null && !getTargetFeature().isEmpty();
        if (!getClassAnnotationTypes().isEmpty()) {
            this.classAnnotationTypesSet = new HashSet<String>(this.classAnnotationTypes);
            if (haveTargetFeature) {
                throw new GateRuntimeException("Either targetFeature or classAnnotationTypes must be specified, not both");
            }
            return true;
        }
        if (!haveTargetFeature) {
            throw new GateRuntimeException("One of targetFeature or classAnnotationTypes must be specified");
        }
        return false;
    }

    /**
     * Checks that the exporter's algorithm kind fits the configured problem and that the sequence
     * span is given if and only if a sequence tagger exporter is used.
     */
    private void validateAlgorithmKind(AlgorithmKind algorithmKind) {
        if (this.haveSequenceProblem && algorithmKind == AlgorithmKind.REGRESSOR) {
            throw new GateRuntimeException("Cannot use a regressor for a sequence tagging problem");
        }
        if (!this.haveSequenceProblem && algorithmKind == AlgorithmKind.SEQUENCE_TAGGER) {
            throw new GateRuntimeException("Cannot use a sequence tagger if it is not a sequence tagging problem");
        }
        boolean haveSequenceSpan = getSequenceSpan() != null && !getSequenceSpan().isEmpty();
        if (algorithmKind == AlgorithmKind.SEQUENCE_TAGGER) {
            if (!haveSequenceSpan) {
                throw new GateRuntimeException("SequenceSpan parameter is required for Sequence exporter");
            }
        } else if (haveSequenceSpan) {
            throw new GateRuntimeException("SequenceSpan parameter must not be specified unless Sequence exporter is used");
        }
    }

    /**
     * Finishes the corpus representation and runs the export.
     *
     * @param controller the controller running this resource (not used here)
     * @param th the throwable reported by the framework, if any (not inspected here)
     * @throws GateRuntimeException if the exporter was never set up
     */
    @Override // gate.plugin.learningframework.AbstractDocumentProcessor
    public void afterLastDocument(Controller controller, Throwable th) {
        // NOTE(review): the result is unused; presumably kept so that a data directory which is
        // not a file: URL fails here - confirm against gate.util.Files.fileFromURL.
        Files.fileFromURL(getDataDirectory());
        if (this.corpusRepresentation == null || this.corpusExporter == null) {
            throw new GateRuntimeException("Cannot export: the exporter was not initialized");
        }
        this.corpusRepresentation.finishAdding();
        this.corpusExporter.export();
    }

    /**
     * Logs that nothing can be exported.
     *
     * @param controller the controller running this resource (not used here)
     * @param th the throwable reported by the framework, if any (not inspected here)
     */
    @Override // gate.plugin.learningframework.AbstractDocumentProcessor
    protected void finishedNoDocument(Controller controller, Throwable th) {
        LOGGER.error("Processing finished, but got an error or no documents seen, cannot export!");
    }
}
