package com.gengoai.hermes.en;

import com.gengoai.apollo.ml.DataSetType;
import com.gengoai.apollo.ml.feature.FeatureExtractor;
import com.gengoai.apollo.ml.feature.Featurizer;
import com.gengoai.apollo.ml.model.FitParameters;
import com.gengoai.apollo.ml.model.Model;
import com.gengoai.apollo.ml.model.Params;
import com.gengoai.apollo.ml.model.PipelineModel;
import com.gengoai.apollo.ml.model.sequence.GreedyAvgPerceptron;
import com.gengoai.apollo.ml.observation.Variable;
import com.gengoai.apollo.ml.transform.MinCountFilter;
import com.gengoai.apollo.ml.transform.SingleSourceTransform;
import com.gengoai.conversion.Cast;
import com.gengoai.hermes.HString;
import com.gengoai.hermes.Types;
import com.gengoai.hermes.ml.HStringDataSetGenerator;
import com.gengoai.hermes.ml.POSTagger;
import com.gengoai.hermes.ml.feature.AffixFeaturizer;
import com.gengoai.hermes.ml.feature.Features;
import com.gengoai.hermes.ml.feature.PredefinedFeatures;
import com.gengoai.hermes.ml.trainer.SequenceTaggerTrainer;
import com.gengoai.hermes.morphology.StandardTokenizer;
import java.lang.invoke.SerializedLambda;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;

/* loaded from: input_file:com/gengoai/hermes/en/ENPOSTrainer.class */
/**
 * Trainer for the English part-of-speech tagger.
 *
 * <p>Supplies the pieces the {@link SequenceTaggerTrainer} base class asks for: the token-level
 * feature extractor and example generator, a {@link GreedyAvgPerceptron} sequence labeler wrapped
 * in a {@link PipelineModel}, the default fit parameters, and the {@link ENPOSTagger} that wraps
 * the trained model.
 *
 * <p>NOTE(review): this file is decompiler output; identifiers such as {@code m57dataSetType} and
 * {@code m58build} are decompiler-assigned names and are kept untouched here.
 */
public class ENPOSTrainer extends SequenceTaggerTrainer<POSTagger> {
    /**
     * Formatter for the version string handed to the tagger: calendar year, month and
     * day-of-month. The pattern letters are case sensitive: upper-case {@code Y} is the
     * week-based year and upper-case {@code D} is the day of the year, which is why the lower-case
     * forms are used here. {@link DateTimeFormatter} is immutable, so one shared instance is enough.
     */
    private static final DateTimeFormatter VERSION_FORMAT = DateTimeFormatter.ofPattern("yyyy_MM_dd");

    /**
     * Builds the per-token feature extractor: a chain of word-level featurizers (3-character
     * affixes, lower-cased word, word class, punctuation type, capitalization and digit flags)
     * combined with context templates over neighbouring tokens.
     *
     * @return the feature extractor applied to each token
     */
    private FeatureExtractor<HString> createFeatureExtractor() {
        // NOTE(review): Features.IsTitleCase is listed twice in the chain. It is kept as-is because
        // removing it could change the generated feature space — confirm whether a different
        // feature was intended in one of the two positions.
        return Featurizer.chain(new Featurizer[]{new AffixFeaturizer(3, 3), Features.LowerCaseWord, Features.WordClass, Features.punctuationType, Features.IsTitleCase, Features.IsAllCaps, Features.IsTitleCase, Features.IsDigit}).withContext(new String[]{"LowerWord[-1]", "~LowerWord[-2]", "LowerWord[+1]", "~LowerWord[+2]", PredefinedFeatures.strictContext(Features.WordClass, -1), PredefinedFeatures.strictContext(Features.WordClass, -2), PredefinedFeatures.strictContext(Features.LowerCaseWord, 1), PredefinedFeatures.strictContext(Features.LowerCaseWord, 2)});
    }

    /**
     * Creates the untrained sequence labeler: a {@link GreedyAvgPerceptron} configured from the
     * given parameters, placed in a pipeline whose default input passes through a
     * {@link MinCountFilter} constructed with 5.
     *
     * @param fitParameters the fit parameters; cast to {@link GreedyAvgPerceptron.Parameters}
     * @return the pipeline model to train
     */
    @Override // com.gengoai.hermes.ml.trainer.SequenceTaggerTrainer
    protected Model createSequenceLabeler(FitParameters<?> fitParameters) {
        GreedyAvgPerceptron.Parameters perceptronParameters = (GreedyAvgPerceptron.Parameters) Cast.as(fitParameters);
        return PipelineModel.builder()
                .defaultInput(new SingleSourceTransform[]{new MinCountFilter(5)})
                .build(new GreedyAvgPerceptron(perceptronParameters));
    }

    /**
     * Wraps a trained model in an {@link ENPOSTagger}, stamping it with a version string derived
     * from the current local date in {@code yyyy_MM_dd} form.
     *
     * @param model the trained model
     * @param hStringDataSetGenerator the generator used to produce the model's examples
     * @return the tagger
     */
    /* JADX INFO: Access modifiers changed from: protected */
    /* JADX WARN: Can't rename method to resolve collision */
    @Override // com.gengoai.hermes.ml.trainer.SequenceTaggerTrainer
    public POSTagger createTagger(Model model, HStringDataSetGenerator hStringDataSetGenerator) {
        String version = LocalDateTime.now().format(VERSION_FORMAT);
        return new ENPOSTagger(hStringDataSetGenerator, model, version);
    }

    /**
     * Builds the example generator: sentence-level, in-memory data sets whose {@code "input"}
     * token sequence comes from {@link #createFeatureExtractor()} and whose {@code "output"} token
     * sequence is a binary variable named after each token's part-of-speech name.
     *
     * @return the example generator
     */
    @Override // com.gengoai.hermes.ml.trainer.SequenceTaggerTrainer
    protected HStringDataSetGenerator getExampleGenerator() {
        return HStringDataSetGenerator.builder(Types.SENTENCE)
                .m57dataSetType(DataSetType.InMemory)
                .tokenSequence("input", createFeatureExtractor())
                .tokenSequence("output", hString -> {
                    return Variable.binary(hString.pos().name());
                })
                .m58build();
    }

    /**
     * Returns the default fit parameters: 50 max iterations, verbose output, a history size of 3,
     * a tolerance of 1e-4 and an {@link ENPOSValidator} as the validator.
     *
     * @return the fit parameters
     */
    @Override // com.gengoai.hermes.ml.trainer.SequenceTaggerTrainer
    public FitParameters<?> getFitParameters() {
        return new GreedyAvgPerceptron.Parameters().update(parameters -> {
            parameters.set(Params.Optimizable.maxIterations, 50);
            parameters.set(Params.verbose, true);
            parameters.set(Params.Optimizable.historySize, 3);
            parameters.set(Params.Optimizable.tolerance, Double.valueOf(1.0E-4d));
            parameters.validator.set(new ENPOSValidator());
        });
    }

    /**
     * Deserialization hook for the serializable lambda declared in {@link #getExampleGenerator()}.
     * Recreates the lambda when every recorded attribute of the serialized form matches it.
     *
     * <p>NOTE(review): this method is marked synthetic, i.e. it looks like the compiler-generated
     * hook rather than hand-written code. When rebuilding from source the compiler is expected to
     * emit its own copy, so this one may need to be dropped — confirm before compiling.
     *
     * @param serializedLambda the serialized form of the lambda
     * @return the recreated lambda
     * @throws IllegalArgumentException if the serialized form does not describe the known lambda
     */
    private static /* synthetic */ Object $deserializeLambda$(SerializedLambda serializedLambda) {
        String implMethodName = serializedLambda.getImplMethodName();
        // The decompiler rendered the int selector of the original two-stage switch as a boolean
        // ("boolean z = -1"), which is not valid Java. With a single known lambda the selection
        // collapses to one guard.
        boolean matchesOutputLambda = implMethodName.equals("lambda$getExampleGenerator$c3ed6941$1")
                // 6 is MethodHandleInfo.REF_invokeStatic.
                && serializedLambda.getImplMethodKind() == 6
                && serializedLambda.getFunctionalInterfaceClass().equals("com/gengoai/apollo/ml/feature/ObservationExtractor")
                && serializedLambda.getFunctionalInterfaceMethodName().equals("extractObservation")
                && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Lcom/gengoai/apollo/ml/observation/Observation;")
                && serializedLambda.getImplClass().equals("com/gengoai/hermes/en/ENPOSTrainer")
                && serializedLambda.getImplMethodSignature().equals("(Lcom/gengoai/hermes/HString;)Lcom/gengoai/apollo/ml/observation/Observation;");
        if (matchesOutputLambda) {
            // NOTE(review): a lambda needs a functional-interface target type; returning it as
            // Object (as the decompiler printed it) will need an explicit cast to the interface
            // named in the check above before this compiles.
            return hString -> {
                return Variable.binary(hString.pos().name());
            };
        }
        throw new IllegalArgumentException("Invalid lambda deserialization");
    }
}
