package de.julielab.geneexpbase.hpo;

import cc.mallet.classify.MaxEnt;
import cc.mallet.classify.MaxEntTrainer;
import cc.mallet.types.Alphabet;
import cc.mallet.types.AlphabetCarrying;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.Label;
import ciir.umass.edu.learning.RANKER_TYPE;
import ciir.umass.edu.metric.METRIC;
import com.google.inject.Injector;
import de.julielab.geneexpbase.GeneExpException;
import de.julielab.geneexpbase.GeneExpRuntimeException;
import de.julielab.geneexpbase.classification.FeatureUtils;
import de.julielab.geneexpbase.classification.MinMaxScalingStats;
import de.julielab.geneexpbase.classification.SVMClassifier;
import de.julielab.geneexpbase.classification.StandardizationStats;
import de.julielab.geneexpbase.classification.svm.SVMTrainOptions;
import de.julielab.geneexpbase.configuration.Configuration;
import de.julielab.geneexpbase.configuration.Parameters;
import de.julielab.geneexpbase.data.DocumentLoader;
import de.julielab.geneexpbase.data.DocumentSourceFiles;
import de.julielab.geneexpbase.data.UnknownCorpusException;
import de.julielab.geneexpbase.genemodel.GeneDocument;
import de.julielab.geneexpbase.scoring.Scorer;
import de.julielab.java.utilities.FileUtilities;
import de.julielab.ml.RankLibRanker;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import spark.Route;

/* loaded from: input_file:de/julielab/geneexpbase/hpo/HpoRoute.class */
public abstract class HpoRoute implements Route, Serializable {
    // Assigned outside this excerpt (no initializer is visible here).
    // Presumably the names of the supported ML algorithms — TODO confirm.
    public static final Set<String> KNOWN_ML_ALGORITHMS;
    // Assigned outside this excerpt; not referenced by any of the methods visible here.
    private static final Pattern ML_PREFIX_PATTERN;
    // Shared counter; assigned outside this excerpt. Presumably counts handled runs — TODO confirm.
    protected static AtomicInteger runCounter;
    // Obtained from the Guice injector in the constructor; loadData() uses it to read corpus documents.
    private final DocumentLoader documentLoader;
    protected transient Configuration configuration;
    // Cache of loaded corpora, filled lazily by getCorpusPartition() while synchronizing on this map.
    // Outer key: corpus name (with a "-merged" suffix for merged instances); inner key: subcorpus.
    // Value: pair of (cross-validation splits, dev set) as produced by loadData().
    protected transient Map<String, Map<String, Pair<List<List<GeneDocument>>, List<GeneDocument>>>> loadedCorpora = new HashMap();
    protected transient Logger log;
    // Created in the constructor through createGuiceInjector(Configuration).
    protected transient Injector injector;
    // Assertion switch surfaced by the decompiler; checked in getDocuments4Budget().
    static final /* synthetic */ boolean $assertionsDisabled;

    /* loaded from: input_file:de/julielab/geneexpbase/hpo/HpoRoute$Metric.class */
    /**
     * Evaluation metrics handled by this route. A value of this type is returned by
     * {@code getDefaultMetric()} and passed to {@code calculateScore(...)}.
     * The exact definition of each metric is not visible in this class excerpt.
     */
    public enum Metric {
        RECALL,
        MAX_RECALL,
        PRECISION,
        F,
        NDCG,
        P1,
        MAX_REC_10,
        RECALL_REJECTION,
        PRECISION_REJECTION,
        F_REJECTION
    }

    /**
     * Stores the logger and configuration, builds the Guice injector and fetches the
     * {@link DocumentLoader} from it.
     *
     * @param logger        logger used by this route
     * @param configuration configuration handed to {@link #createGuiceInjector(Configuration)}
     */
    public HpoRoute(Logger logger, Configuration configuration) {
        this.log = logger;
        this.configuration = configuration;
        // NOTE(review): this calls an overridable method from the constructor. The fields above are
        // assigned first so that an override can already see them; keep this statement order.
        this.injector = createGuiceInjector(configuration);
        this.documentLoader = (DocumentLoader) this.injector.getInstance(DocumentLoader.class);
    }

    /**
     * Creates the Guice injector for this route. Called once from the constructor; the returned
     * injector must be able to provide a {@link DocumentLoader}.
     *
     * @param configuration the configuration passed to the constructor
     * @return the injector stored in {@code this.injector}
     */
    protected abstract Injector createGuiceInjector(Configuration configuration);

    /**
     * @return the endpoint of this route; presumably the path under which the Spark route is
     *         registered — TODO confirm against the registering code
     */
    public abstract String getRouteEndpoint();

    /**
     * @return the number of cross-validation partitions the non-dev documents are distributed
     *         over (see {@code getTrainDevSplit}); also part of the split mapping file name
     */
    public abstract int getNumSplits();

    /**
     * @return the dev set sampling frequency {@code n}: when positive, every n-th document
     *         (index % n == 0) is put into the dev set by {@code getTrainDevSplit}; when zero or
     *         negative, no dev set is created
     */
    public abstract int getDevSamplingFrequency();

    /* JADX WARN: Removed duplicated region for block: B:39:0x0188 A[Catch: Throwable -> 0x02b3, TryCatch #0 {Throwable -> 0x02b3, blocks: (B:8:0x002f, B:9:0x006d, B:11:0x0077, B:12:0x008f, B:13:0x00d8, B:16:0x00e8, B:19:0x00f8, B:22:0x0108, B:25:0x0118, B:28:0x0128, B:31:0x0138, B:34:0x0149, B:38:0x0159, B:39:0x0188, B:42:0x0193, B:44:0x019e, B:46:0x01b3, B:48:0x01c8, B:50:0x01d9, B:52:0x01ea, B:54:0x01fb, B:56:0x020c, B:59:0x023c, B:61:0x025e, B:62:0x0280, B:64:0x02a2, B:65:0x02af, B:70:0x0273), top: B:7:0x002f }] */
    /* JADX WARN: Removed duplicated region for block: B:42:0x0193 A[Catch: Throwable -> 0x02b3, TryCatch #0 {Throwable -> 0x02b3, blocks: (B:8:0x002f, B:9:0x006d, B:11:0x0077, B:12:0x008f, B:13:0x00d8, B:16:0x00e8, B:19:0x00f8, B:22:0x0108, B:25:0x0118, B:28:0x0128, B:31:0x0138, B:34:0x0149, B:38:0x0159, B:39:0x0188, B:42:0x0193, B:44:0x019e, B:46:0x01b3, B:48:0x01c8, B:50:0x01d9, B:52:0x01ea, B:54:0x01fb, B:56:0x020c, B:59:0x023c, B:61:0x025e, B:62:0x0280, B:64:0x02a2, B:65:0x02af, B:70:0x0273), top: B:7:0x002f }] */
    /* JADX WARN: Removed duplicated region for block: B:44:0x019e A[Catch: Throwable -> 0x02b3, TryCatch #0 {Throwable -> 0x02b3, blocks: (B:8:0x002f, B:9:0x006d, B:11:0x0077, B:12:0x008f, B:13:0x00d8, B:16:0x00e8, B:19:0x00f8, B:22:0x0108, B:25:0x0118, B:28:0x0128, B:31:0x0138, B:34:0x0149, B:38:0x0159, B:39:0x0188, B:42:0x0193, B:44:0x019e, B:46:0x01b3, B:48:0x01c8, B:50:0x01d9, B:52:0x01ea, B:54:0x01fb, B:56:0x020c, B:59:0x023c, B:61:0x025e, B:62:0x0280, B:64:0x02a2, B:65:0x02af, B:70:0x0273), top: B:7:0x002f }] */
    /* JADX WARN: Removed duplicated region for block: B:46:0x01b3 A[Catch: Throwable -> 0x02b3, TryCatch #0 {Throwable -> 0x02b3, blocks: (B:8:0x002f, B:9:0x006d, B:11:0x0077, B:12:0x008f, B:13:0x00d8, B:16:0x00e8, B:19:0x00f8, B:22:0x0108, B:25:0x0118, B:28:0x0128, B:31:0x0138, B:34:0x0149, B:38:0x0159, B:39:0x0188, B:42:0x0193, B:44:0x019e, B:46:0x01b3, B:48:0x01c8, B:50:0x01d9, B:52:0x01ea, B:54:0x01fb, B:56:0x020c, B:59:0x023c, B:61:0x025e, B:62:0x0280, B:64:0x02a2, B:65:0x02af, B:70:0x0273), top: B:7:0x002f }] */
    /* JADX WARN: Removed duplicated region for block: B:48:0x01c8 A[Catch: Throwable -> 0x02b3, TryCatch #0 {Throwable -> 0x02b3, blocks: (B:8:0x002f, B:9:0x006d, B:11:0x0077, B:12:0x008f, B:13:0x00d8, B:16:0x00e8, B:19:0x00f8, B:22:0x0108, B:25:0x0118, B:28:0x0128, B:31:0x0138, B:34:0x0149, B:38:0x0159, B:39:0x0188, B:42:0x0193, B:44:0x019e, B:46:0x01b3, B:48:0x01c8, B:50:0x01d9, B:52:0x01ea, B:54:0x01fb, B:56:0x020c, B:59:0x023c, B:61:0x025e, B:62:0x0280, B:64:0x02a2, B:65:0x02af, B:70:0x0273), top: B:7:0x002f }] */
    /* JADX WARN: Removed duplicated region for block: B:50:0x01d9 A[Catch: Throwable -> 0x02b3, TryCatch #0 {Throwable -> 0x02b3, blocks: (B:8:0x002f, B:9:0x006d, B:11:0x0077, B:12:0x008f, B:13:0x00d8, B:16:0x00e8, B:19:0x00f8, B:22:0x0108, B:25:0x0118, B:28:0x0128, B:31:0x0138, B:34:0x0149, B:38:0x0159, B:39:0x0188, B:42:0x0193, B:44:0x019e, B:46:0x01b3, B:48:0x01c8, B:50:0x01d9, B:52:0x01ea, B:54:0x01fb, B:56:0x020c, B:59:0x023c, B:61:0x025e, B:62:0x0280, B:64:0x02a2, B:65:0x02af, B:70:0x0273), top: B:7:0x002f }] */
    /* JADX WARN: Removed duplicated region for block: B:52:0x01ea A[Catch: Throwable -> 0x02b3, TryCatch #0 {Throwable -> 0x02b3, blocks: (B:8:0x002f, B:9:0x006d, B:11:0x0077, B:12:0x008f, B:13:0x00d8, B:16:0x00e8, B:19:0x00f8, B:22:0x0108, B:25:0x0118, B:28:0x0128, B:31:0x0138, B:34:0x0149, B:38:0x0159, B:39:0x0188, B:42:0x0193, B:44:0x019e, B:46:0x01b3, B:48:0x01c8, B:50:0x01d9, B:52:0x01ea, B:54:0x01fb, B:56:0x020c, B:59:0x023c, B:61:0x025e, B:62:0x0280, B:64:0x02a2, B:65:0x02af, B:70:0x0273), top: B:7:0x002f }] */
    /* JADX WARN: Removed duplicated region for block: B:54:0x01fb A[Catch: Throwable -> 0x02b3, TryCatch #0 {Throwable -> 0x02b3, blocks: (B:8:0x002f, B:9:0x006d, B:11:0x0077, B:12:0x008f, B:13:0x00d8, B:16:0x00e8, B:19:0x00f8, B:22:0x0108, B:25:0x0118, B:28:0x0128, B:31:0x0138, B:34:0x0149, B:38:0x0159, B:39:0x0188, B:42:0x0193, B:44:0x019e, B:46:0x01b3, B:48:0x01c8, B:50:0x01d9, B:52:0x01ea, B:54:0x01fb, B:56:0x020c, B:59:0x023c, B:61:0x025e, B:62:0x0280, B:64:0x02a2, B:65:0x02af, B:70:0x0273), top: B:7:0x002f }] */
    /* JADX WARN: Removed duplicated region for block: B:56:0x020c A[Catch: Throwable -> 0x02b3, TryCatch #0 {Throwable -> 0x02b3, blocks: (B:8:0x002f, B:9:0x006d, B:11:0x0077, B:12:0x008f, B:13:0x00d8, B:16:0x00e8, B:19:0x00f8, B:22:0x0108, B:25:0x0118, B:28:0x0128, B:31:0x0138, B:34:0x0149, B:38:0x0159, B:39:0x0188, B:42:0x0193, B:44:0x019e, B:46:0x01b3, B:48:0x01c8, B:50:0x01d9, B:52:0x01ea, B:54:0x01fb, B:56:0x020c, B:59:0x023c, B:61:0x025e, B:62:0x0280, B:64:0x02a2, B:65:0x02af, B:70:0x0273), top: B:7:0x002f }] */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Request handler with the signature of {@code spark.Route#handle}.
     * <p>
     * NOTE(review): the decompiler could not reconstruct this method. The body below is a
     * placeholder that unconditionally throws {@link UnsupportedOperationException}; the real
     * logic has to be recovered from the original sources or the bytecode.
     */
    public java.lang.Object handle(spark.Request r11, spark.Response r12) throws java.lang.Exception {
        /*
            Method dump skipped, instructions count: 767
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: de.julielab.geneexpbase.hpo.HpoRoute.handle(spark.Request, spark.Response):java.lang.Object");
    }

    /**
     * @return the corpora (as {@code HpoInstance}s) this route works on. Note that several methods
     *         of this class call {@code setSplitType}/{@code setCrossvalRound} on the returned
     *         instances, i.e. they are mutated by the callers.
     */
    protected abstract List<HpoInstance> getActiveCorpora();

    /**
     * @return the task name; used as the first component of the split mapping file name
     *         (see {@code getSplitMappingFile})
     */
    protected abstract String getTaskName();

    /**
     * Computes how much of the overall cross-validation data the partition of the given instance
     * represents.
     * <p>
     * For split types other than {@code TESTSPLIT} and {@code TRAINSPLIT} the pair (1.0, 1) is
     * returned. Otherwise the genes of the instance's partition are counted and divided by the
     * number of genes summed over all active corpora and all cross-validation rounds for the same
     * split type.
     * <p>
     * Side effect: the split type and cross-validation round of the instances returned by
     * {@link #getActiveCorpora()} are overwritten.
     *
     * @param hpoInstance the instance whose partition is measured
     * @return pair of (fraction of genes in this partition, number of partitions that were summed)
     * @throws ArithmeticException if the active corpora contain no genes at all
     */
    protected Pair<Double, Integer> getResultScalingFactors(HpoInstance hpoInstance) {
        if (hpoInstance.getSplitType() != SplitType.TESTSPLIT && hpoInstance.getSplitType() != SplitType.TRAINSPLIT) {
            return new ImmutablePair<>(1.0d, 1);
        }
        long totalGenes = 0;
        int numPartitions = 0;
        for (HpoInstance activeCorpus : getActiveCorpora()) {
            File splitMappingFile = getSplitMappingFile(getTaskName(), activeCorpus.getCorpus(), activeCorpus.getSubcorpus(), hpoInstance.isMergeCorpora());
            for (int round = 0; round < getNumSplits(); round++) {
                activeCorpus.setCrossvalRound(round);
                activeCorpus.setSplitType(hpoInstance.getSplitType());
                totalGenes += getCorpusPartition(activeCorpus, splitMappingFile).stream()
                        .flatMap(document -> document.getGenes())
                        .count();
                numPartitions++;
            }
        }
        long instanceGenes = getDocuments4Instance(hpoInstance).stream()
                .flatMap(document -> document.getGenes())
                .count();
        if (totalGenes == 0) {
            // The former integer division failed here as well; keep the exception type but say why.
            throw new ArithmeticException("Cannot compute the data fraction for instance " + hpoInstance + ": the active corpora do not contain any genes.");
        }
        // Floating point division: an integer division would truncate every proper fraction to 0.
        double fraction = (double) instanceGenes / totalGenes;
        this.log.debug("Partition for instance {} has size {}. Total number of genes is {}. So the fraction of this data is {}", hpoInstance, instanceGenes, totalGenes, fraction);
        return new ImmutablePair<>(fraction, numPartitions);
    }

    /**
     * Collects training documents across all active corpora for the given instance.
     * <p>
     * For a non-DEV instance, the instance's own corpus contributes its TRAINSPLIT partition for
     * the instance's cross-validation round; every other active corpus contributes its complete
     * cross-validation data (TRAINSPLIT plus TESTSPLIT of round 0). For a DEV instance, every
     * active corpus contributes its complete cross-validation data.
     *
     * @param hpoInstance the instance training data is requested for
     * @return a new, caller-owned list of training documents
     */
    protected List<GeneDocument> getAllCorporaTrainingDocuments4Instance(HpoInstance hpoInstance) {
        boolean devInstance = hpoInstance.getSplitType() == SplitType.DEV;
        // Always accumulate into a fresh list: getDocuments4Instance may hand out the cached split
        // list itself (e.g. TRAINSPLIT with a single split), and adding to it would corrupt the cache.
        List<GeneDocument> trainingDocuments = new ArrayList<>();
        if (!devInstance) {
            trainingDocuments.addAll(getDocuments4Instance(hpoInstance.getCorpus(), hpoInstance.getSubcorpus(), SplitType.TRAINSPLIT, hpoInstance.isMergeCorpora(), hpoInstance.getCrossvalRound()));
        }
        for (HpoInstance activeCorpus : getActiveCorpora()) {
            boolean sameCorpus = Objects.equals(activeCorpus.getCorpus(), hpoInstance.getCorpus())
                    && Objects.equals(activeCorpus.getSubcorpus(), hpoInstance.getSubcorpus());
            if (devInstance || !sameCorpus) {
                // Train split and test split of round 0 together make up all cross-validation data.
                trainingDocuments.addAll(getDocuments4Instance(activeCorpus.getCorpus(), activeCorpus.getSubcorpus(), SplitType.TRAINSPLIT, hpoInstance.isMergeCorpora(), 0));
                trainingDocuments.addAll(getDocuments4Instance(activeCorpus.getCorpus(), activeCorpus.getSubcorpus(), SplitType.TESTSPLIT, hpoInstance.isMergeCorpora(), 0));
            }
        }
        return trainingDocuments;
    }

    /**
     * Cuts the given document list down to the share {@code i / i2} of its length.
     *
     * @param list the documents to shorten
     * @param i    the current resource budget; must be positive
     * @param i2   the maximum budget; must not be smaller than {@code i}
     * @return a {@link List#subList(int, int)} view on the first
     *         {@code floor(list.size() * i / i2)} documents
     */
    protected List<GeneDocument> getDocuments4Budget(List<GeneDocument> list, int i, int i2) {
        this.log.debug("Shorting list of {} documents down for budget {}/{}", list.size(), i, i2);
        // Decompiled form of two assert statements; only active when assertions are enabled.
        if (!$assertionsDisabled && i <= 0) {
            throw new AssertionError("The given resource budget is " + i + " but it must be positive.");
        }
        if (!$assertionsDisabled && i2 < i) {
            throw new AssertionError("The specified maximum budget of " + i2 + " is not larger or equal to the current resource budget.");
        }
        // Multiply before dividing (in long to avoid overflow): the plain int division i / i2 is 0
        // for every budget below the maximum, which always produced an empty list.
        int budgetedSize = (int) ((long) list.size() * i / i2);
        List<GeneDocument> budgetedDocuments = list.subList(0, budgetedSize);
        this.log.debug("Budgeted list has length {}", budgetedDocuments.size());
        return budgetedDocuments;
    }

    /**
     * Returns the documents of the given instance, shortened to the budget share {@code i / i2}.
     * A budget of {@link Integer#MAX_VALUE} means "no limit": the full list is returned.
     *
     * @param hpoInstance the instance to get documents for
     * @param i           the current budget
     * @param i2          the maximum budget
     * @return the (possibly shortened) document list
     */
    protected List<GeneDocument> getDocuments4Budget4Instance(HpoInstance hpoInstance, int i, int i2) {
        final List<GeneDocument> instanceDocuments = getDocuments4Instance(hpoInstance);
        if (i != Integer.MAX_VALUE) {
            return getDocuments4Budget(instanceDocuments, i, i2);
        }
        return instanceDocuments;
    }

    /**
     * Returns the documents belonging to the split type of the given instance.
     * <ul>
     * <li>Not merged, split type DEV or TRAIN: the respective partitions of <em>all</em> active
     * corpora are concatenated. The split type of the instances returned by
     * {@link #getActiveCorpora()} is overwritten in the process.</li>
     * <li>Merged, split type TESTSPLIT: if the instance's corpus is known to the corpus registry,
     * the test split of that single (non-merged) corpus is returned; otherwise the merged
     * partition.</li>
     * <li>All other cases: the partition of the instance itself.</li>
     * </ul>
     *
     * @param hpoInstance the instance describing corpus, split type and cross-validation round
     * @return the documents of the requested partition
     */
    protected List<GeneDocument> getDocuments4Instance(HpoInstance hpoInstance) {
        String corpus = hpoInstance.getCorpus();
        String subcorpus = hpoInstance.getSubcorpus();
        SplitType splitType = hpoInstance.getSplitType();
        boolean merged = hpoInstance.isMergeCorpora();
        File splitMappingFile = getSplitMappingFile(getTaskName(), corpus, subcorpus, merged);

        if (!merged && (splitType == SplitType.DEV || splitType == SplitType.TRAIN)) {
            List<GeneDocument> allCorporaDocuments = new ArrayList<>();
            for (HpoInstance activeCorpus : getActiveCorpora()) {
                activeCorpus.setSplitType(splitType);
                // Each corpus has its own split mapping file. Loading a corpus with the mapping file
                // of the requesting instance fails (or stores the split under the wrong name) as
                // soon as that corpus is not already cached.
                File activeCorpusMappingFile = getSplitMappingFile(getTaskName(), activeCorpus.getCorpus(), activeCorpus.getSubcorpus(), merged);
                allCorporaDocuments.addAll(getCorpusPartition(activeCorpus, activeCorpusMappingFile));
            }
            return allCorporaDocuments;
        }

        if (!merged || splitType != SplitType.TESTSPLIT) {
            return getCorpusPartition(hpoInstance, splitMappingFile);
        }

        // Merged corpora with a test split request: probe whether the corpus exists on its own.
        boolean knownCorpus;
        try {
            HpoCorpusRegistry.getCorpusFiles(hpoInstance);
            knownCorpus = true;
        } catch (UnknownCorpusException ignored) {
            // Deliberately ignored: an unknown corpus just means we stay with the merged partition.
            knownCorpus = false;
        }
        if (!knownCorpus) {
            return getCorpusPartition(hpoInstance, splitMappingFile);
        }
        HpoInstance singleCorpusInstance = new HpoInstance(hpoInstance.getCorpus(), hpoInstance.getSubcorpus(), false, hpoInstance.getSplitType(), hpoInstance.getCrossvalRound(), hpoInstance.getInstanceInfo());
        File singleCorpusMappingFile = getSplitMappingFile(getTaskName(), hpoInstance.getCorpus(), hpoInstance.getSubcorpus(), false);
        return getCorpusPartition(singleCorpusInstance, singleCorpusMappingFile);
    }

    /**
     * Builds the location of the file that stores the document-to-split assignment. The file lives
     * in the relative directory {@code splitmappings}, which is created when missing.
     *
     * @param str  the task name
     * @param str2 the corpus name
     * @param str3 the subcorpus name
     * @param z    whether the mapping is for merged corpora
     * @return the mapping file (which may not exist yet)
     */
    protected File getSplitMappingFile(String str, String str2, String str3, boolean z) {
        final File mappingDirectory = new File("splitmappings");
        if (!mappingDirectory.exists()) {
            mappingDirectory.mkdirs();
        }
        final StringBuilder fileName = new StringBuilder();
        fileName.append(str).append("-");
        fileName.append(String.join("-", str2, str3, String.valueOf(getNumSplits())));
        if (z) {
            fileName.append("-merged-");
        }
        fileName.append("split-").append(getDevSamplingFrequency()).append("devfreq.txt");
        return new File(mappingDirectory, fileName.toString());
    }

    /**
     * Creates a {@code Parameters} object from the configuration and overlays it with the given
     * flat list of alternating names and values ({@code name0, value0, name1, value1, ...}).
     * A trailing name without a value is ignored.
     *
     * @param list          alternating parameter names and values
     * @param configuration the base configuration
     * @return the resulting parameters
     */
    protected Parameters parseParameters(List<String> list, Configuration configuration) {
        final Parameters parsed = new Parameters((Properties) configuration);
        // Values sit on the odd positions, each preceded by its name.
        for (int valueIndex = 1; valueIndex < list.size(); valueIndex += 2) {
            final String value = list.get(valueIndex);
            parsed.put(list.get(valueIndex - 1), value);
        }
        return parsed;
    }

    /**
     * Convenience overload: wraps the arguments into an {@code HpoInstance} without instance info
     * and delegates to {@link #getDocuments4Instance(HpoInstance)}.
     *
     * @param str       the corpus name
     * @param str2      the subcorpus name
     * @param splitType the requested split type
     * @param z         whether corpora are merged
     * @param i         the cross-validation round
     * @return the documents of the requested partition
     */
    protected List<GeneDocument> getDocuments4Instance(String str, String str2, SplitType splitType, boolean z, int i) {
        final HpoInstance adHocInstance = new HpoInstance(str, str2, z, splitType, i, null);
        return getDocuments4Instance(adHocInstance);
    }

    /**
     * Selects the documents for a split type from loaded corpus data.
     * <ul>
     * <li>DEV: the dev set (right element of the pair).</li>
     * <li>TRAIN: all cross-validation splits concatenated.</li>
     * <li>TESTSPLIT: the split with index {@code i}.</li>
     * <li>TRAINSPLIT: all splits except index {@code i}; with a single split, that split itself.</li>
     * </ul>
     * DEV, TESTSPLIT and single-split TRAINSPLIT return the stored list itself, not a copy.
     *
     * @param splitType the requested split type
     * @param i         the cross-validation round
     * @param pair      pair of (cross-validation splits, dev set)
     * @return the selected documents
     * @throws IllegalArgumentException for any other split type
     */
    protected List<GeneDocument> getCorpusSplitByType(SplitType splitType, int i, Pair<List<List<GeneDocument>>, List<GeneDocument>> pair) {
        if (splitType == SplitType.DEV) {
            return pair.getRight();
        }
        if (splitType == SplitType.TRAIN) {
            List<GeneDocument> allSplitDocuments = new ArrayList<>();
            for (List<GeneDocument> split : pair.getLeft()) {
                allSplitDocuments.addAll(split);
            }
            return allSplitDocuments;
        }
        if (splitType == SplitType.TESTSPLIT) {
            return pair.getLeft().get(i);
        }
        if (splitType != SplitType.TRAINSPLIT) {
            throw new IllegalArgumentException("Unsupported split type: " + splitType + ". Should be one of 'dev', 'train', 'trainsplit' or 'testsplit'.");
        }
        final int numSplits = getNumSplits();
        if (numSplits == 1) {
            return pair.getLeft().get(0);
        }
        // Everything but the held-out round forms the training portion.
        List<GeneDocument> trainingPortion = new ArrayList<>();
        for (int splitIndex = 0; splitIndex < numSplits; splitIndex++) {
            if (splitIndex != i) {
                trainingPortion.addAll(pair.getLeft().get(splitIndex));
            }
        }
        return trainingPortion;
    }

    /**
     * Returns the partition of the instance's corpus that matches its split type and
     * cross-validation round. Corpus data is loaded on first access via {@code loadData} and then
     * kept in {@code loadedCorpora}; the whole lookup runs while holding the monitor of that map.
     *
     * @param hpoInstance the instance describing corpus, subcorpus, split type and round
     * @param file        the split mapping file passed on to {@code loadData}
     * @return the documents of the requested partition
     * @throws GeneExpRuntimeException if loading the corpus fails
     */
    protected List<GeneDocument> getCorpusPartition(HpoInstance hpoInstance, File file) {
        synchronized (this.loadedCorpora) {
            final SplitType requestedSplit = hpoInstance.getSplitType();
            final int crossvalRound = hpoInstance.getCrossvalRound();
            final String corpusKey = hpoInstance.getCorpus() + (hpoInstance.isMergeCorpora() ? "-merged" : "");
            final Map<String, Pair<List<List<GeneDocument>>, List<GeneDocument>>> subcorpora =
                    this.loadedCorpora.computeIfAbsent(corpusKey, key -> new HashMap<>());
            final Pair<List<List<GeneDocument>>, List<GeneDocument>> corpusData =
                    subcorpora.computeIfAbsent(hpoInstance.getSubcorpus(), key -> {
                        try {
                            return loadData(hpoInstance, file);
                        } catch (GeneExpException | IOException e) {
                            throw new GeneExpRuntimeException(e);
                        }
                    });
            return getCorpusSplitByType(requestedSplit, crossvalRound, corpusData);
        }
    }

    /**
     * Loads the documents for the given instance and partitions them into cross-validation splits
     * and a dev set via {@code getTrainDevSplit}.
     * <p>
     * For merged instances the documents of all active corpora are loaded and partitioned together;
     * otherwise only the corpus of the instance is loaded, and the gene counts per partition are
     * logged in addition.
     *
     * @param hpoInstance the instance whose corpus (or, if merged, all active corpora) is loaded
     * @param file        the split mapping file
     * @return pair of (cross-validation splits, dev set)
     * @throws IOException      declared for the corpus loading calls
     * @throws GeneExpException declared for the corpus loading calls
     */
    protected Pair<List<List<GeneDocument>>, List<GeneDocument>> loadData(HpoInstance hpoInstance, File file) throws IOException, GeneExpException {
        final boolean merged = hpoInstance.isMergeCorpora();
        final List<GeneDocument> documents;
        DocumentSourceFiles singleCorpusFiles = null;
        String subcorpus = null;
        if (merged) {
            documents = new ArrayList<>();
            for (HpoInstance activeCorpus : getActiveCorpora()) {
                DocumentSourceFiles activeCorpusFiles = HpoCorpusRegistry.getCorpusFiles(activeCorpus);
                List<GeneDocument> corpusDocuments = this.documentLoader.getDocuments(activeCorpusFiles).collect(Collectors.toList());
                this.log.info("Loaded {} documents of {}", corpusDocuments.size(), activeCorpusFiles.getName());
                documents.addAll(corpusDocuments);
            }
            this.log.info("Loaded all active corpora with a union of {} documents.", documents.size());
        } else {
            singleCorpusFiles = HpoCorpusRegistry.getCorpusFiles(hpoInstance);
            subcorpus = hpoInstance.getSubcorpus();
            documents = this.documentLoader.getDocuments(singleCorpusFiles).collect(Collectors.toList());
            this.log.info("Loaded {} documents of {}", documents.size(), singleCorpusFiles.getName());
        }

        final Pair<List<List<GeneDocument>>, List<GeneDocument>> trainDevSplit = getTrainDevSplit(documents, file);
        final List<List<GeneDocument>> cvSplits = trainDevSplit.getLeft();
        final List<GeneDocument> devSet = trainDevSplit.getRight();

        int numTrainingDocuments = 0;
        for (List<GeneDocument> split : cvSplits) {
            numTrainingDocuments += split.size();
        }
        this.log.info("TrainDev distribution - training: {} documents", numTrainingDocuments);
        this.log.info("TrainDev distribution - devset: {} documents", devSet.size());
        this.log.info("TrainDev distribution - cross validation sets: {} documents, respectively",
                cvSplits.stream().map(split -> String.valueOf(split.size())).collect(Collectors.joining(", ")));

        // The per-partition gene statistics are only reported for single corpora.
        if (!merged && this.log.isInfoEnabled()) {
            this.log.info("Gene number distribution for data at {}/{}:", singleCorpusFiles.getName(), subcorpus);
            for (int splitIndex = 0; splitIndex < cvSplits.size(); splitIndex++) {
                long genesInSplit = cvSplits.get(splitIndex).stream().flatMap(document -> document.getGenes()).count();
                this.log.info("Split {} of data at {}/{}: {}", splitIndex, singleCorpusFiles.getName(), subcorpus, genesInSplit);
            }
            long genesInDevSet = devSet.stream().flatMap(document -> document.getGenes()).count();
            this.log.info("Genes in dev set for data at {}/{}: {}", singleCorpusFiles.getName(), subcorpus, genesInDevSet);
        }
        return trainDevSplit;
    }

    /**
     * Restores a previously stored partitioning of the given documents.
     * <p>
     * The mapping file holds one {@code <document id> TAB <split>} entry per line, where
     * {@code <split>} is either {@code dev} or the zero-based index of a cross-validation split.
     * Blank lines and lines starting with {@code #} are skipped.
     *
     * @param list the documents to distribute
     * @param file the split mapping file to read
     * @return pair of (cross-validation splits, dev set)
     * @throws IllegalStateException    if a document has no entry in the mapping file
     * @throws IllegalArgumentException if an entry is neither {@code dev} nor a valid split index
     * @throws GeneExpRuntimeException  if the mapping file cannot be read
     */
    protected Pair<List<List<GeneDocument>>, List<GeneDocument>> loadDataSplits(List<GeneDocument> list, File file) {
        final Map<String, String> documentId2Split;
        // try-with-resources: the reader is closed even when parsing the file fails.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(file)) {
            documentId2Split = reader.lines()
                    .filter(Predicate.not(String::isBlank))
                    .filter(line -> !line.startsWith("#"))
                    .map(line -> line.split("\t"))
                    .collect(Collectors.toMap(fields -> fields[0], fields -> fields[1]));
        } catch (IOException e) {
            throw new GeneExpRuntimeException(e);
        }

        final int numSplits = getNumSplits();
        final List<GeneDocument> devSet = new ArrayList<>();
        final List<List<GeneDocument>> cvSplits = new ArrayList<>();
        for (int splitIndex = 0; splitIndex < numSplits; splitIndex++) {
            cvSplits.add(new ArrayList<>());
        }

        for (GeneDocument document : list) {
            String split = documentId2Split.get(document.getId());
            if (split == null) {
                throw new IllegalStateException("The stored data split at " + file.getAbsolutePath() + " is not compatible to the current data. It does not have an entry for document ID " + document.getId() + ". A common cause of this error is the evaluation of a corpus that is not configured in the active corpora of the optimization route.");
            }
            if (split.equals("dev")) {
                devSet.add(document);
                continue;
            }
            int splitIndex;
            try {
                splitIndex = Integer.parseInt(split);
            } catch (NumberFormatException e) {
                // Keep the cause so the offending value's parse failure shows up in the stack trace.
                throw new IllegalArgumentException("The split mapping file at " + file.getAbsolutePath() + " defines an illegal split index: " + split, e);
            }
            if (splitIndex < 0 || splitIndex >= numSplits) {
                throw new IllegalArgumentException("The split mapping file at " + file.getAbsolutePath() + " defines an illegal split index: " + split);
            }
            cvSplits.get(splitIndex).add(document);
        }
        return new ImmutablePair<>(cvSplits, devSet);
    }

    /**
     * Writes the document-to-split assignment to the given file, one
     * {@code <document id> TAB <split>} entry per line. Cross-validation documents are stored with
     * the zero-based index of their split, dev documents with the marker {@code dev}.
     *
     * @param list  the cross-validation splits
     * @param list2 the dev set
     * @param file  the file to write
     * @throws GeneExpRuntimeException if writing fails
     */
    protected void saveDataSplit(List<List<GeneDocument>> list, List<GeneDocument> list2, File file) {
        int storedDocuments = 0;
        // try-with-resources: the writer is flushed and closed even when a write fails.
        try (BufferedWriter writer = FileUtilities.getWriterToFile(file)) {
            for (int splitIndex = 0; splitIndex < list.size(); splitIndex++) {
                for (GeneDocument document : list.get(splitIndex)) {
                    writer.write(String.join("\t", document.getId(), String.valueOf(splitIndex)));
                    writer.newLine();
                    storedDocuments++;
                }
            }
            for (GeneDocument devDocument : list2) {
                writer.write(String.join("\t", devDocument.getId(), "dev"));
                writer.newLine();
                storedDocuments++;
            }
        } catch (IOException e) {
            throw new GeneExpRuntimeException(e);
        }
        // Reported through the class logger like every other message of this class.
        this.log.info("Stored documents: {}", storedDocuments);
    }

    /**
     * Partitions the documents into a dev set and {@code getNumSplits()} cross-validation splits.
     * <p>
     * If the mapping file already exists, the stored partitioning is restored from it. Otherwise
     * every n-th document (n = dev sampling frequency, when positive) goes to the dev set, the
     * remaining documents are dealt round-robin to the cross-validation splits, and the result is
     * saved to the mapping file.
     *
     * @param list the documents to partition
     * @param file the split mapping file to read from or write to
     * @return pair of (cross-validation splits, dev set)
     */
    protected Pair<List<List<GeneDocument>>, List<GeneDocument>> getTrainDevSplit(List<GeneDocument> list, File file) {
        final int numSplits = getNumSplits();
        final int devFrequency = getDevSamplingFrequency();
        this.log.info("Partitioning {} documents into {} cross validation partitions and a development set with a sampling frequency of {}.", list.size(), numSplits, devFrequency);
        if (file.exists()) {
            return loadDataSplits(list, file);
        }

        final List<GeneDocument> devSet;
        final List<GeneDocument> crossvalDocuments;
        if (devFrequency <= 0) {
            this.log.info("Dev sampling frequency is set to {}. Creating {} cross-validation splits without a dev set.", devFrequency, numSplits);
            crossvalDocuments = list;
            devSet = Collections.emptyList();
        } else {
            this.log.info("Using 1/{} of the data as dev set.", devFrequency);
            devSet = new ArrayList<>();
            crossvalDocuments = new ArrayList<>();
            int position = 0;
            for (GeneDocument document : list) {
                if (position % devFrequency == 0) {
                    devSet.add(document);
                } else {
                    crossvalDocuments.add(document);
                }
                position++;
            }
        }

        final List<List<GeneDocument>> cvSplits = new ArrayList<>();
        for (int splitIndex = 0; splitIndex < numSplits; splitIndex++) {
            cvSplits.add(new ArrayList<>());
        }
        // Deal the remaining documents round-robin over the splits.
        int position = 0;
        for (GeneDocument document : crossvalDocuments) {
            cvSplits.get(position % numSplits).add(document);
            position++;
        }
        saveDataSplit(cvSplits, devSet, file);
        return new ImmutablePair<>(cvSplits, devSet);
    }

    /**
     * Returns the training data for the given instance, shortened to the budget share
     * {@code i / i2}. A budget of {@link Integer#MAX_VALUE} means "no limit".
     *
     * @param hpoInstance the instance to get training data for
     * @param z           passed on to {@code getTrainingData}: whether to add the dev data
     * @param z2          passed on to {@code getTrainingData}: whether to train on all corpora
     * @param i           the current budget
     * @param i2          the maximum budget
     * @return the (possibly shortened) training documents
     */
    protected List<GeneDocument> getTrainingData4Budget4Instance(HpoInstance hpoInstance, boolean z, boolean z2, int i, int i2) {
        this.log.debug("Obtaining data for instance {} with budget {}/{}", hpoInstance, i, i2);
        final List<GeneDocument> fullTrainingData = getTrainingData(hpoInstance, z, z2);
        if (i != Integer.MAX_VALUE) {
            return getDocuments4Budget(fullTrainingData, i, i2);
        }
        return fullTrainingData;
    }

    /**
     * Determines the training documents for the given instance.
     * <ul>
     * <li>DEV instance, not merged: the training documents of all active corpora.</li>
     * <li>DEV instance, merged: the TRAIN partition of the instance's corpus.</li>
     * <li>Any other split type: the TRAINSPLIT partition of the instance's round, or, if
     * {@code z2} is set and corpora are not merged, the training documents of all active
     * corpora.</li>
     * </ul>
     * For DEV instances, the instance's own documents are appended when {@code z} is set.
     *
     * @param hpoInstance the instance to get training data for
     * @param z           whether to additionally train on the dev data (DEV instances only)
     * @param z2          whether to use all active corpora for non-DEV, non-merged instances
     * @return the training documents
     */
    protected List<GeneDocument> getTrainingData(HpoInstance hpoInstance, boolean z, boolean z2) {
        final boolean devInstance = hpoInstance.getSplitType() == SplitType.DEV;
        final boolean merged = hpoInstance.isMergeCorpora();

        if (!devInstance) {
            if (z2 && !merged) {
                return getAllCorporaTrainingDocuments4Instance(hpoInstance);
            }
            return getDocuments4Instance(hpoInstance.getCorpus(), hpoInstance.getSubcorpus(), SplitType.TRAINSPLIT, merged, hpoInstance.getCrossvalRound());
        }

        final List<GeneDocument> trainDocuments;
        if (merged) {
            trainDocuments = getDocuments4Instance(hpoInstance.getCorpus(), hpoInstance.getSubcorpus(), SplitType.TRAIN, merged, -1);
        } else {
            trainDocuments = getAllCorporaTrainingDocuments4Instance(hpoInstance);
        }
        if (!z) {
            return trainDocuments;
        }
        // Work on a copy so the train list stays untouched when the dev data is appended.
        final List<GeneDocument> trainAndDevDocuments = new ArrayList<>(trainDocuments);
        trainAndDevDocuments.addAll(getDocuments4Instance(hpoInstance));
        this.log.debug("Training with DEV data. Train size is {}, DEV size is {}, total: {}", trainDocuments.size(), trainAndDevDocuments.size() - trainDocuments.size(), trainAndDevDocuments.size());
        return trainAndDevDocuments;
    }

    /**
     * @return the metric of this route; presumably used when a request does not name one —
     *         TODO confirm against {@code handle}, which is not decompiled here
     */
    protected abstract Metric getDefaultMetric();

    /**
     * Calculates the score of the given instance under the given parameters and metric and
     * returns it as a string.
     * <p>
     * NOTE(review): the meaning of the integer parameters is not recoverable from this
     * decompiled excerpt (parameter names were lost) — consult the implementing subclasses.
     */
    protected abstract String calculateScore(HpoInstance hpoInstance, Parameters parameters, int i, int i2, int i3, int i4, Metric metric, int i5);

    protected AlphabetCarrying train(InstanceList instanceList, Parameters parameters, String str, int i) {
        MaxEnt maxEnt = null;
        String str2 = (String) parameters.get(Configuration.dot(str, Configuration.PARAM_ALGORITHM));
        this.log.info("Got data alphabet of size {} for training", Integer.valueOf(instanceList.getAlphabet().size()));
        if (parameters.getBoolean(Configuration.dot(str, Configuration.PARAM_STANDARDIZE_FEATURES))) {
            this.log.info("Performing train feature standardization (Z-score normalization)");
            StandardizationStats standardizeFeatures = FeatureUtils.standardizeFeatures(instanceList);
            parameters.put(Configuration.dot(str, Configuration.KEY_STANDARDIZATION_VALUES), standardizeFeatures);
            this.log.info("Got standardization parameters of length {} (means), {} (stdevs)", Integer.valueOf(standardizeFeatures.means.length), Integer.valueOf(standardizeFeatures.stdDeviations.length));
        }
        if (parameters.getBoolean(Configuration.dot(str, Configuration.PARAM_MINMAX_SCALE_FEATURES))) {
            this.log.info("Performing train feature min-max scaling (potentially on the already Z-score normalized data).");
            MinMaxScalingStats scaleFeatures = FeatureUtils.scaleFeatures(instanceList);
            parameters.put(Configuration.dot(str, Configuration.KEY_MINMAX_SCALING_VALUES), scaleFeatures);
            this.log.info("Got scaling parameters of length {} (max vals), {} (min vals)", Integer.valueOf(scaleFeatures.maxValues.length), Integer.valueOf(scaleFeatures.minValues.length));
        }
        if (!instanceList.isEmpty() && this.log.isDebugEnabled()) {
            this.log.debug("Example feature vector after potential preprocessing of gene {}: [focus tax {}] {}", new Object[]{((Instance) instanceList.get(0)).getProperty("gm"), ((Instance) instanceList.get(0)).getSource(), ((FeatureVector) ((Instance) instanceList.get(0)).getData()).toString(true)});
        }
        if (str2.equals(Configuration.VALUE_MAXENT)) {
            MaxEntTrainer maxEntTrainer = new MaxEntTrainer();
            this.log.info("Training maximum entropy model");
            maxEnt = maxEntTrainer.train(instanceList);
            parameters.put(Configuration.dot(str, Configuration.KEY_CLASSIFIER), maxEnt);
            this.log.info("Finished maximum entropy training.");
        } else if (str2.equals(Configuration.VALUE_SVM)) {
            SVMTrainOptions sVMTrainOptions = new SVMTrainOptions();
            sVMTrainOptions.svmType = Integer.parseInt((String) parameters.get(Configuration.dot(str, Configuration.PARAM_SVM_TYPE)));
            if (parameters.containsKey(Configuration.dot(str, Configuration.PARAM_SVM_C))) {
                sVMTrainOptions.C = Double.parseDouble((String) parameters.get(Configuration.dot(str, Configuration.PARAM_SVM_C)));
            }
            if (parameters.containsKey(Configuration.dot(str, Configuration.PARAM_SVM_KERNEL_TYPE))) {
                sVMTrainOptions.kernelType = Integer.parseInt((String) parameters.get(Configuration.dot(str, Configuration.PARAM_SVM_KERNEL_TYPE)));
            }
            if (parameters.containsKey(Configuration.dot(str, Configuration.PARAM_SVM_COEF0))) {
                sVMTrainOptions.coef0 = Double.parseDouble((String) parameters.get(Configuration.dot(str, Configuration.PARAM_SVM_COEF0)));
            }
            if (parameters.containsKey(Configuration.dot(str, Configuration.PARAM_SVM_GAMMA))) {
                sVMTrainOptions.svmGamma = Double.parseDouble((String) parameters.get(Configuration.dot(str, Configuration.PARAM_SVM_GAMMA)));
            }
            if (parameters.containsKey(Configuration.dot(str, Configuration.PARAM_SVM_DEGREE))) {
                sVMTrainOptions.svmDegree = Integer.parseInt((String) parameters.get(Configuration.dot(str, Configuration.PARAM_SVM_DEGREE)));
            }
            sVMTrainOptions.probability = true;
            long size = ((List) instanceList.stream().filter(instance -> {
                return ((Label) instance.getTarget()).getEntry().equals(Float.valueOf(1.0f));
            }).collect(Collectors.toList())).size();
            long size2 = instanceList.size() - size;
            boolean z = size2 > size;
            double d = z ? size / size2 : size2 / size;
            Alphabet targetAlphabet = instanceList.getTargetAlphabet();
            int lookupIndex = targetAlphabet.lookupIndex(Float.valueOf(1.0f));
            int lookupIndex2 = targetAlphabet.lookupIndex(Float.valueOf(0.0f));
            if (z) {
                sVMTrainOptions.addClassWeight(lookupIndex, d);
                sVMTrainOptions.addClassWeight(lookupIndex2, 1.0d - d);
            } else {
                sVMTrainOptions.addClassWeight(lookupIndex, 1.0d - d);
                sVMTrainOptions.addClassWeight(lookupIndex2, d);
            }
            MaxEnt sVMClassifier = new SVMClassifier();
            this.log.info("Training SVM with the following options: {}", sVMTrainOptions);
            sVMClassifier.train(instanceList, sVMTrainOptions);
            parameters.put(Configuration.dot(str, Configuration.KEY_CLASSIFIER), sVMClassifier);
            maxEnt = sVMClassifier;
            this.log.info("Finished SVM training.");
        } else if (str2.equals(Configuration.VALUE_LTR)) {
            RANKER_TYPE valueOf = RANKER_TYPE.valueOf((String) parameters.get(Configuration.dot(str, Configuration.PARAM_LTR_ALGORITHM)));
            MaxEnt rankLibRanker = new RankLibRanker(valueOf, (int[]) null, METRIC.valueOf((String) parameters.get(Configuration.dot(str, Configuration.PARAM_LTR_METRIC))), Integer.parseInt((String) parameters.get(Configuration.dot(str, Configuration.PARAM_LTR_K))), (String) null);
            this.log.info("Training {} model.", valueOf);
            rankLibRanker.train(instanceList, true, 0.8f, i);
            parameters.put(Configuration.dot(str, Configuration.KEY_RANKER), rankLibRanker);
            maxEnt = rankLibRanker;
            this.log.info("Finished training {} model.", valueOf);
        }
        return maxEnt;
    }

    /**
     * Parses a dash-separated instance name into an {@link HpoInstance}.
     *
     * <p>The following shapes are accepted, where {@code <split>} must be one of
     * {@code testsplit}, {@code trainsplit}, {@code dev} or {@code train}:
     * <ul>
     *   <li>{@code <a>-<b>-merged-<split>-<n>}: exactly five parts with {@code merged} in third
     *       position; {@code <n>} must be an integer.</li>
     *   <li>{@code <a>-<b>-<split>[-<x>]}: three or more parts. If a fourth part exists and is an
     *       integer it becomes the numeric argument, otherwise it replaces {@code str2}.</li>
     *   <li>{@code <split>}: a single part; the first two components default to
     *       {@code "allactive"} and {@code "dev"}.</li>
     * </ul>
     * The instance is flagged as merged when the second or third part equals {@code merged}.
     *
     * @param str  the instance name to parse
     * @param str2 value handed to the {@link HpoInstance} constructor as its last argument unless
     *             a non-numeric fourth name part overrides it
     * @return the parsed instance; the numeric argument is {@code -1} when the name carries none
     * @throws IllegalArgumentException if the name has an unsupported number of parts, the split
     *                                  type is unknown, or (as {@link NumberFormatException}) the
     *                                  fifth part of a merged name is not an integer
     */
    protected HpoInstance parseInstanceName(String str, String str2) {
        final String[] parts = str.split("-");
        // Only look at index 2 when it exists: a two-part name must end up in the
        // "Illegal instance name" branch below instead of failing with an
        // ArrayIndexOutOfBoundsException here.
        final boolean merged = parts.length > 1
                && ("merged".equals(parts[1]) || (parts.length > 2 && "merged".equals(parts[2])));

        final String first;
        final String second;
        final String splitName;
        int number = -1;
        String lastArgument = str2;

        if (parts.length == 5 && "merged".equals(parts[2])) {
            first = parts[0];
            second = parts[1];
            splitName = parts[3];
            number = Integer.parseInt(parts[4]);
        } else if (parts.length > 2) {
            first = parts[0];
            second = parts[1];
            splitName = parts[2];
            if (parts.length > 3) {
                try {
                    number = Integer.parseInt(parts[3]);
                } catch (NumberFormatException ignored) {
                    // Deliberate fallback: a non-numeric fourth part is not an error, it is
                    // passed on in place of str2.
                    lastArgument = parts[3];
                }
            }
        } else if (parts.length == 1) {
            first = "allactive";
            second = "dev";
            splitName = str;
        } else {
            throw new IllegalArgumentException("Illegal instance name: " + str);
        }

        final SplitType splitType;
        switch (splitName) {
            case "testsplit":
                splitType = SplitType.TESTSPLIT;
                break;
            case "trainsplit":
                splitType = SplitType.TRAINSPLIT;
                break;
            case "dev":
                splitType = SplitType.DEV;
                break;
            case HpoCorpusRegistry.TRAIN:
                splitType = SplitType.TRAIN;
                break;
            default:
                throw new IllegalArgumentException("Illegal split type '" + splitName + "'.");
        }
        return new HpoInstance(first, second, merged, splitType, number, lastArgument);
    }

    /**
     * Collects the distinct machine-learning configuration prefixes present in the given
     * parameters.
     *
     * <p>Every parameter key is matched in full against {@code ML_PREFIX_PATTERN}; the first
     * capture group of a matching key is the candidate prefix. A candidate is kept only if the
     * value stored under {@code Configuration.dot(prefix, Configuration.PARAM_ALGORITHM)}
     * (empty string when absent) is contained in {@code KNOWN_ML_ALGORITHMS}.
     *
     * @param parameters the parameters whose keys are scanned
     * @return the distinct prefixes in encounter order of the key set
     */
    protected List<String> getMachineLearningPrefixes(Parameters parameters) {
        return parameters.keySet().stream()
                // Bound method reference; the decompiled lambda referred to an undefined receiver.
                .map(ML_PREFIX_PATTERN::matcher)
                .filter(matcher -> matcher.matches())
                .map(matcher -> matcher.group(1))
                .filter(prefix -> KNOWN_ML_ALGORITHMS.contains(
                        parameters.getOrDefault(Configuration.dot(prefix, Configuration.PARAM_ALGORITHM), "")))
                .distinct()
                .collect(Collectors.toList());
    }

    // Static initialization of the class-level constants used by the methods above.
    static {
        // NOTE(review): looks like the compiler-generated flag backing `assert` statements that a
        // decompiler made visible; confirm against the field declaration before touching it.
        $assertionsDisabled = !HpoRoute.class.desiredAssertionStatus();
        // Algorithm values for which a parameter prefix is accepted as a machine-learning prefix.
        KNOWN_ML_ALGORITHMS = Set.of(Configuration.VALUE_MAXENT, Configuration.VALUE_SVM, Configuration.VALUE_LTR);
        // Group 1 lazily captures the shortest leading part of a key that ends in
        // ".ml.<segment>", where <segment> contains no dot; anything after it is ignored.
        ML_PREFIX_PATTERN = Pattern.compile("(.*?\\.ml\\.[^.]+).*$");
        // Counter starting at zero.
        runCounter = new AtomicInteger(0);
    }
}
