package de.viadee.ki.sparkimporter.processing.steps.dataprocessing;

import de.viadee.ki.sparkimporter.configuration.Configuration;
import de.viadee.ki.sparkimporter.configuration.preprocessing.PreprocessingConfiguration;
import de.viadee.ki.sparkimporter.configuration.preprocessing.VariableConfiguration;
import de.viadee.ki.sparkimporter.configuration.preprocessing.VariableNameMapping;
import de.viadee.ki.sparkimporter.configuration.util.ConfigurationUtils;
import de.viadee.ki.sparkimporter.processing.PreprocessingRunner;
import de.viadee.ki.sparkimporter.processing.interfaces.PreprocessingStepInterface;
import de.viadee.ki.sparkimporter.util.SparkBroadcastHelper;
import de.viadee.ki.sparkimporter.util.SparkImporterLogger;
import de.viadee.ki.sparkimporter.util.SparkImporterUtils;
import de.viadee.ki.sparkimporter.util.SparkImporterVariables;
import java.lang.invoke.SerializedLambda;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

/* loaded from: input_file:de/viadee/ki/sparkimporter/processing/steps/dataprocessing/DetermineProcessVariablesStep.class */
/**
 * Preprocessing step that prepares the process variables of the incoming dataset.
 *
 * <p>It runs four sub-steps in order:
 * <ol>
 *   <li>remove rows of variables that the configuration marks as unused, and rows of
 *       variables whose name starts with {@code _CORRELATION_ID_};</li>
 *   <li>rename variables according to the configured name mappings;</li>
 *   <li>collect a variable-name to variable-type map and publish it as
 *       {@code PROCESS_VARIABLES_RAW} through {@link SparkBroadcastHelper};</li>
 *   <li>replace missing types in that map with {@code "string"} and publish the result as
 *       {@code PROCESS_VARIABLES_ESCALATED}.</li>
 * </ol>
 */
public class DetermineProcessVariablesStep implements PreprocessingStepInterface {

    /** Type used for variables that have no usable type recorded. */
    private static final String DEFAULT_VARIABLE_TYPE = "string";

    /** Textual type value that is treated the same way as a missing type. */
    private static final String NULL_TYPE_LITERAL = "null";

    /** Variables whose name starts with this prefix are always removed from the dataset. */
    private static final String CORRELATION_ID_PREFIX = "_CORRELATION_ID_";

    private static final String NAME_COLUMN = SparkImporterVariables.VAR_PROCESS_INSTANCE_VARIABLE_NAME;
    private static final String TYPE_COLUMN = SparkImporterVariables.VAR_PROCESS_INSTANCE_VARIABLE_TYPE;
    private static final String REVISION_COLUMN = SparkImporterVariables.VAR_PROCESS_INSTANCE_VARIABLE_REVISION;

    /**
     * Runs variable filtering, renaming, type determination and type defaulting.
     *
     * @param dataset the dataset to process
     * @param z       when {@code true}, the filtering, renaming and type-determination
     *                sub-steps each write their result to a CSV file
     * @param str     not used by this step
     * @param map     not used by this step
     * @return the dataset after variable filtering and renaming
     */
    @Override
    public Dataset<Row> runPreprocessingStep(Dataset<Row> dataset, boolean z, String str, Map<String, Object> map) {
        Dataset<Row> filtered = doFilterVariables(dataset, z);
        Dataset<Row> renamed = doVariableNameMapping(filtered, z);
        Dataset<Row> typed = doVariableTypeDetermination(renamed, z);
        return doVariableTypeEscalation(typed);
    }

    /**
     * Removes the rows of all variables that are configured with {@code useVariable == false}
     * and of all variables whose name starts with {@code _CORRELATION_ID_}.
     *
     * <p>A warning is logged for every configured variable that does not occur in the data.
     *
     * @param dataset                 the dataset to filter
     * @param writeStepResultIntoFile whether to write the filtered dataset to CSV
     * @return the filtered dataset
     */
    private Dataset<Row> doFilterVariables(Dataset<Row> dataset, boolean writeStepResultIntoFile) {
        List<String> variablesToFilter = new ArrayList<>();

        Configuration configuration = ConfigurationUtils.getInstance().getConfiguration();
        PreprocessingConfiguration preprocessingConfiguration =
                configuration == null ? null : configuration.getPreprocessingConfiguration();
        if (preprocessingConfiguration != null) {
            for (VariableConfiguration variableConfiguration : preprocessingConfiguration.getVariableConfiguration()) {
                if (!variableConfiguration.isUseVariable()) {
                    variablesToFilter.add(variableConfiguration.getVariableName());
                    SparkImporterLogger.getInstance().writeInfo("The variable '" + variableConfiguration.getVariableName()
                            + "' will be filtered out. Comment: " + variableConfiguration.getComment());
                }
            }
        }

        // Distinct variable names that actually occur in the data, collected on the driver.
        Set<String> existingVariables = dataset.select(NAME_COLUMN)
                .distinct()
                .collectAsList()
                .stream()
                .map(row -> row.getString(0))
                .collect(Collectors.toSet());

        for (String variableName : variablesToFilter) {
            if (!existingVariables.contains(variableName)) {
                SparkImporterLogger.getInstance().writeWarn("The variable '" + variableName
                        + "' is configured to be filtered, but does not exist in the data.");
            }
        }

        // The lambda is evaluated once per row, so use a hash lookup instead of a list scan.
        // Only this set is captured (no reference to the enclosing instance), and the explicit
        // FilterFunction target keeps the lambda serializable and the overload unambiguous.
        final Set<String> excludedVariables = new HashSet<>(variablesToFilter);
        Dataset<Row> filtered = dataset.filter((FilterFunction<Row>) row -> {
            String variableName = row.getAs(NAME_COLUMN);
            if (variableName != null && variableName.startsWith(CORRELATION_ID_PREFIX)) {
                return false;
            }
            return !excludedVariables.contains(variableName);
        });

        if (writeStepResultIntoFile) {
            SparkImporterUtils.getInstance().writeDatasetToCSV(filtered, "variable_filter");
        }
        return filtered;
    }

    /**
     * Renames variables according to the name mappings of the preprocessing configuration.
     *
     * <p>Mappings whose old or new name is {@code null} or empty are ignored with a warning.
     *
     * @param dataset                 the dataset whose variable names are rewritten
     * @param writeStepResultIntoFile whether to write the resulting dataset to CSV
     * @return the dataset with all valid mappings applied to the variable name column
     */
    private Dataset<Row> doVariableNameMapping(Dataset<Row> dataset, boolean writeStepResultIntoFile) {
        Map<String, String> renamings = new HashMap<>();

        Configuration configuration = ConfigurationUtils.getInstance().getConfiguration();
        PreprocessingConfiguration preprocessingConfiguration =
                configuration == null ? null : configuration.getPreprocessingConfiguration();
        if (preprocessingConfiguration != null) {
            for (VariableNameMapping mapping : preprocessingConfiguration.getVariableNameMappings()) {
                String oldName = mapping.getOldName();
                String newName = mapping.getNewName();
                // A null name is treated like an empty one instead of failing with an NPE.
                boolean incomplete = oldName == null || oldName.isEmpty() || newName == null || newName.isEmpty();
                if (incomplete) {
                    SparkImporterLogger.getInstance().writeWarn(
                            "Ignoring variable name mapping '" + oldName + "' -> '" + newName + "'.");
                } else {
                    renamings.put(oldName, newName);
                }
            }
        }

        Dataset<Row> result = dataset;
        for (Map.Entry<String, String> renaming : renamings.entrySet()) {
            String oldName = renaming.getKey();
            String newName = renaming.getValue();
            SparkImporterLogger.getInstance().writeInfo(
                    "Renaming variable '" + oldName + "' to '" + newName + "' as per user configuration.");
            // Replace the name where it matches the old name and keep it everywhere else.
            Column nameColumn = result.col(NAME_COLUMN);
            result = result.withColumn(NAME_COLUMN,
                    functions.when(nameColumn.equalTo(oldName), functions.lit(newName)).otherwise(nameColumn));
        }

        if (writeStepResultIntoFile) {
            SparkImporterUtils.getInstance().writeDatasetToCSV(result, "variable_name_mapping");
        }
        return result;
    }

    /**
     * Builds a map from variable name to variable type and publishes it as
     * {@code PROCESS_VARIABLES_RAW}.
     *
     * <p>The dataset itself is not modified.
     *
     * @param dataset                 the dataset to inspect
     * @param writeStepResultIntoFile whether to write the aggregated name/type/revision
     *                                dataset to CSV
     * @return the unchanged input dataset
     */
    private Dataset<Row> doVariableTypeDetermination(Dataset<Row> dataset, boolean writeStepResultIntoFile) {
        // One row per (name, type) combination together with its highest revision. Rows whose
        // name is the text 'null' are excluded; a SQL NULL name does not satisfy the comparison either.
        // NOTE(review): the filter hard-codes the column as name_; presumably that equals
        // NAME_COLUMN - confirm.
        Dataset<Row> variableTypes = dataset
                .select(NAME_COLUMN, TYPE_COLUMN, REVISION_COLUMN)
                .groupBy(NAME_COLUMN, TYPE_COLUMN)
                .agg(functions.max(REVISION_COLUMN).alias(REVISION_COLUMN))
                .filter("name_ <> 'null'");

        // NOTE(review): a name that occurs with several types yields several rows here, and the
        // map keeps only the type of the row that is read last - confirm this is intended.
        Map<String, String> rawVariableTypes = new HashMap<>();
        Iterator<Row> rows = variableTypes.toLocalIterator();
        while (rows.hasNext()) {
            Row row = rows.next();
            String variableName = row.getString(0);
            String variableType = row.getString(1);
            rawVariableTypes.put(variableName, variableType == null ? DEFAULT_VARIABLE_TYPE : variableType);
        }

        SparkBroadcastHelper.getInstance().broadcastVariable(
                SparkBroadcastHelper.BROADCAST_VARIABLE.PROCESS_VARIABLES_RAW, rawVariableTypes);

        if (writeStepResultIntoFile) {
            SparkImporterUtils.getInstance().writeDatasetToCSV(variableTypes, "variables_types_help");
        }
        return dataset;
    }

    /**
     * Finalizes the variable types of the {@code PROCESS_VARIABLES_RAW} map and publishes the
     * result as {@code PROCESS_VARIABLES_ESCALATED}.
     *
     * <p>The map is modified in place: a type that is missing, empty or the text
     * {@code "null"} becomes {@code "string"}. Because map keys are unique, two types of the
     * same variable are never compared in this method. If
     * {@code PreprocessingRunner.initialConfigToBeWritten} is set, one variable configuration
     * entry per variable is appended to the preprocessing configuration. The name/type list
     * is always written to CSV.
     *
     * @param dataset the dataset, passed through untouched
     * @return the unchanged input dataset
     */
    private Dataset<Row> doVariableTypeEscalation(Dataset<Row> dataset) {
        @SuppressWarnings("unchecked") // the raw map is published by doVariableTypeDetermination
        Map<String, String> variables = (Map<String, String>) SparkBroadcastHelper.getInstance()
                .getBroadcastVariable(SparkBroadcastHelper.BROADCAST_VARIABLE.PROCESS_VARIABLES_RAW);

        // Each variable is finalized when the next distinct name is reached. Only values of
        // existing keys are replaced, so the key set being iterated is never changed.
        String previousName = "";
        String previousType = "";
        boolean hasPrevious = false;
        for (String variableName : variables.keySet()) {
            if (variableName.equals(previousName)) {
                continue;
            }
            if (hasPrevious) {
                applyDefaultTypeIfMissing(variables, previousName, previousType);
            }
            previousName = variableName;
            previousType = variables.get(variableName);
            hasPrevious = true;
        }
        // Finalize the last variable; an empty name in last position is left untouched.
        if (hasPrevious && !previousName.isEmpty()) {
            applyDefaultTypeIfMissing(variables, previousName, previousType);
        }

        SparkBroadcastHelper.getInstance().broadcastVariable(
                SparkBroadcastHelper.BROADCAST_VARIABLE.PROCESS_VARIABLES_ESCALATED, variables);

        List<Row> variableRows = new ArrayList<>(variables.size());
        for (Map.Entry<String, String> variable : variables.entrySet()) {
            variableRows.add(RowFactory.create(variable.getKey(), variable.getValue()));
        }

        if (PreprocessingRunner.initialConfigToBeWritten) {
            Configuration configuration = ConfigurationUtils.getInstance().getConfiguration();
            PreprocessingConfiguration preprocessingConfiguration =
                    configuration == null ? null : configuration.getPreprocessingConfiguration();
            if (preprocessingConfiguration == null) {
                SparkImporterLogger.getInstance().writeWarn(
                        "No preprocessing configuration available, the initial variable configuration is not written.");
            } else {
                for (Map.Entry<String, String> variable : variables.entrySet()) {
                    VariableConfiguration variableConfiguration = new VariableConfiguration();
                    variableConfiguration.setVariableName(variable.getKey());
                    variableConfiguration.setVariableType(variable.getValue());
                    variableConfiguration.setUseVariable(true);
                    variableConfiguration.setComment("");
                    preprocessingConfiguration.getVariableConfiguration().add(variableConfiguration);
                }
            }
        }

        StructType schema = new StructType(new StructField[] {
                new StructField(NAME_COLUMN, DataTypes.StringType, false, Metadata.empty()),
                new StructField(TYPE_COLUMN, DataTypes.StringType, false, Metadata.empty())
        });
        Dataset<Row> variableTypes = SparkSession.builder().getOrCreate()
                .createDataFrame(variableRows, schema)
                .toDF()
                .orderBy(NAME_COLUMN);

        // The dataset holds exactly the local rows, so no Spark job is needed to count them.
        SparkImporterLogger.getInstance().writeInfo("Found " + variableRows.size() + " process variables.");

        // NOTE(review): unlike the other sub-steps this CSV is written unconditionally - confirm
        // whether it should depend on the write-step-result flag.
        SparkImporterUtils.getInstance().writeDatasetToCSV(variableTypes, "variable_types_escalated");
        return dataset;
    }

    /**
     * Stores {@code "string"} as the type of the given variable when its current type is
     * {@code null}, empty or the text {@code "null"}; otherwise leaves the map untouched.
     *
     * @param variables    the name to type map to update
     * @param variableName the variable to finalize; must already be a key of the map
     * @param variableType the type currently recorded for the variable
     */
    private static void applyDefaultTypeIfMissing(Map<String, String> variables, String variableName, String variableType) {
        boolean missing = variableType == null || variableType.isEmpty() || NULL_TYPE_LITERAL.equals(variableType);
        if (missing) {
            variables.put(variableName, DEFAULT_VARIABLE_TYPE);
        }
    }
}
