package de.viadee.ki.sparkimporter.processing.steps.dataprocessing;

import de.viadee.ki.sparkimporter.configuration.Configuration;
import de.viadee.ki.sparkimporter.configuration.preprocessing.ColumnConfiguration;
import de.viadee.ki.sparkimporter.configuration.util.ConfigurationUtils;
import de.viadee.ki.sparkimporter.processing.PreprocessingRunner;
import de.viadee.ki.sparkimporter.processing.interfaces.PreprocessingStepInterface;
import de.viadee.ki.sparkimporter.util.SparkImporterUtils;
import de.viadee.ki.sparkimporter.util.SparkImporterVariables;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

/* loaded from: input_file:de/viadee/ki/sparkimporter/processing/steps/dataprocessing/ReduceColumnsStep.class */
/**
 * Preprocessing step that narrows a dataset down to a fixed set of columns.
 *
 * <p>Besides the projection itself the step has these side effects:
 * <ul>
 *   <li>if {@code PreprocessingRunner.initialConfigToBeWritten} is set, a
 *       {@link ColumnConfiguration} entry is added to the preprocessing configuration for every
 *       incoming column that is not one of the always-kept columns;</li>
 *   <li>the names of all incoming columns are stored as a one-column helper dataset under the key
 *       {@code "startColumns_" + dataLevel} in {@code PreprocessingRunner.helper_datasets};</li>
 *   <li>optionally the reduced dataset is written out as CSV.</li>
 * </ul>
 */
public class ReduceColumnsStep implements PreprocessingStepInterface {

    /**
     * Selects the always-kept columns (plus the timestamp column when the input has one) and
     * keeps only the rows that satisfy the SQL condition {@code proc_inst_id_ <> 'null'}.
     *
     * @param dataset the dataset to reduce
     * @param z       when {@code true}, the reduced dataset is written via
     *                {@code SparkImporterUtils.writeDatasetToCSV} under the name "reduced_columns"
     * @param str     the data level; when it equals {@code SparkImporterVariables.DATA_LEVEL_ACTIVITY}
     *                the activity columns are kept as well. It is also used as suffix of the
     *                helper dataset key
     * @param map     not used by this step
     * @return the reduced and filtered dataset
     */
    @Override
    public Dataset<Row> runPreprocessingStep(Dataset<Row> dataset, boolean z, String str, Map<String, Object> map) {
        List<StructField> schemaFields = Arrays.asList(dataset.schema().fields());
        List<String> startColumnNames = Arrays.asList(dataset.columns());

        // Single source of truth for the columns that survive this step; used both for the
        // configuration bootstrap and for the final projection.
        List<String> keptColumnNames = baseColumnNames(str);

        if (PreprocessingRunner.initialConfigToBeWritten) {
            Configuration configuration = ConfigurationUtils.getInstance().getConfiguration();
            for (String columnName : startColumnNames) {
                if (keptColumnNames.contains(columnName)) {
                    continue;
                }
                ColumnConfiguration columnConfiguration = new ColumnConfiguration();
                columnConfiguration.setColumnName(columnName);
                columnConfiguration.setColumnType(getColumnTypeString(schemaFields, columnName));
                columnConfiguration.setUseColumn(true);
                columnConfiguration.setComment("");
                configuration.getPreprocessingConfiguration().getColumnConfiguration().add(columnConfiguration);
            }
        }

        // Remember which columns the dataset had before the reduction.
        List<Row> startColumnRows = new ArrayList<>(startColumnNames.size());
        for (String columnName : startColumnNames) {
            startColumnRows.add(RowFactory.create(columnName));
        }
        StructType startColumnsSchema = new StructType(new StructField[]{
                new StructField("column_name", DataTypes.StringType, false, Metadata.empty())
        });
        Dataset<Row> startColumns = SparkSession.builder().getOrCreate()
                .createDataFrame(startColumnRows, startColumnsSchema)
                .toDF();
        PreprocessingRunner.helper_datasets.put("startColumns_" + str, startColumns);

        List<Column> selection = new ArrayList<>(keptColumnNames.size() + 1);
        for (String columnName : keptColumnNames) {
            selection.add(new Column(columnName));
        }
        // The timestamp column is optional: it is only selected when the input actually has it.
        if (startColumnNames.contains(SparkImporterVariables.VAR_TIMESTAMP)) {
            selection.add(new Column(SparkImporterVariables.VAR_TIMESTAMP));
        }

        Dataset<Row> reduced = dataset
                .select(SparkImporterUtils.getInstance().asSeq(selection))
                .filter("proc_inst_id_ <> 'null'");

        if (z) {
            SparkImporterUtils.getInstance().writeDatasetToCSV(reduced, "reduced_columns");
        }
        return reduced;
    }

    /**
     * Builds the names of the columns that are always kept for the given data level.
     *
     * @param dataLevel the data level passed to the step
     * @return a new mutable list of column names, in selection order
     */
    private static List<String> baseColumnNames(String dataLevel) {
        List<String> names = new ArrayList<>();
        names.add(SparkImporterVariables.VAR_PROCESS_INSTANCE_ID);
        names.add(SparkImporterVariables.VAR_PROCESS_INSTANCE_VARIABLE_NAME);
        names.add(SparkImporterVariables.VAR_PROCESS_INSTANCE_VARIABLE_TYPE);
        names.add(SparkImporterVariables.VAR_PROCESS_INSTANCE_VARIABLE_REVISION);
        names.add(SparkImporterVariables.VAR_STATE);
        names.add(SparkImporterVariables.VAR_LONG);
        names.add(SparkImporterVariables.VAR_DOUBLE);
        names.add(SparkImporterVariables.VAR_TEXT);
        names.add(SparkImporterVariables.VAR_TEXT2);
        if (dataLevel.equals(SparkImporterVariables.DATA_LEVEL_ACTIVITY)) {
            names.add(SparkImporterVariables.VAR_ACT_INST_ID);
            names.add(SparkImporterVariables.VAR_START_TIME);
            names.add(SparkImporterVariables.VAR_END_TIME);
            names.add(SparkImporterVariables.VAR_DURATION);
        }
        return names;
    }

    /**
     * Maps the Spark data type of the named column to the type string stored in the column
     * configuration.
     *
     * @param list the schema fields to search
     * @param str  the name of the column to look up
     * @return one of "integer", "long", "double", "boolean", "timestamp", "date", "float" or
     *         "string"; "string" is also returned when no field with that name exists or its
     *         type is none of the others
     */
    private String getColumnTypeString(List<StructField> list, String str) {
        // Columns that are missing from the schema are treated as strings.
        DataType dataType = DataTypes.StringType;
        for (StructField field : list) {
            if (field.name().equals(str)) {
                dataType = field.dataType();
                break;
            }
        }

        if (dataType.equals(DataTypes.IntegerType)) {
            return "integer";
        }
        if (dataType.equals(DataTypes.LongType)) {
            return "long";
        }
        if (dataType.equals(DataTypes.DoubleType)) {
            return "double";
        }
        if (dataType.equals(DataTypes.BooleanType)) {
            return "boolean";
        }
        if (dataType.equals(DataTypes.TimestampType)) {
            return "timestamp";
        }
        if (dataType.equals(DataTypes.DateType)) {
            return "date";
        }
        if (dataType.equals(DataTypes.FloatType)) {
            return "float";
        }
        return "string";
    }
}
