package de.viadee.ki.sparkimporter.processing.steps.dataprocessing;

import de.viadee.ki.sparkimporter.processing.PreprocessingRunner;
import de.viadee.ki.sparkimporter.processing.interfaces.PreprocessingStepInterface;
import de.viadee.ki.sparkimporter.util.SparkImporterUtils;
import de.viadee.ki.sparkimporter.util.SparkImporterVariables;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import scala.collection.Seq;

/* loaded from: input_file:de/viadee/ki/sparkimporter/processing/steps/dataprocessing/AddReducedColumnsToDatasetStep.class */
/**
 * Preprocessing step that joins columns back onto the incoming dataset.
 *
 * <p>The step reads two helper datasets from {@code PreprocessingRunner.helper_datasets}:
 * {@code "startColumns_" + dataLevel} (the first field of every row is read as a column name) and
 * {@code "initial_" + dataLevel} (the source of the column values). Every listed column that is
 * neither already present in the incoming dataset nor part of a fixed exclusion list is aggregated
 * with {@code first} per process instance (or per process instance and activity instance) and
 * left-joined onto the incoming dataset.
 */
public class AddReducedColumnsToDatasetStep implements PreprocessingStepInterface {

    /**
     * Matches the column names generated by the {@code first} aggregation; group 2 is everything
     * between the outermost parentheses.
     */
    private static final Pattern FIRST_AGGREGATE_COLUMN = Pattern.compile("(first)\\((.+)\\)");

    /** Temporary name of the process instance id column on the right-hand side of the join. */
    private static final String PROC_INST_ID_RIGHT = "proc_inst_id__right";

    /** Temporary name of the activity instance id column on the right-hand side of the join. */
    private static final String ACT_INST_ID_RIGHT = "act_inst_id__right";

    /**
     * Runs the step.
     *
     * @param dataset the dataset the additional columns are joined onto
     * @param z       when {@code true} the result is additionally written via
     *                {@code SparkImporterUtils.writeDatasetToCSV} under the name "joined_columns"
     * @param str     the data level; compared against {@code SparkImporterVariables.DATA_LEVEL_PROCESS}
     *                and {@code SparkImporterVariables.DATA_LEVEL_ACTIVITY} and used as the suffix of
     *                the helper dataset keys
     * @param map     step parameters; not used by this step
     * @return the incoming dataset extended by the joined columns, without the temporary join keys
     * @throws IllegalStateException if one of the required helper datasets is not registered
     */
    @Override
    public Dataset<Row> runPreprocessingStep(Dataset<Row> dataset, boolean z, String str, Map<String, Object> map) {
        final String dataLevel = str;

        // Fail early and descriptively instead of with an anonymous NullPointerException later on.
        final Dataset<Row> startColumns = requireHelperDataset("startColumns_" + dataLevel);
        final Dataset<Row> initial = requireHelperDataset("initial_" + dataLevel);

        final boolean processLevel = dataLevel.equals(SparkImporterVariables.DATA_LEVEL_PROCESS);
        final boolean stateWorkaround = SparkImporterVariables.isDevProcessStateColumnWorkaroundEnabled();

        final List<String> existingColumns = Arrays.asList(dataset.columns());

        // Columns that are never joined back, even if they are listed in the start columns.
        final List<String> excludedColumns = new ArrayList<>(Arrays.asList(
                SparkImporterVariables.VAR_PROCESS_INSTANCE_VARIABLE_TYPE,
                SparkImporterVariables.VAR_PROCESS_INSTANCE_VARIABLE_REVISION,
                SparkImporterVariables.VAR_LONG,
                SparkImporterVariables.VAR_DOUBLE,
                SparkImporterVariables.VAR_TEXT,
                SparkImporterVariables.VAR_TEXT2,
                SparkImporterVariables.VAR_TIMESTAMP,
                SparkImporterVariables.VAR_SEQUENCE_COUNTER,
                SparkImporterVariables.VAR_PROCESS_INSTANCE_VARIABLE_INSTANCE_ID));
        if (!stateWorkaround) {
            // With the workaround enabled the variable name column stays selectable because the
            // process-level row filter below is built on it.
            excludedColumns.add(SparkImporterVariables.VAR_PROCESS_INSTANCE_VARIABLE_NAME);
        }

        // The selection always contains the id/state columns, followed by every column to add.
        final List<String> columnsToAdd = new ArrayList<>();
        final List<Column> selection = new ArrayList<>();
        selection.add(new Column(SparkImporterVariables.VAR_PROCESS_INSTANCE_ID));
        selection.add(new Column(SparkImporterVariables.VAR_STATE));
        selection.add(new Column(SparkImporterVariables.VAR_ACT_INST_ID));
        for (Row row : startColumns.collectAsList()) {
            final String columnName = row.getString(0);
            if (!existingColumns.contains(columnName) && !excludedColumns.contains(columnName)) {
                columnsToAdd.add(columnName);
                selection.add(new Column(columnName));
            }
        }
        final Seq<Column> selectionSeq = SparkImporterUtils.getInstance().asSeq(selection);

        // Every added column is reduced to one value per group with the "first" aggregation.
        final Map<String, String> aggregations = new HashMap<>();
        for (String columnName : columnsToAdd) {
            aggregations.put(columnName, "first");
        }

        // Row filter applied to the initial dataset before grouping.
        final Column rowFilter;
        if (!processLevel) {
            rowFilter = initial.col(SparkImporterVariables.VAR_ACT_ID).isNotNull();
        } else if (stateWorkaround) {
            rowFilter = initial.col(SparkImporterVariables.VAR_PROCESS_INSTANCE_VARIABLE_NAME).isNull();
        } else {
            rowFilter = initial.col(SparkImporterVariables.VAR_STATE).isNotNull();
        }

        final Dataset<Row> filtered = initial.select(selectionSeq).filter(rowFilter);

        // Group and rename the grouping keys so they do not clash with the left side of the join.
        Dataset<Row> reduced;
        if (processLevel) {
            reduced = filtered
                    .groupBy(SparkImporterVariables.VAR_PROCESS_INSTANCE_ID)
                    .agg(aggregations)
                    .withColumnRenamed(SparkImporterVariables.VAR_PROCESS_INSTANCE_ID, PROC_INST_ID_RIGHT);
        } else {
            reduced = filtered
                    .groupBy(SparkImporterVariables.VAR_PROCESS_INSTANCE_ID, SparkImporterVariables.VAR_ACT_INST_ID)
                    .agg(aggregations)
                    .withColumnRenamed(SparkImporterVariables.VAR_PROCESS_INSTANCE_ID, PROC_INST_ID_RIGHT)
                    .withColumnRenamed(SparkImporterVariables.VAR_ACT_INST_ID, ACT_INST_ID_RIGHT);
        }

        // Strip the "first(...)" wrapper from the aggregated column names.
        // NOTE(review): group 2 is the full text inside the parentheses; if the Spark version in
        // use emits extra arguments in the generated name they end up in the new column name -
        // confirm against the Spark version this runs on.
        for (String columnName : reduced.columns()) {
            final Matcher matcher = FIRST_AGGREGATE_COLUMN.matcher(columnName);
            if (matcher.find()) {
                reduced = reduced.withColumnRenamed(columnName, matcher.group(2));
            }
        }

        // Left join keeps every row of the incoming dataset, matched or not.
        Column joinCondition = dataset.col(SparkImporterVariables.VAR_PROCESS_INSTANCE_ID)
                .equalTo(reduced.col(PROC_INST_ID_RIGHT));
        if (!processLevel) {
            joinCondition = joinCondition.and(
                    dataset.col(SparkImporterVariables.VAR_ACT_INST_ID).equalTo(reduced.col(ACT_INST_ID_RIGHT)));
        }
        Dataset<Row> result = dataset.join(reduced, joinCondition, "left");

        if (stateWorkaround && processLevel) {
            result = result.drop(SparkImporterVariables.VAR_PROCESS_INSTANCE_VARIABLE_NAME);
        }

        // Remove the temporary join keys again.
        result = result.drop(PROC_INST_ID_RIGHT);
        if (dataLevel.equals(SparkImporterVariables.DATA_LEVEL_ACTIVITY)) {
            result = result.drop(ACT_INST_ID_RIGHT);
        }

        if (z) {
            SparkImporterUtils.getInstance().writeDatasetToCSV(result, "joined_columns");
        }
        return result;
    }

    /** Looks up a helper dataset by key and fails with a descriptive error if it is missing. */
    private static Dataset<Row> requireHelperDataset(String key) {
        final Dataset<Row> helper = PreprocessingRunner.helper_datasets.get(key);
        if (helper == null) {
            throw new IllegalStateException(
                    "Required helper dataset '" + key + "' is not available in PreprocessingRunner.helper_datasets");
        }
        return helper;
    }
}
