package de.viadee.ki.sparkimporter.processing.steps.dataprocessing;

import de.viadee.ki.sparkimporter.processing.interfaces.PreprocessingStepInterface;
import de.viadee.ki.sparkimporter.util.SparkImporterLogger;
import de.viadee.ki.sparkimporter.util.SparkImporterUtils;
import de.viadee.ki.sparkimporter.util.SparkImporterVariables;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions;

/* loaded from: input_file:de/viadee/ki/sparkimporter/processing/steps/dataprocessing/AggregateActivityInstancesStep.class */
/**
 * Preprocessing step that aggregates the rows of a dataset grouped by process instance id
 * and activity instance id.
 *
 * <p>Two aggregations with identical grouping and aggregate functions are computed: one over
 * the rows whose activity instance id is not null and one over the rows whose state is null.
 * Their results are combined with a union, the aggregated columns are renamed back to their
 * source column names, the process instance variable name column is dropped and the result
 * is sorted by start time.
 */
public class AggregateActivityInstancesStep implements PreprocessingStepInterface {

    /**
     * Matches aggregated column names of the form {@code function(column)} for the aggregate
     * functions used by this step; group 2 captures the wrapped column name.
     */
    private static final Pattern AGGREGATED_COLUMN_PATTERN =
            Pattern.compile("(max|allbutemptystring|processstate)\\((.+)\\)");

    /**
     * Runs the aggregation on the given dataset.
     *
     * @param dataset the dataset to aggregate
     * @param z       when {@code true}, the result is additionally written out through
     *                {@code SparkImporterUtils.writeDatasetToCSV} under the name
     *                {@code agg_of_activity_instances}
     * @param str     not used by this step
     * @param map     not used by this step
     * @return the aggregated dataset, sorted by start time
     */
    @Override
    public Dataset<Row> runPreprocessingStep(Dataset<Row> dataset, boolean z, String str, Map<String, Object> map) {
        Map<String, String> aggregationByColumn = buildAggregationMap(dataset.columns());

        // Rows that carry an activity instance id.
        Dataset<Row> withActivityInstance = dataset
                .filter(functions.not(functions.isnull(dataset.col(SparkImporterVariables.VAR_ACT_INST_ID))))
                .groupBy(SparkImporterVariables.VAR_PROCESS_INSTANCE_ID, SparkImporterVariables.VAR_ACT_INST_ID)
                .agg(aggregationByColumn);

        // Rows without a state.
        // NOTE(review): a row with a non-null activity instance id and a null state passes both
        // filters and therefore contributes to both aggregations - confirm this is intended.
        Dataset<Row> withoutState = dataset
                .filter(functions.isnull(dataset.col(SparkImporterVariables.VAR_STATE)))
                .groupBy(SparkImporterVariables.VAR_PROCESS_INSTANCE_ID, SparkImporterVariables.VAR_ACT_INST_ID)
                .agg(aggregationByColumn);

        // union() resolves columns by position and both sides are produced by the same
        // groupBy/agg, so one rename pass on the combined result is sufficient. The input
        // dataset itself must not be renamed: the aggregation map refers to its column names.
        Dataset<Row> combined = renameAggregatedColumns(withoutState.union(withActivityInstance));

        Dataset<Row> result = combined
                .drop(SparkImporterVariables.VAR_PROCESS_INSTANCE_VARIABLE_NAME)
                .sort(SparkImporterVariables.VAR_START_TIME);

        // count() is a Spark action, so the dataset is evaluated here for the log message.
        SparkImporterLogger.getInstance().writeInfo("Found " + result.count() + " activity instances.");

        if (z) {
            SparkImporterUtils.getInstance().writeDatasetToCSV(result, "agg_of_activity_instances");
        }
        return result;
    }

    /**
     * Builds the column-to-aggregate-function map passed to {@code agg}.
     *
     * <p>The two grouping columns are skipped, the duration column uses {@code max}, the state
     * column uses {@code ProcessState} and every other column uses {@code AllButEmptyString}.
     * The latter two are presumably aggregate functions registered elsewhere in the project -
     * verify against the application setup.
     */
    private static Map<String, String> buildAggregationMap(String[] columns) {
        Map<String, String> aggregationByColumn = new HashMap<>();
        for (String column : columns) {
            if (column.equals(SparkImporterVariables.VAR_PROCESS_INSTANCE_ID)
                    || column.equals(SparkImporterVariables.VAR_ACT_INST_ID)) {
                // Grouping keys are not aggregated.
                continue;
            }
            if (column.equals(SparkImporterVariables.VAR_DURATION)) {
                aggregationByColumn.put(column, "max");
            } else if (column.equals(SparkImporterVariables.VAR_STATE)) {
                aggregationByColumn.put(column, "ProcessState");
            } else {
                aggregationByColumn.put(column, "AllButEmptyString");
            }
        }
        return aggregationByColumn;
    }

    /**
     * Renames every column whose name matches {@link #AGGREGATED_COLUMN_PATTERN} to the column
     * name wrapped inside the aggregate function call.
     */
    private static Dataset<Row> renameAggregatedColumns(Dataset<Row> aggregated) {
        Dataset<Row> renamed = aggregated;
        for (String column : aggregated.columns()) {
            Matcher matcher = AGGREGATED_COLUMN_PATTERN.matcher(column);
            if (matcher.find()) {
                renamed = renamed.withColumnRenamed(column, matcher.group(2));
            }
        }
        return renamed;
    }
}
