package de.viadee.ki.sparkimporter.util;

import de.viadee.ki.sparkimporter.processing.PreprocessingRunner;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.nio.file.CopyOption;
import java.nio.file.Files;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import scala.collection.Iterator;
import scala.collection.JavaConversions;
import scala.collection.JavaConverters;
import scala.collection.Seq;

/* loaded from: input_file:de/viadee/ki/sparkimporter/util/SparkImporterUtils.class */
/**
 * Stateless singleton with helpers for writing Spark datasets to the configured target folder
 * (Parquet and CSV), for cleaning up imported datasets and for small Java/Scala conversions.
 */
public class SparkImporterUtils {
    /** Sub-directory name that selects the final output folder instead of a numbered intermediate one. */
    private static final String RESULT_DIRECTORY = "result";

    /**
     * Matches column names that end with an underscore followed by zero or more digits.
     * Group 1 is the name up to and including that last underscore.
     */
    private static final Pattern NUMBERED_COLUMN_PATTERN = Pattern.compile("(\\w+_)\\d*");

    /** The class holds no state, so the single instance is created eagerly and needs no locking. */
    private static final SparkImporterUtils INSTANCE = new SparkImporterUtils();

    private SparkImporterUtils() {
    }

    /**
     * Returns the shared instance of this utility class.
     *
     * @return the singleton instance, never {@code null}
     */
    public static SparkImporterUtils getInstance() {
        return INSTANCE;
    }

    /**
     * Computes a checksum string for a serializable object.
     *
     * <p>The object is Java-serialized, the serialized bytes are MD5-hashed, that digest is
     * Base64-encoded, and the Base64 bytes are MD5-hashed again and returned as upper-case hex.
     * (The method name contains a historic typo; it is kept because it is part of the public API.)
     *
     * @param obj the object to fingerprint; may be {@code null}
     * @return the upper-case hex checksum, or an empty string when {@code obj} is {@code null}
     * @throws IOException if the object cannot be serialized
     * @throws NoSuchAlgorithmException if the MD5 algorithm is not available
     */
    public String md5CecksumOfObject(Object obj) throws IOException, NoSuchAlgorithmException {
        if (obj == null) {
            return "";
        }
        ByteArrayOutputStream serialized = new ByteArrayOutputStream();
        // try-with-resources: the stream is closed (and flushed) even when writeObject fails.
        try (ObjectOutputStream objectStream = new ObjectOutputStream(serialized)) {
            objectStream.writeObject(obj);
        }
        byte[] digest = MessageDigest.getInstance("MD5").digest(serialized.toByteArray());
        // Hash the Base64 bytes directly; going through new String(bytes) would depend on the
        // platform default charset.
        byte[] encodedDigest = new Base64().encode(digest);
        return DigestUtils.md5Hex(encodedDigest).toUpperCase();
    }

    /**
     * Writes the dataset as Parquet below the target folder and, for the final result with CSV
     * output configured, additionally exports it as a single CSV file named {@code result.csv}.
     *
     * <p>The dataset is repartitioned by the process instance id column before writing. The CSV
     * export re-reads the written Parquet data, coalesces it to one partition and writes it to
     * the {@code csv} sub-folder; the generated {@code part-*} file is then renamed to
     * {@code result.csv} and every other file in that folder is removed.
     *
     * @param dataset the dataset to persist
     * @param str the sub-directory name; {@code "result"} selects the final result folder, any
     *            other value a numbered folder below {@code intermediate/}
     * @throws IllegalStateException if the CSV output folder cannot be found after the export
     */
    public void writeDatasetToParquet(Dataset<Row> dataset, String str) {
        String outputPath = resolveOutputPath(str);
        dataset.repartition(dataset.col(SparkImporterVariables.VAR_PROCESS_INSTANCE_ID))
                .write()
                .mode(SparkImporterVariables.getSaveMode())
                .save(outputPath + "/parquet");

        boolean csvRequested =
                SparkImporterVariables.getOutputFormat().equals(SparkImporterVariables.OUTPUT_FORMAT_CSV);
        if (!csvRequested || !str.equals(RESULT_DIRECTORY)) {
            return;
        }

        SparkSession.builder().getOrCreate()
                .read()
                .load(outputPath + "/parquet")
                .coalesce(1)
                .write()
                .option("header", "true")
                .option("delimiter", "|")
                .option("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSS")
                .option("ignoreLeadingWhiteSpace", "false")
                .option("ignoreTrailingWhiteSpace", "false")
                .mode(SparkImporterVariables.getSaveMode())
                .csv(outputPath + "/csv");

        if (outputPath.startsWith("hdfs")) {
            renameCsvPartFileOnHdfs(outputPath);
        } else {
            renameCsvPartFileLocally(outputPath);
        }
    }

    /**
     * Writes the dataset as a single pipe-delimited CSV file set below the target folder.
     *
     * @param dataset the dataset to persist
     * @param str the sub-directory name; {@code "result"} selects the final result folder, any
     *            other value a numbered folder below {@code intermediate/}
     */
    public void writeDatasetToCSV(Dataset<Row> dataset, String str) {
        writeDatasetToCSV(dataset, str, "|");
    }

    /** Writes the dataset, coalesced to one partition, as CSV with a header row and the given delimiter. */
    private void writeDatasetToCSV(Dataset<Row> dataset, String str, String delimiter) {
        String outputPath = resolveOutputPath(str);
        dataset.coalesce(1)
                .write()
                .option("header", "true")
                .option("delimiter", delimiter)
                .option("ignoreLeadingWhiteSpace", "false")
                .option("ignoreTrailingWhiteSpace", "false")
                .mode(SparkImporterVariables.getSaveMode())
                .csv(outputPath);
    }

    /**
     * Builds the output path for a sub-directory: {@code <target>/result} for the final result,
     * otherwise {@code <target>/intermediate/<NN>_<name>}. Fetching the two-digit counter from
     * {@link PreprocessingRunner#getNextCounter()} happens only for intermediate paths.
     */
    private String resolveOutputPath(String subDirectory) {
        String targetRoot = SparkImporterVariables.getTargetFolder() + "/";
        if (subDirectory.equals(RESULT_DIRECTORY)) {
            return targetRoot + RESULT_DIRECTORY;
        }
        String counter = String.format("%02d", Integer.valueOf(PreprocessingRunner.getNextCounter()));
        return targetRoot + "intermediate/" + counter + "_" + subDirectory;
    }

    /**
     * Renames the CSV part file in {@code <outputPath>/csv} to {@code result.csv} on the Hadoop
     * file system the path belongs to. I/O failures are logged, not rethrown.
     */
    private void renameCsvPartFileOnHdfs(String outputPath) {
        Path csvDirectory = new Path(outputPath + "/csv");
        Path stagedResult = new Path(outputPath + "/result.csv");
        Configuration configuration = new Configuration();
        try {
            // Resolve the file system from the path itself so that a fully qualified hdfs:// URI
            // works even when it is not the configured default file system.
            // NOTE(review): the instance is deliberately not closed; Hadoop normally hands out
            // cached instances that may be shared with Spark - confirm for this deployment.
            FileSystem fileSystem = csvDirectory.getFileSystem(configuration);
            if (!fileSystem.isDirectory(csvDirectory)) {
                throw new IllegalStateException("Cannot find result folder!");
            }

            // Spark writes its part files into the csv folder, so that is the folder to scan.
            boolean partFileStaged = false;
            RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(csvDirectory, false);
            while (files.hasNext()) {
                LocatedFileStatus status = files.next();
                if (status.isFile() && status.getPath().getName().startsWith("part-")) {
                    // deleteSource = true turns the copy into a move out of the csv folder.
                    FileUtil.copy(fileSystem, status.getPath(), fileSystem, stagedResult, true, configuration);
                    partFileStaged = true;
                }
            }
            if (!partFileStaged) {
                // Do not wipe the folder when there is nothing to put back afterwards.
                throw new IOException("No CSV part file found in " + csvDirectory);
            }

            // Empty the csv folder on the Hadoop file system (java.io.File would only touch
            // the local disk) and move the staged file back in under its final name.
            fileSystem.delete(csvDirectory, true);
            if (!fileSystem.mkdirs(csvDirectory)) {
                throw new IOException("Could not re-create " + csvDirectory);
            }
            FileUtil.copy(fileSystem, stagedResult, fileSystem, new Path(csvDirectory, "result.csv"), true, configuration);
        } catch (IOException e) {
            SparkImporterLogger.getInstance().writeError("An error occurred during the renaming of the result file in HDFS. Exception: " + e.getMessage());
        }
    }

    /**
     * Renames the CSV part file in {@code <outputPath>/csv} to {@code result.csv} on the local
     * file system. I/O failures are logged, not rethrown.
     *
     * @throws IllegalStateException if the csv folder does not exist
     */
    private void renameCsvPartFileLocally(String outputPath) {
        File csvDirectory = new File(outputPath + "/csv");
        if (!csvDirectory.isDirectory()) {
            throw new IllegalStateException("Cannot find result folder!");
        }
        // The part file is parked next to the csv folder while the folder is being emptied.
        File stagedResult = new File(outputPath, "result.csv");
        try {
            boolean partFileStaged = false;
            File[] children = csvDirectory.listFiles();
            if (children != null) { // listFiles() yields null on an I/O error
                for (File child : children) {
                    if (child.getName().startsWith("part-")) {
                        Files.move(child.toPath(), stagedResult.toPath());
                        partFileStaged = true;
                    }
                }
            }
            if (!partFileStaged) {
                // Cleaning the folder now would delete the only copy of the CSV output.
                throw new IOException("No CSV part file found in " + csvDirectory);
            }
            FileUtils.cleanDirectory(csvDirectory);
            Files.move(stagedResult.toPath(), new File(csvDirectory, "result.csv").toPath());
        } catch (IOException e) {
            SparkImporterLogger.getInstance().writeError("An error occurred during the renaming of the result file. Exception: " + e.getMessage());
        }
    }

    /**
     * Collapses columns whose names differ only in a trailing number after the last underscore
     * (for example {@code name_} and {@code name_1}) into one column named after the shared base.
     *
     * <p>For every base name the first matching column in dataset order is kept. If the number
     * of kept columns equals the number of input columns the dataset is returned unchanged.
     * Otherwise only the kept columns are selected and renamed to their base names; columns
     * whose names do not match the pattern are not part of that selection.
     *
     * @param dataset the dataset to clean up
     * @return the original dataset, or a new one with the duplicate columns removed
     */
    public Dataset<Row> removeDuplicatedColumns(Dataset<Row> dataset) {
        String[] columns = dataset.columns();
        HashMap<String, Column> firstColumnByBaseName = new HashMap<>();
        HashMap<String, String> baseNameByColumnName = new HashMap<>();
        for (String columnName : columns) {
            Matcher matcher = NUMBERED_COLUMN_PATTERN.matcher(columnName);
            if (!matcher.matches()) {
                continue;
            }
            String baseName = matcher.group(1);
            if (!firstColumnByBaseName.containsKey(baseName)) {
                firstColumnByBaseName.put(baseName, new Column(columnName));
                baseNameByColumnName.put(columnName, baseName);
            }
        }
        if (columns.length == firstColumnByBaseName.size()) {
            return dataset;
        }

        Seq<Column> selection =
                JavaConverters.asScalaIteratorConverter(firstColumnByBaseName.values().iterator()).asScala().toSeq();
        Dataset<Row> deduplicated = dataset.select(selection).toDF();
        // The selected columns still carry their original names; map each back to its base name.
        for (String selectedName : deduplicated.columns()) {
            deduplicated = deduplicated.withColumnRenamed(selectedName, baseNameByColumnName.get(selectedName));
        }
        return deduplicated;
    }

    /**
     * Drops rows whose {@code proc_inst_id_} value is the literal string {@code null} or empty.
     *
     * @param dataset the freshly imported dataset
     * @return the filtered dataset
     */
    public Dataset<Row> removeEmptyLinesAfterImport(Dataset<Row> dataset) {
        // NOTE(review): the column name is presumably the value of
        // SparkImporterVariables.VAR_PROCESS_INSTANCE_ID - confirm before replacing the literal.
        return dataset.filter("proc_inst_id_ <> 'null'").filter("proc_inst_id_ <> ''");
    }

    /**
     * Wraps a Java list as a Scala sequence without copying its elements.
     *
     * @param list the list to expose to Scala APIs
     * @param <T> the element type
     * @return a Scala buffer view backed by {@code list}
     */
    public <T> Seq<T> asSeq(List<T> list) {
        // JavaConverters replaces the deprecated implicit-based JavaConversions.
        return JavaConverters.asScalaBufferConverter(list).asScala();
    }
}
