package uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.collection.JavaConversions;
import scala.collection.Seq;
import uk.gov.gchq.gaffer.operation.OperationException;
import uk.gov.gchq.gaffer.parquetstore.ParquetStore;
import uk.gov.gchq.gaffer.parquetstore.io.reader.ParquetElementReader;
import uk.gov.gchq.gaffer.parquetstore.utils.GafferGroupObjectConverter;
import uk.gov.gchq.gaffer.parquetstore.utils.SchemaUtils;

/* loaded from: input_file:uk/gov/gchq/gaffer/parquetstore/operation/handler/utilities/SortFullGroup.class */
/**
 * Sorts the Parquet data of a single group.
 *
 * <p>When called, this task reads the input files that exist with Spark, samples the keys
 * produced by {@code ExtractKeyFromRow} to pick split points, partitions the rows with a
 * {@code SeqObjectPartitioner} built from those split points, sorts the rows inside each
 * partition by the configured sort columns and writes them as Parquet to the output directory.
 * Afterwards the empty {@code .parquet} output files are deleted and the remaining ones are
 * renamed, in path order, to the names returned by {@link ParquetStore#getFile}.
 */
public class SortFullGroup implements Callable<OperationException> {
    private static final Logger LOGGER = LoggerFactory.getLogger(SortFullGroup.class);

    /** Number of keys requested from Spark when sampling the input to choose split points. */
    private static final int SAMPLE_SIZE = 10000;

    /** Fixed seed for the sample, so that repeated runs over the same input pick the same sample. */
    private static final long SAMPLE_SEED = 1234567890L;

    private final String group;
    private final boolean isEntity;
    private final boolean isReversed;
    private final SchemaUtils schemaUtils;
    private final GafferGroupObjectConverter converter;
    private final List<String> sortColumns;
    private final List<String> inputFiles;
    private final String outputDir;
    private final int numberOfOutputFiles;
    private final CompressionCodecName compressionCodecName;
    private final SparkSession spark;
    private final FileSystem fs;

    /**
     * Orders two key sequences element by element, using the natural ordering of the elements.
     * The first pair of elements that differ decides the result.
     */
    public static class SeqComparator implements Comparator<Seq<Object>> {

        /**
         * Compares two sequences that are expected to have the same length.
         *
         * @param seq  the left-hand sequence; its elements must implement {@link Comparable}
         * @param seq2 the right-hand sequence
         * @return the result of the first non-zero element comparison, or 0 if all elements are equal
         * @throws RuntimeException if the two sequences do not have the same number of elements
         */
        @Override
        @SuppressWarnings("unchecked") // elements are only known to be Comparable at runtime
        public int compare(final Seq<Object> seq, final Seq<Object> seq2) {
            final Iterator<Object> left = JavaConversions.asJavaIterator(seq.iterator());
            final Iterator<Object> right = JavaConversions.asJavaIterator(seq2.iterator());
            while (left.hasNext()) {
                final Comparable<Object> leftElement = (Comparable<Object>) left.next();
                if (!right.hasNext()) {
                    throw new RuntimeException("Should be comparing two Seqs of equal size, got " + seq + " and " + seq2);
                }
                final int result = leftElement.compareTo(right.next());
                if (0 != result) {
                    return result;
                }
            }
            // Reject a longer right-hand side as well, so that compare(a, b) and compare(b, a)
            // treat a size mismatch the same way.
            if (right.hasNext()) {
                throw new RuntimeException("Should be comparing two Seqs of equal size, got " + seq + " and " + seq2);
            }
            return 0;
        }
    }

    /**
     * Creates a task that sorts the data of one group.
     *
     * @param group                the group whose data is sorted; also used to look up the converter,
     *                             column paths and Spark schema in {@code schemaUtils}
     * @param isEntity             passed to the Parquet element reader used to test output files for emptiness
     * @param isReversed           passed through to {@code ExtractKeyFromRow} when extracting keys
     * @param schemaUtils          source of the group's converter, column paths, entity groups and Spark schema
     * @param sortColumns          the columns to sort by within each partition; must contain at least one column
     * @param inputFiles           candidate input paths; paths that do not exist are ignored
     * @param outputDir            the directory the sorted Parquet files are written to; it is used as a
     *                             plain string prefix when building the renamed file paths
     * @param numberOfOutputFiles  the number of partitions given to the partitioner
     * @param compressionCodecName the codec whose name is passed as the Parquet "compression" option
     * @param spark                the Spark session used to read, partition, sort and write the data
     * @param fs                   the file system used to check, list, delete and rename files
     */
    public SortFullGroup(final String group,
                         final boolean isEntity,
                         final boolean isReversed,
                         final SchemaUtils schemaUtils,
                         final List<String> sortColumns,
                         final List<String> inputFiles,
                         final String outputDir,
                         final int numberOfOutputFiles,
                         final CompressionCodecName compressionCodecName,
                         final SparkSession spark,
                         final FileSystem fs) {
        this.group = group;
        this.isEntity = isEntity;
        this.isReversed = isReversed;
        this.schemaUtils = schemaUtils;
        this.converter = schemaUtils.getConverter(group);
        this.sortColumns = sortColumns;
        this.inputFiles = inputFiles;
        this.outputDir = outputDir;
        this.numberOfOutputFiles = numberOfOutputFiles;
        this.compressionCodecName = compressionCodecName;
        this.spark = spark;
        this.fs = fs;
    }

    /**
     * Runs the sort. Nothing is written if none of the input files exist.
     *
     * @return always {@code null}; failures are reported by throwing
     * @throws IOException if a file system operation or reading an output file fails
     */
    @Override
    public OperationException call() throws IOException {
        // Resolved first so that an empty sort-column list fails before any file system or Spark work.
        final String firstSortColumn = sortColumns.get(0);
        final List<String> otherSortColumns = sortColumns.subList(1, sortColumns.size());

        final List<String> existingFiles = findExistingInputFiles();
        if (existingFiles.isEmpty()) {
            LOGGER.info("Not sorting data for group {} as list of input files that exist is empty", group);
            return null;
        }
        LOGGER.info("Sorting data in {} files by columns {} to {} files in output directory {}",
                new Object[]{existingFiles.size(), StringUtils.join(sortColumns, ','), numberOfOutputFiles, outputDir});
        final String[] inputPaths = existingFiles.toArray(new String[0]);

        final ExtractKeyFromRow sampleKeyExtractor = newKeyExtractor();
        LOGGER.info("Sampling data from {} input files to identify split points for sorting", existingFiles.size());
        final List<Seq<Object>> sample = spark.read()
                .parquet(inputPaths)
                .javaRDD()
                .map(sampleKeyExtractor)
                .takeSample(false, SAMPLE_SIZE, SAMPLE_SEED);
        LOGGER.info("Obtained {} rows in the sample", sample.size());

        final TreeSet<Seq<Object>> splitPoints = chooseSplitPoints(sample, numberOfOutputFiles - 1);
        LOGGER.info("Found {} split points", splitPoints.size());

        final SeqObjectPartitioner partitioner = new SeqObjectPartitioner(numberOfOutputFiles, splitPoints);
        writeSortedPartitions(inputPaths, partitioner, firstSortColumn, otherSortColumns);
        renameOutputFiles();
        return null;
    }

    /** Returns the input files that exist on the file system, logging each one that is skipped. */
    private List<String> findExistingInputFiles() throws IOException {
        final List<String> existingFiles = new ArrayList<>();
        for (final String file : inputFiles) {
            if (fs.exists(new Path(file))) {
                existingFiles.add(file);
            } else {
                LOGGER.info("Ignoring file {} as it does not exist", file);
            }
        }
        return existingFiles;
    }

    /** Builds the function that extracts the key from a row of this group. */
    private ExtractKeyFromRow newKeyExtractor() {
        return new ExtractKeyFromRow(new HashSet<>(),
                schemaUtils.getColumnToPaths(group),
                schemaUtils.getEntityGroups().contains(group),
                isReversed);
    }

    /**
     * Picks at most {@code desiredSplits} evenly spaced keys from the de-duplicated, sorted sample.
     * No split point is returned when {@code desiredSplits} is less than one.
     */
    private static TreeSet<Seq<Object>> chooseSplitPoints(final List<Seq<Object>> sample, final int desiredSplits) {
        final TreeSet<Seq<Object>> sortedSample = new TreeSet<>(new SeqComparator());
        sortedSample.addAll(sample);
        final TreeSet<Seq<Object>> splitPoints = new TreeSet<>(new SeqComparator());

        long step = 1L;
        if (sortedSample.size() >= 2 && desiredSplits >= 1) {
            step = Math.max(1L, sortedSample.size() / desiredSplits);
        }

        int selected = 0;
        int seen = 0;
        for (final Seq<Object> key : sortedSample) {
            // Checked before adding so that a request for zero splits really yields none.
            if (selected >= desiredSplits) {
                break;
            }
            seen++;
            if (0 == seen % step) {
                splitPoints.add(key);
                selected++;
            }
        }
        return splitPoints;
    }

    /** Partitions the input rows by key, sorts each partition and writes the result to the output directory. */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private void writeSortedPartitions(final String[] inputPaths,
                                       final SeqObjectPartitioner partitioner,
                                       final String firstSortColumn,
                                       final List<String> otherSortColumns) {
        LOGGER.info("Partitioning data using split points and sorting within partition, outputting to {}", outputDir);
        // Left raw because the element type (Spark's Row) is not imported by this file.
        final JavaRDD partitionedRows = spark.read()
                .parquet(inputPaths)
                .javaRDD()
                .keyBy(newKeyExtractor())
                .partitionBy(partitioner)
                .values();
        LOGGER.info("Sorting data within partitions, outputting to {}", outputDir);
        spark.createDataFrame(partitionedRows, schemaUtils.getSparkSchema(group))
                .sortWithinPartitions(firstSortColumn, otherSortColumns.toArray(new String[0]))
                .write()
                .option("compression", compressionCodecName.name())
                .parquet(outputDir);
    }

    /** Deletes the empty ".parquet" files in the output directory and renames the others in path order. */
    private void renameOutputFiles() throws IOException {
        final FileStatus[] parquetFiles = fs.listStatus(new Path(outputDir), path -> path.getName().endsWith(".parquet"));
        // Sorted so that the new file numbers follow the order of the original file paths.
        final TreeSet<Path> sortedPaths = new TreeSet<>();
        for (final FileStatus status : parquetFiles) {
            sortedPaths.add(status.getPath());
        }
        LOGGER.info("Renaming part-* files to partition-* files, removing empty files (part-* files are in directory {})", outputDir);
        int partitionId = 0;
        for (final Path source : sortedPaths) {
            if (isFileEmpty(source)) {
                LOGGER.debug("Deleting empty file {}", source);
                if (!fs.delete(source, false)) {
                    LOGGER.warn("Failed to delete empty file {}", source);
                }
            } else {
                final Path target = new Path(outputDir + ParquetStore.getFile(partitionId));
                LOGGER.debug("Renaming {} to {}", source, target);
                if (!fs.rename(source, target)) {
                    LOGGER.warn("Failed to rename {} to {}", source, target);
                }
                partitionId++;
            }
        }
    }

    /**
     * Tests whether a Parquet file contains no records by attempting to read the first one.
     *
     * @param path the file to test
     * @return {@code true} if the first read returns {@code null}
     * @throws IOException if the file cannot be opened or read
     */
    private boolean isFileEmpty(final Path path) throws IOException {
        LOGGER.debug("Opening a new Parquet reader for file {} to test if it's empty", path);
        final boolean empty;
        // try-with-resources closes the reader even when read() throws.
        try (ParquetReader<?> reader = new ParquetElementReader.Builder(path)
                .isEntity(isEntity)
                .usingConverter(converter)
                .build()) {
            empty = null == reader.read();
        }
        LOGGER.debug("File {} is {}", path, empty ? "empty" : "not empty");
        return empty;
    }
}
