package uk.gov.gchq.gaffer.parquetstore.operation.addelements.impl;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.collection.JavaConversions;
import uk.gov.gchq.gaffer.exception.SerialisationException;
import uk.gov.gchq.gaffer.jsonserialisation.JSONSerialiser;
import uk.gov.gchq.gaffer.operation.OperationException;
import uk.gov.gchq.gaffer.parquetstore.ParquetStore;
import uk.gov.gchq.gaffer.parquetstore.ParquetStoreProperties;
import uk.gov.gchq.gaffer.parquetstore.utils.GafferGroupObjectConverter;
import uk.gov.gchq.gaffer.parquetstore.utils.ParquetStoreConstants;
import uk.gov.gchq.gaffer.store.schema.Schema;
import uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition;
import uk.gov.gchq.gaffer.store.schema.SchemaEntityDefinition;
import uk.gov.gchq.gaffer.store.util.AggregatorUtil;

/* loaded from: input_file:uk/gov/gchq/gaffer/parquetstore/operation/addelements/impl/AggregateGroupSplit.class */
/**
 * Task that reads the raw Parquet data of a single split of a single group, optionally
 * aggregates it with the group's ingest aggregator, and writes the result to the split's
 * "aggregated" directory.
 *
 * <p>The input is the split's raw directory (if it exists) plus every file matched by the
 * supplied current-graph file patterns. Failures are reported through the return value of
 * {@link #call()} rather than by throwing.
 */
public class AggregateGroupSplit implements Callable<OperationException>, Serializable {
    private static final String SPLIT = "/split";
    private static final String RAW = "/raw";
    private static final String AGGREGATED = "/aggregated";
    private static final Logger LOGGER = LoggerFactory.getLogger(AggregateGroupSplit.class);
    private static final long serialVersionUID = -7828247145178905841L;
    private final String group;
    private final String tempFileDir;
    private final SparkSession spark;
    private final Map<String, String[]> columnToPaths;
    private final StructType sparkSchema;
    private final GafferGroupObjectConverter gafferGroupObjectConverter;
    private final Set<String> currentGraphFiles;
    private final String inputDir;
    private final String outputDir;
    private final Boolean isEntity;
    private final String[] gafferProperties;
    private final byte[] aggregatorJson;
    private final Set<String> groupByColumns;
    private final boolean aggregate;

    /**
     * Captures everything needed to aggregate one split of one group.
     *
     * @param str          the group name
     * @param str2         the column passed to {@code ParquetStore.getGroupDirectory} when
     *                     building the output directory
     * @param parquetStore the store supplying the temp directory, schema utilities and properties
     * @param set          paths/patterns of existing graph files to merge in; may be {@code null}
     * @param sparkSession the Spark session used to read and write the Parquet data
     * @param i            the split number, appended to the input and output directory names
     * @throws SerialisationException if the group's ingest aggregator cannot be serialised to JSON
     */
    public AggregateGroupSplit(String str, String str2, ParquetStore parquetStore, Set<String> set, SparkSession sparkSession, int i) throws SerialisationException {
        this.group = str;
        this.tempFileDir = parquetStore.getTempFilesDir();
        final Schema gafferSchema = parquetStore.getSchemaUtils().getGafferSchema();
        final SchemaElementDefinition element = gafferSchema.getElement(str);
        this.isEntity = element instanceof SchemaEntityDefinition;

        // An explicit store property overrides the aggregate flag declared in the schema.
        final String aggregateOnIngest = parquetStore.m3getProperties().get(ParquetStoreProperties.PARQUET_AGGREGATE_ON_INGEST, null);
        this.aggregate = (null == aggregateOnIngest) ? element.isAggregate() : Boolean.parseBoolean(aggregateOnIngest);

        this.groupByColumns = new HashSet<>(AggregatorUtil.getIngestGroupBy(str, gafferSchema));
        this.aggregatorJson = JSONSerialiser.serialise(element.getIngestAggregator(), new String[0]);
        this.gafferProperties = element.getProperties().toArray(new String[0]);
        this.spark = sparkSession;
        this.columnToPaths = parquetStore.getSchemaUtils().getColumnToPaths(str);
        this.sparkSchema = parquetStore.getSchemaUtils().getSparkSchema(str);
        this.gafferGroupObjectConverter = parquetStore.getSchemaUtils().getConverter(str);
        this.currentGraphFiles = set;

        // Raw data is read from the VERTEX directory for entities and the SOURCE directory for
        // edges; the output directory is built the same way for both.
        final String rawColumn = this.isEntity ? ParquetStoreConstants.VERTEX : ParquetStoreConstants.SOURCE;
        this.inputDir = ParquetStore.getGroupDirectory(str, rawColumn, this.tempFileDir) + RAW + SPLIT + i;
        this.outputDir = ParquetStore.getGroupDirectory(str, str2, this.tempFileDir) + AGGREGATED + SPLIT + i;
    }

    /**
     * Reads the input data for this split, aggregates it when aggregation is enabled, and
     * writes the result as Parquet to the output directory. Nothing is written when no input
     * paths are found.
     *
     * @return {@code null} on success, or an {@link OperationException} wrapping the
     *         {@link IOException} that occurred
     */
    @Override
    public OperationException call() {
        try {
            // Not closed here: this task did not create the FileSystem exclusively for itself.
            final FileSystem fileSystem = FileSystem.get(new Configuration());
            final ArrayList<String> paths = collectInputPaths(fileSystem);
            if (paths.isEmpty()) {
                LOGGER.debug("Skipping the sorting and aggregation of group: {}, due to no data existing in the temporary files directory: {}", this.group, this.tempFileDir);
                return null;
            }

            LOGGER.debug("Aggregating and sorting the data for group {} stored in directories {}", this.group, StringUtils.join(paths, ','));
            // Left as a raw Dataset, as in the original: the row type is not imported in this file.
            final Dataset rawData = this.spark.read().parquet(JavaConversions.asScalaBuffer(paths));
            final Dataset toWrite;
            if (this.aggregate) {
                // Key each row by its group-by columns, merge rows sharing a key, then rebuild
                // a data frame from the merged rows.
                final ExtractKeyFromRow keyExtractor = new ExtractKeyFromRow(this.groupByColumns, this.columnToPaths, this.isEntity.booleanValue());
                final AggregateGafferRowsFunction rowAggregator = new AggregateGafferRowsFunction(this.gafferProperties, this.isEntity.booleanValue(), this.groupByColumns, this.columnToPaths, this.aggregatorJson, this.gafferGroupObjectConverter);
                toWrite = this.spark.createDataFrame(rawData.javaRDD().keyBy(keyExtractor).reduceByKey(rowAggregator).values(), this.sparkSchema);
            } else {
                toWrite = rawData;
            }
            toWrite.write().parquet(this.outputDir);
            return null;
        } catch (IOException e) {
            return new OperationException("IOException occurred during aggregation and sorting of data", e);
        }
    }

    /**
     * Builds the list of paths to read: the raw input directory if it exists, followed by
     * every file matched by each current-graph file pattern whose parent directory exists.
     *
     * @param fileSystem the file system used for existence checks and glob expansion
     * @return the paths to read; empty when there is nothing to process
     * @throws IOException if a file system call fails
     */
    private ArrayList<String> collectInputPaths(final FileSystem fileSystem) throws IOException {
        final ArrayList<String> paths = new ArrayList<>();
        if (fileSystem.exists(new Path(this.inputDir))) {
            paths.add(this.inputDir);
        }
        if (null == this.currentGraphFiles) {
            return paths;
        }
        for (final String graphFile : this.currentGraphFiles) {
            final Path pattern = new Path(graphFile);
            if (!fileSystem.exists(pattern.getParent())) {
                continue;
            }
            // globStatus returns null (not an empty array) for a non-glob path that does not
            // exist, so it must be checked before iterating.
            final FileStatus[] matches = fileSystem.globStatus(pattern);
            if (null == matches) {
                continue;
            }
            for (final FileStatus match : matches) {
                paths.add(match.getPath().toString());
            }
        }
        return paths;
    }
}
