package uk.gov.gchq.gaffer.accumulostore.operation.hdfs.handler.job.factory;

import com.google.common.collect.Lists;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.io.compress.zlib.ZlibFactory;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.NativeCodeLoader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.gov.gchq.gaffer.accumulostore.AccumuloStore;
import uk.gov.gchq.gaffer.accumulostore.operation.hdfs.mapper.SampleDataForSplitPointsMapper;
import uk.gov.gchq.gaffer.accumulostore.operation.hdfs.reducer.AccumuloKeyValueReducer;
import uk.gov.gchq.gaffer.accumulostore.utils.AccumuloStoreConstants;
import uk.gov.gchq.gaffer.hdfs.operation.SampleDataForSplitPoints;
import uk.gov.gchq.gaffer.hdfs.operation.handler.job.factory.JobFactory;
import uk.gov.gchq.gaffer.hdfs.operation.handler.job.factory.SampleDataForSplitPointsJobFactory;
import uk.gov.gchq.gaffer.store.Store;
import uk.gov.gchq.gaffer.store.StoreException;

/* loaded from: input_file:uk/gov/gchq/gaffer/accumulostore/operation/hdfs/handler/job/factory/AccumuloSampleDataForSplitPointsJobFactory.class */
/**
 * {@link SampleDataForSplitPointsJobFactory} implementation for an {@link AccumuloStore}.
 *
 * <p>Creates one Hadoop MapReduce {@link Job} per distinct mapper generator named in a
 * {@link SampleDataForSplitPoints} operation. Each job maps with
 * {@link SampleDataForSplitPointsMapper}, reduces with {@link AccumuloKeyValueReducer} using a
 * single reduce task, and writes Accumulo {@link Key}/{@link Value} pairs to a sequence file.
 */
public class AccumuloSampleDataForSplitPointsJobFactory implements SampleDataForSplitPointsJobFactory {
    private static final Logger LOGGER = LoggerFactory.getLogger(AccumuloSampleDataForSplitPointsJobFactory.class);

    /**
     * Creates a fully configured job for every distinct mapper generator in the operation's
     * input/mapper pairs.
     *
     * @param sampleDataForSplitPoints the operation describing inputs, output path and options
     * @param store                    the store the jobs are created for
     * @return the configured jobs, one per distinct mapper generator
     * @throws IOException if a job or its configuration cannot be created
     */
    @Override
    public List<Job> createJobs(SampleDataForSplitPoints sampleDataForSplitPoints, Store store) throws IOException {
        // Group the inputs (entry keys) by their mapper generator (entry values). Only the distinct
        // generators are consumed below; the grouped inputs themselves are not read in this method.
        final Map<String, List<String>> inputsByMapperGenerator = new HashMap<>();
        for (final Map.Entry<String, String> inputMapperPair : sampleDataForSplitPoints.getInputMapperPairs().entrySet()) {
            List<String> inputs = inputsByMapperGenerator.get(inputMapperPair.getValue());
            if (null == inputs) {
                inputs = new ArrayList<>();
                inputsByMapperGenerator.put(inputMapperPair.getValue(), inputs);
            }
            inputs.add(inputMapperPair.getKey());
        }

        final List<Job> jobs = new ArrayList<>(inputsByMapperGenerator.size());
        for (final String mapperGenerator : inputsByMapperGenerator.keySet()) {
            final Job job = Job.getInstance(createJobConf(sampleDataForSplitPoints, mapperGenerator, store));
            setupJob(job, sampleDataForSplitPoints, mapperGenerator, store);
            // The job initialiser is optional; when present it gets the final say on the job setup.
            if (null != sampleDataForSplitPoints.getJobInitialiser()) {
                sampleDataForSplitPoints.getJobInitialiser().initialiseJob(job, sampleDataForSplitPoints, store);
            }
            jobs.add(job);
        }
        return jobs;
    }

    /**
     * Extracts the split point from a sampled record: the row of the Accumulo key.
     *
     * @param writable  the record key; must be an Accumulo {@link Key}
     * @param writable2 the record value; not used
     * @return the row bytes of the key
     */
    @Override
    @SuppressFBWarnings(value = {"BC_UNCONFIRMED_CAST"}, justification = "key should always be an instance of Key")
    public byte[] createSplit(Writable writable, Writable writable2) {
        // Text.getBytes() exposes the backing array, which is only valid up to getLength();
        // copyBytes() returns an array that is exactly the length of the row.
        return ((Key) writable).getRow().copyBytes();
    }

    /**
     * @return a new, empty Accumulo {@link Key}
     */
    @Override
    public Writable createKey() {
        return new Key();
    }

    /**
     * @return a new, empty Accumulo {@link Value}
     */
    @Override
    public Writable createValue() {
        return new Value();
    }

    /**
     * Returns the number of splits to aim for: one fewer than the number of tablet servers.
     *
     * @param store the store; must be an {@link AccumuloStore}
     * @return the number of tablet servers minus one (so -1 when no tablet servers are reported)
     * @throws RuntimeException wrapping the {@link StoreException} if the tablet servers cannot be listed
     */
    @Override
    public int getExpectedNumberOfSplits(Store store) {
        try {
            final int numberOfTabletServers = ((AccumuloStore) store).getTabletServers().size();
            LOGGER.info("Number of tablet servers is {}", numberOfTabletServers);
            return numberOfTabletServers - 1;
        } catch (StoreException e) {
            // Pass the exception as the trailing argument so that the stack trace is logged too.
            LOGGER.error("Exception thrown getting number of tablet servers: {}", e.getMessage(), e);
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /**
     * Builds the job configuration: schema, mapper generator, validation flag, sample proportion,
     * optional number of map tasks, a single reduce task and the Accumulo element converter class.
     *
     * @param sampleDataForSplitPoints the operation supplying the job options
     * @param str                      the mapper generator to record in the configuration
     * @param store                    the store; must be an {@link AccumuloStore}
     * @return the populated job configuration
     * @throws IOException declared for compatibility with overriding implementations
     */
    @Override
    public JobConf createJobConf(SampleDataForSplitPoints sampleDataForSplitPoints, String str, Store store) throws IOException {
        final JobConf jobConf = new JobConf(new Configuration());
        LOGGER.info("Setting up job conf");

        // Serialise the schema once and reuse it for both the configuration and the log line.
        final String schemaJson = new String(store.getSchema().toCompactJson(), StandardCharsets.UTF_8);
        jobConf.set(JobFactory.SCHEMA, schemaJson);
        LOGGER.info("Added {} {} to job conf", JobFactory.SCHEMA, schemaJson);

        jobConf.set(JobFactory.MAPPER_GENERATOR, str);
        LOGGER.info("Added {} of {} to job conf", JobFactory.MAPPER_GENERATOR, str);

        final String validate = String.valueOf(sampleDataForSplitPoints.isValidate());
        jobConf.set(JobFactory.VALIDATE, validate);
        LOGGER.info("Added {} option of {} to job conf", JobFactory.VALIDATE, validate);

        final String proportionToSample = String.valueOf(sampleDataForSplitPoints.getProportionToSample());
        jobConf.set(SampleDataForSplitPointsJobFactory.PROPORTION_TO_SAMPLE, proportionToSample);
        LOGGER.info("Added {} option of {} to job conf", SampleDataForSplitPointsJobFactory.PROPORTION_TO_SAMPLE, proportionToSample);

        // The number of map tasks is optional; leave the configuration untouched when unset.
        final Integer numMapTasks = sampleDataForSplitPoints.getNumMapTasks();
        if (null != numMapTasks) {
            jobConf.setNumMapTasks(numMapTasks);
            LOGGER.info("Set number of map tasks to {} on job conf", numMapTasks);
        }

        jobConf.setNumReduceTasks(1);
        LOGGER.info("Set number of reduce tasks to 1 on job conf");

        jobConf.set(AccumuloStoreConstants.ACCUMULO_ELEMENT_CONVERTER_CLASS,
                ((AccumuloStore) store).getKeyPackage().getKeyConverter().getClass().getName());
        return jobConf;
    }

    /**
     * Sets the jar, job name, mapper, reducer and output on the given job.
     *
     * @param job                      the job to configure
     * @param sampleDataForSplitPoints the operation supplying the output path and options
     * @param str                      the mapper generator, used in the job name
     * @param store                    the store the job is created for
     * @throws IOException if the job cannot be configured
     */
    @Override
    public void setupJob(Job job, SampleDataForSplitPoints sampleDataForSplitPoints, String str, Store store) throws IOException {
        job.setJarByClass(getClass());
        job.setJobName(getJobName(str, new Path(sampleDataForSplitPoints.getOutputPath())));
        setupMapper(job);
        setupReducer(job);
        setupOutput(job, sampleDataForSplitPoints, store);
    }

    /**
     * Builds the human-readable job name.
     *
     * @param str  the mapper generator
     * @param path the job output path
     * @return the job name
     */
    protected String getJobName(String str, Path path) {
        return "Sample Data: Generator=" + str + ", output=" + path;
    }

    /**
     * Configures the mapper class and its Accumulo {@link Key}/{@link Value} output types.
     *
     * @param job the job to configure
     * @throws IOException declared for overriding implementations
     */
    protected void setupMapper(Job job) throws IOException {
        job.setMapperClass(SampleDataForSplitPointsMapper.class);
        job.setMapOutputKeyClass(Key.class);
        job.setMapOutputValueClass(Value.class);
    }

    /**
     * Configures the reducer class and its Accumulo {@link Key}/{@link Value} output types.
     *
     * @param job the job to configure
     * @throws IOException declared for overriding implementations
     */
    protected void setupReducer(Job job) throws IOException {
        job.setReducerClass(AccumuloKeyValueReducer.class);
        job.setOutputKeyClass(Key.class);
        job.setOutputValueClass(Value.class);
    }

    /**
     * Configures sequence-file output at the operation's output path and, when a compression codec
     * is set on the operation, block compression with that codec.
     *
     * <p>If the codec is a {@link GzipCodec} and neither the native Hadoop code nor native zlib is
     * loaded, a warning is logged and the output is left uncompressed.
     *
     * @param job                      the job to configure
     * @param sampleDataForSplitPoints the operation supplying the output path and compression codec
     * @param store                    the store the job is created for; not used here
     * @throws IOException if the output path cannot be set
     */
    protected void setupOutput(Job job, SampleDataForSplitPoints sampleDataForSplitPoints, Store store) throws IOException {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path(sampleDataForSplitPoints.getOutputPath()));

        if (null == sampleDataForSplitPoints.getCompressionCodec()) {
            return;
        }

        final boolean gzipWithoutNativeSupport = GzipCodec.class.isAssignableFrom(sampleDataForSplitPoints.getCompressionCodec())
                && !NativeCodeLoader.isNativeCodeLoaded()
                && !ZlibFactory.isNativeZlibLoaded(job.getConfiguration());
        if (gzipWithoutNativeSupport) {
            LOGGER.warn("SequenceFile doesn't work with GzipCodec without native-hadoop code!");
            return;
        }

        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressorClass(job, sampleDataForSplitPoints.getCompressionCodec());
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    }
}
