package ivory.core.preprocess;

import edu.umd.cloud9.util.PowerTool;
import ivory.core.Constants;
import ivory.core.RetrievalEnvironment;
import ivory.core.data.document.IntDocVector;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/core/preprocess/BuildIntDocVectorsForwardIndex.class */
public class BuildIntDocVectorsForwardIndex extends PowerTool {
    /** Logger used by the driver and by the nested mapper, which logs the name of each input file. */
    private static final Logger LOG = Logger.getLogger(BuildIntDocVectorsForwardIndex.class);
    /**
     * Parameter names handed back by {@link #getRequiredParameters()}. The only entry is
     * {@code Constants.IndexPath}, the configuration key {@link #runTool()} reads the index
     * location from.
     */
    public static final String[] RequiredParameters = {Constants.IndexPath};

    /* loaded from: input_file:ivory/core/preprocess/BuildIntDocVectorsForwardIndex$DocVectors.class */
    /**
     * Counter identifiers for this job. {@code Count} is incremented once for every record the
     * mapper emits.
     */
    protected enum DocVectors {
        Count
    }

    /* loaded from: input_file:ivory/core/preprocess/BuildIntDocVectorsForwardIndex$MyMapper.class */
    /**
     * Mapper that walks the records of its input split and emits, for every record, the record
     * key paired with an encoded location of that record.
     *
     * <p>The location packs two numbers into one {@code long}:
     * {@code FILE_NUMBER_MULTIPLIER * fileNumber + position}. The file number is parsed from the
     * text after the last {@code '-'} in the input file name, and the position is the value
     * returned by the reader's {@code getPosition()} immediately before the record was read.
     */
    private static class MyMapper extends Mapper<IntWritable, IntDocVector, IntWritable, LongWritable> {
        /**
         * Scale applied to the file number so that it occupies the decimal digits above the
         * position (10^15). File numbers above 9223 would overflow a {@code long}, and positions
         * of 10^15 or more would spill into the file-number digits.
         */
        private static final long FILE_NUMBER_MULTIPLIER = 1000000000000000L;

        /** Reusable output value; overwritten before every {@code context.write}. */
        private static final LongWritable output = new LongWritable();

        private MyMapper() {
        }

        /**
         * Replaces the default map loop: reads the split with a position-aware reader so the
         * location of each record can be captured before the record is consumed.
         *
         * @param context the task context supplying the input split and receiving the output
         * @throws IOException if reading the input or writing the output fails
         * @throws InterruptedException if the task is interrupted
         * @throws NumberFormatException if the text after the last '-' in the file name is not an
         *         integer
         */
        public void run(Mapper<IntWritable, IntDocVector, IntWritable, LongWritable>.Context context) throws IOException, InterruptedException {
            // NOTE(review): the Hadoop InputSplit base type does not appear to declare getPath();
            // this call presumably needs a cast to FileSplit -- confirm against the Hadoop version in use.
            String fileName = context.getInputSplit().getPath().getName();
            BuildIntDocVectorsForwardIndex.LOG.info("Input file: " + fileName);

            // Parse the file number before opening the reader so a malformed name cannot leave
            // an open reader behind.
            int fileNumber = Integer.parseInt(fileName.substring(fileName.lastIndexOf("-") + 1));

            PositionalSequenceFileRecordReader reader = new PositionalSequenceFileRecordReader();
            reader.initialize(context.getInputSplit(), context);
            try {
                // The position must be sampled BEFORE nextKeyValue() advances past the record.
                long recordPosition = reader.getPosition();
                while (reader.nextKeyValue()) {
                    output.set((FILE_NUMBER_MULTIPLIER * fileNumber) + recordPosition);
                    // The reader is declared without type arguments, so the key is cast explicitly.
                    context.write((IntWritable) reader.getCurrentKey(), output);
                    context.getCounter(DocVectors.Count).increment(1L);
                    recordPosition = reader.getPosition();
                }
            } finally {
                // Release the underlying file even when reading or writing fails midway.
                reader.close();
            }
        }
    }

    /* loaded from: input_file:ivory/core/preprocess/BuildIntDocVectorsForwardIndex$MyReducer.class */
    /**
     * Reducer that writes the forward index file directly to the file system; it emits no
     * MapReduce output of its own.
     *
     * <p>File layout, as written by this class: one {@code int} docno offset, one {@code int}
     * collection document count, then one {@code long} encoded position per {@code reduce} call,
     * in the order the calls arrive.
     */
    private static class MyReducer extends Reducer<IntWritable, LongWritable, NullWritable, NullWritable> {
        /** Stream to the forward index file; opened in {@code setup}, closed in {@code cleanup}. */
        private FSDataOutputStream out;
        /** Document count read from the retrieval environment; used as a sanity check in {@code cleanup}. */
        private int collectionDocumentCount;
        /** Number of keys reduced so far. */
        private int curDoc = 0;

        private MyReducer() {
        }

        /**
         * Opens the forward index file (overwriting any existing one) and writes its two-int
         * header.
         *
         * <p>Each stage has its own try block so that a failure is reported with the message
         * of the stage that actually failed, and the underlying exception is kept as the cause.
         *
         * @param context the task context supplying the job configuration
         * @throws RuntimeException if the file system, the retrieval environment, or the output
         *         file cannot be set up
         */
        public void setup(Reducer<IntWritable, LongWritable, NullWritable, NullWritable>.Context context) {
            Configuration conf = context.getConfiguration();

            FileSystem fs;
            try {
                fs = FileSystem.get(conf);
            } catch (Exception e) {
                throw new RuntimeException("Error opening the FileSystem!", e);
            }

            RetrievalEnvironment env;
            String forwardIndexPath;
            try {
                env = new RetrievalEnvironment(conf.get(Constants.IndexPath), fs);
                forwardIndexPath = env.getIntDocVectorsForwardIndex();
                this.collectionDocumentCount = env.readCollectionDocumentCount();
            } catch (IOException e) {
                throw new RuntimeException("Unable to create RetrievalEnvironment!", e);
            }

            try {
                this.out = fs.create(new Path(forwardIndexPath), true);
                this.out.writeInt(env.readDocnoOffset());
                this.out.writeInt(this.collectionDocumentCount);
            } catch (Exception e) {
                throw new RuntimeException("Error in creating files!", e);
            }
        }

        /**
         * Appends the single encoded position for this key to the forward index.
         *
         * @param intWritable the record key emitted by the mapper
         * @param iterable the values for the key; exactly one is expected
         * @param context the task context (unused)
         * @throws IOException if writing to the forward index fails
         * @throws RuntimeException if the key has more than one value
         */
        public void reduce(IntWritable intWritable, Iterable<LongWritable> iterable, Reducer<IntWritable, LongWritable, NullWritable, NullWritable>.Context context) throws IOException, InterruptedException {
            Iterator<LongWritable> values = iterable.iterator();
            long encodedPosition = values.next().get();
            if (values.hasNext()) {
                throw new RuntimeException("There shouldn't be more than one value, key=" + intWritable);
            }
            this.curDoc++;
            this.out.writeLong(encodedPosition);
        }

        /**
         * Closes the forward index and verifies that one entry was written per document.
         *
         * @param context the task context (unused)
         * @throws IOException if closing fails, or if the number of reduced keys differs from
         *         the collection document count
         */
        public void cleanup(Reducer<IntWritable, LongWritable, NullWritable, NullWritable>.Context context) throws IOException {
            // Close first so the stream is released even when the count check below fails.
            this.out.close();
            if (this.curDoc != this.collectionDocumentCount) {
                throw new IOException("Expected " + this.collectionDocumentCount + " docs, actually got " + this.curDoc + " docs!");
            }
        }
    }

    /**
     * Creates the tool, passing the configuration straight to the {@code PowerTool} superclass.
     *
     * @param configuration the configuration later retrieved through {@code getConf()} in
     *        {@link #runTool()}
     */
    public BuildIntDocVectorsForwardIndex(Configuration configuration) {
        super(configuration);
    }

    /**
     * Returns the parameter names this tool declares as required.
     *
     * @return the shared {@link #RequiredParameters} array itself (not a copy)
     */
    public String[] getRequiredParameters() {
        return RequiredParameters;
    }

    /**
     * Configures and runs the MapReduce job that builds the int doc vectors forward index.
     *
     * <p>The job is skipped (returning 0) when the int doc vectors directory is missing or when
     * the forward index file already exists. Otherwise the job reads the doc vectors directory
     * as sequence files, uses a single reducer, and produces no MapReduce output; the reducer
     * writes the index file itself.
     *
     * @return 0 when the job was skipped or completed successfully; -1 when the job reported
     *         failure
     * @throws Exception if the file system, the retrieval environment, or job submission fails
     */
    public int runTool() throws Exception {
        Configuration conf = getConf();
        FileSystem fs = FileSystem.get(conf);
        String indexPath = conf.get(Constants.IndexPath);
        RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
        String collectionName = env.readCollectionName();

        LOG.info("Tool: " + BuildIntDocVectorsForwardIndex.class.getSimpleName());
        LOG.info(String.format(" - %s: %s", Constants.CollectionName, collectionName));
        LOG.info(String.format(" - %s: %s", Constants.IndexPath, indexPath));

        String docVectorsDir = env.getIntDocVectorsDirectory();
        String forwardIndexPath = env.getIntDocVectorsForwardIndex();

        if (!fs.exists(new Path(docVectorsDir))) {
            LOG.info("Error: IntDocVectors don't exist!");
            return 0;
        }
        if (fs.exists(new Path(forwardIndexPath))) {
            LOG.info("IntDocVectorIndex already exists: skipping!");
            return 0;
        }

        Job job = Job.getInstance(conf, BuildIntDocVectorsForwardIndex.class.getSimpleName() + ":" + collectionName);
        job.setJarByClass(BuildIntDocVectorsForwardIndex.class);

        FileInputFormat.setInputPaths(job, new Path(docVectorsDir));
        // One reducer, so that all positions end up in a single forward index file.
        job.setNumReduceTasks(1);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        long startTime = System.currentTimeMillis();
        boolean succeeded = job.waitForCompletion(true);
        double elapsedSeconds = (System.currentTimeMillis() - startTime) / 1000.0d;

        if (!succeeded) {
            // Surface the failure instead of reporting success to the caller.
            // NOTE(review): the reducer creates the index file in setup(), so a failed run may
            // leave a partial file that makes the next run skip -- consider removing it here.
            LOG.error("Job Failed after " + elapsedSeconds + " seconds");
            return -1;
        }
        LOG.info("Job Finished in " + elapsedSeconds + " seconds");
        return 0;
    }
}
