package ivory.core.index;

import edu.umd.cloud9.io.pair.PairOfInts;
import edu.umd.cloud9.util.PowerTool;
import edu.umd.cloud9.util.map.HMapII;
import edu.umd.cloud9.util.map.MapII;
import ivory.core.Constants;
import ivory.core.RetrievalEnvironment;
import ivory.core.data.document.IntDocVector;
import ivory.core.data.index.PostingsList;
import ivory.core.data.index.PostingsListDocSortedPositional;
import ivory.core.data.index.TermPositions;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/core/index/BuildIPInvertedIndexDocSorted.class */
public class BuildIPInvertedIndexDocSorted extends PowerTool {
    private static final Logger LOG = Logger.getLogger(BuildIPInvertedIndexDocSorted.class);
    public static final String[] RequiredParameters = {Constants.NumReduceTasks, Constants.IndexPath};

    /**
     * Hadoop counter for documents. {@code Total} is incremented by one for every
     * document handled by {@code MyMapper.map}.
     *
     * <p>Decompiler note: the access modifier was changed from {@code protected}.
     */
    public enum Docs {
        Total
    }

    /**
     * Hadoop counters for indexed terms.
     *
     * <ul>
     *   <li>{@code Unique} is incremented by one each time the reducer writes a
     *       postings list for a term.</li>
     *   <li>{@code Total} is incremented by the mapper with the sum of term
     *       frequencies of each document it processes.</li>
     * </ul>
     *
     * <p>Decompiler note: the access modifier was changed from {@code protected}.
     */
    public enum IndexedTerms {
        Unique,
        Total
    }

    /**
     * Hadoop counter for mapper wall-clock time. {@code Total} accumulates the elapsed
     * milliseconds measured around each {@code MyMapper.map} invocation.
     *
     * <p>Decompiler note: the access modifier was changed from {@code protected}.
     */
    public enum MapTime {
        Total
    }

    /* loaded from: input_file:ivory/core/index/BuildIPInvertedIndexDocSorted$MyMapper.class */
    private static class MyMapper extends Mapper<IntWritable, IntDocVector, PairOfInts, TermPositions> {
        private static final TermPositions termPositions = new TermPositions();
        private static final PairOfInts pair = new PairOfInts();
        private static final HMapII dfs = new HMapII();
        private int docno;

        private MyMapper() {
        }

        public void setup(Mapper<IntWritable, IntDocVector, PairOfInts, TermPositions>.Context context) {
            dfs.clear();
        }

        public void map(IntWritable intWritable, IntDocVector intDocVector, Mapper<IntWritable, IntDocVector, PairOfInts, TermPositions>.Context context) throws IOException, InterruptedException {
            this.docno = intWritable.get();
            long currentTimeMillis = System.currentTimeMillis();
            IntDocVector.Reader reader = intDocVector.getReader();
            int i = 0;
            while (reader.hasMoreTerms()) {
                int nextTerm = reader.nextTerm();
                reader.getPositions(termPositions);
                pair.set(nextTerm, this.docno);
                context.write(pair, termPositions);
                i += termPositions.getTf();
                dfs.increment(nextTerm);
            }
            context.getCounter(IndexedTerms.Total).increment(i);
            context.getCounter(Docs.Total).increment(1L);
            context.getCounter(MapTime.Total).increment(System.currentTimeMillis() - currentTimeMillis);
        }

        public void cleanup(Mapper<IntWritable, IntDocVector, PairOfInts, TermPositions>.Context context) throws IOException, InterruptedException {
            int[] iArr = new int[1];
            for (MapII.Entry entry : dfs.entrySet()) {
                iArr[0] = entry.getValue();
                termPositions.set(iArr, (short) 1);
                pair.set(entry.getKey(), -1);
                context.write(pair, termPositions);
            }
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, Mapper.Context context) throws IOException, InterruptedException {
            map((IntWritable) obj, (IntDocVector) obj2, (Mapper<IntWritable, IntDocVector, PairOfInts, TermPositions>.Context) context);
        }
    }

    /* loaded from: input_file:ivory/core/index/BuildIPInvertedIndexDocSorted$MyPartitioner.class */
    private static class MyPartitioner extends Partitioner<PairOfInts, TermPositions> {
        private MyPartitioner() {
        }

        public int getPartition(PairOfInts pairOfInts, TermPositions termPositions, int i) {
            return (pairOfInts.getLeftElement() & Integer.MAX_VALUE) % i;
        }
    }

    /* loaded from: input_file:ivory/core/index/BuildIPInvertedIndexDocSorted$MyReducer.class */
    private static class MyReducer extends Reducer<PairOfInts, TermPositions, IntWritable, PostingsList> {
        private static final IntWritable term = new IntWritable();
        private static PostingsList postings;
        private int prevTerm = -1;
        private int numPostings = 0;

        private MyReducer() {
        }

        public void setup(Reducer<PairOfInts, TermPositions, IntWritable, PostingsList>.Context context) {
            BuildIPInvertedIndexDocSorted.LOG.setLevel(Level.WARN);
            int i = context.getConfiguration().getInt(Constants.CollectionDocumentCount, 0);
            if (i == 0) {
                throw new RuntimeException("Error: size of collection cannot be zero!");
            }
            try {
                postings = (PostingsList) Class.forName(context.getConfiguration().get(Constants.PostingsListsType, PostingsListDocSortedPositional.class.getCanonicalName())).newInstance();
                postings.setCollectionDocumentCount(i);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }

        public void reduce(PairOfInts pairOfInts, Iterable<TermPositions> iterable, Reducer<PairOfInts, TermPositions, IntWritable, PostingsList>.Context context) throws IOException, InterruptedException {
            long currentTimeMillis = System.currentTimeMillis();
            int leftElement = pairOfInts.getLeftElement();
            if (pairOfInts.getRightElement() != -1) {
                Iterator<TermPositions> it = iterable.iterator();
                TermPositions next = it.next();
                postings.add(pairOfInts.getRightElement(), next.getTf(), next);
                if (it.hasNext()) {
                    throw new RuntimeException(String.format("Error: values with the same (term, docno): docno=%d, term=%d", Integer.valueOf(pairOfInts.getRightElement()), Integer.valueOf(leftElement)));
                }
                this.prevTerm = leftElement;
                context.getCounter(ReduceTime.Total).increment(System.currentTimeMillis() - currentTimeMillis);
                return;
            }
            if (this.prevTerm != -1 && leftElement != this.prevTerm) {
                if (this.numPostings != postings.size()) {
                    throw new RuntimeException(String.format("Error: actual number of postings processed is different from expected! expected: %d, got: %d for term %d", Integer.valueOf(this.numPostings), Integer.valueOf(postings.size()), Integer.valueOf(this.prevTerm)));
                }
                term.set(this.prevTerm);
                context.write(term, postings);
                context.getCounter(IndexedTerms.Unique).increment(1L);
                BuildIPInvertedIndexDocSorted.LOG.info(String.format("Finished processing postings for term %d (num postings=%d)", Integer.valueOf(this.prevTerm), Integer.valueOf(postings.size())));
                postings.clear();
            }
            this.numPostings = 0;
            Iterator<TermPositions> it2 = iterable.iterator();
            while (it2.hasNext()) {
                this.numPostings += it2.next().getPositions()[0];
            }
            postings.setNumberOfPostings(this.numPostings);
        }

        public void cleanup(Reducer<PairOfInts, TermPositions, IntWritable, PostingsList>.Context context) throws IOException, InterruptedException {
            long currentTimeMillis = System.currentTimeMillis();
            if (this.numPostings != postings.size()) {
                throw new RuntimeException(String.format("Error: actual number of postings processed is different from expected! expected: %d, got: %d for term %d", Integer.valueOf(this.numPostings), Integer.valueOf(postings.size()), Integer.valueOf(this.prevTerm)));
            }
            term.set(this.prevTerm);
            context.write(term, postings);
            context.getCounter(IndexedTerms.Unique).increment(1L);
            BuildIPInvertedIndexDocSorted.LOG.info(String.format("Finished processing postings for term %d (num postings=%d)", Integer.valueOf(this.prevTerm), Integer.valueOf(postings.size())));
            context.getCounter(ReduceTime.Total).increment(System.currentTimeMillis() - currentTimeMillis);
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterable iterable, Reducer.Context context) throws IOException, InterruptedException {
            reduce((PairOfInts) obj, (Iterable<TermPositions>) iterable, (Reducer<PairOfInts, TermPositions, IntWritable, PostingsList>.Context) context);
        }
    }

    /**
     * Hadoop counter for reducer wall-clock time. {@code Total} accumulates elapsed
     * milliseconds measured in {@code MyReducer.reduce} (for keys that carry a posting)
     * and in {@code MyReducer.cleanup}.
     *
     * <p>Decompiler note: the access modifier was changed from {@code protected}.
     */
    public enum ReduceTime {
        Total
    }

    /**
     * Returns the configuration keys this tool lists as required:
     * {@code Constants.NumReduceTasks} and {@code Constants.IndexPath}.
     *
     * @return the shared {@code RequiredParameters} array itself (not a copy)
     */
    public String[] getRequiredParameters() {
        return RequiredParameters;
    }

    /**
     * Creates the tool, handing the given configuration to the {@code PowerTool}
     * superclass constructor.
     *
     * @param configuration Hadoop configuration passed to the superclass
     */
    public BuildIPInvertedIndexDocSorted(Configuration configuration) {
        super(configuration);
    }

    /**
     * Configures and runs the MapReduce job that builds the postings from the int
     * document vectors stored under the index path.
     *
     * <p>If the postings directory already exists the method logs that fact and returns
     * without running anything. Otherwise it submits the job, waits for it, and on
     * success records the postings list type through the retrieval environment.
     *
     * @return {@code 0} if postings already existed or the job succeeded; {@code -1} if
     *         the job ran but did not complete successfully (the postings type is not
     *         recorded in that case)
     * @throws Exception if reading the environment, resolving the postings list class,
     *                   or submitting/monitoring the job fails
     */
    public int runTool() throws Exception {
        Configuration conf = getConf();
        FileSystem fs = FileSystem.get(conf);

        String indexPath = conf.get(Constants.IndexPath);
        RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

        String collectionName = env.readCollectionName();
        int reduceTasks = conf.getInt(Constants.NumReduceTasks, 0);
        int minSplitSize = conf.getInt(Constants.MinSplitSize, 0);
        int collectionDocCount = env.readCollectionDocumentCount();
        Class<?> postingsClass = Class.forName(
                conf.get(Constants.PostingsListsType, PostingsListDocSortedPositional.class.getCanonicalName()));

        LOG.info("PowerTool: " + BuildIPInvertedIndexDocSorted.class.getSimpleName());
        LOG.info(String.format(" - %s: %s", Constants.IndexPath, indexPath));
        LOG.info(String.format(" - %s: %s", Constants.CollectionName, collectionName));
        LOG.info(String.format(" - %s: %s", Constants.CollectionDocumentCount, collectionDocCount));
        LOG.info(String.format(" - %s: %s", Constants.PostingsListsType, postingsClass.getCanonicalName()));
        LOG.info(String.format(" - %s: %s", Constants.NumReduceTasks, reduceTasks));
        LOG.info(String.format(" - %s: %s", Constants.MinSplitSize, minSplitSize));

        Path indexDir = new Path(indexPath);
        if (!fs.exists(indexDir)) {
            fs.mkdirs(indexDir);
        }

        Path inputPath = new Path(env.getIntDocVectorsDirectory());
        Path postingsPath = new Path(env.getPostingsDirectory());
        if (fs.exists(postingsPath)) {
            LOG.info("Postings already exist: no indexing will be performed.");
            return 0;
        }

        // The reducer reads the document count back from the job configuration.
        conf.setInt(Constants.CollectionDocumentCount, collectionDocCount);
        conf.setInt("mapred.min.split.size", minSplitSize);
        conf.set("mapreduce.map.memory.mb", "3072");
        conf.set("mapreduce.map.java.opts", "-Xmx3072m");
        conf.set("mapreduce.reduce.memory.mb", "3072");
        conf.set("mapreduce.reduce.java.opts", "-Xmx3072m");

        Job job = Job.getInstance(conf, BuildIPInvertedIndexDocSorted.class.getSimpleName() + ":" + collectionName);
        job.setJarByClass(BuildIPInvertedIndexDocSorted.class);
        job.setNumReduceTasks(reduceTasks);

        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, postingsPath);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setMapOutputKeyClass(PairOfInts.class);
        job.setMapOutputValueClass(TermPositions.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(postingsClass);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setPartitionerClass(MyPartitioner.class);

        long start = System.currentTimeMillis();
        boolean succeeded = job.waitForCompletion(true);
        double elapsedSeconds = (System.currentTimeMillis() - start) / 1000.0d;

        if (!succeeded) {
            // Do not record the postings type for an index that was not built.
            LOG.error("Job Failed after " + elapsedSeconds + " seconds");
            return -1;
        }

        LOG.info("Job Finished in " + elapsedSeconds + " seconds");
        env.writePostingsType(postingsClass.getCanonicalName());
        return 0;
    }
}
