package ivory.core.index;

import edu.umd.cloud9.util.PowerTool;
import edu.umd.cloud9.util.map.HMapIV;
import edu.umd.cloud9.util.map.MapIV;
import ivory.core.Constants;
import ivory.core.RetrievalEnvironment;
import ivory.core.data.document.IntDocVector;
import ivory.core.data.index.PostingsAccumulator;
import ivory.core.data.index.PostingsList;
import ivory.core.data.index.PostingsListDocSortedPositional;
import ivory.core.data.index.TermPositions;
import ivory.core.util.QuickSort;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/core/index/BuildLPInvertedIndexDocSorted.class */
public class BuildLPInvertedIndexDocSorted extends PowerTool {
    private static final Logger LOG = Logger.getLogger(BuildLPInvertedIndexDocSorted.class);
    public static final String[] RequiredParameters = {Constants.NumReduceTasks, Constants.IndexPath};

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:ivory/core/index/BuildLPInvertedIndexDocSorted$Docs.class */
    /**
     * Hadoop counter group for documents seen by the mapper.
     * {@code Total} is incremented by one at the end of every {@code MyMapper.map} call.
     */
    public enum Docs {
        Total
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:ivory/core/index/BuildLPInvertedIndexDocSorted$IndexedTerms.class */
    /**
     * Hadoop counter group for indexed term occurrences.
     * {@code Total} is incremented per document by the summed length of the position arrays
     * of all terms read from that document's vector.
     */
    public enum IndexedTerms {
        Total
    }

    /* loaded from: input_file:ivory/core/index/BuildLPInvertedIndexDocSorted$MapStats.class */
    /**
     * Counter group that is declared but not referenced anywhere else in this file.
     * NOTE(review): the meaning of {@code PL1} and {@code df1} cannot be determined from
     * this class alone - confirm whether it is still used before relying on it.
     */
    protected enum MapStats {
        PL1,
        df1
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:ivory/core/index/BuildLPInvertedIndexDocSorted$MapTime.class */
    /**
     * Hadoop counter group accumulating wall-clock milliseconds spent in the mapper.
     */
    public enum MapTime {
        // Time spent emitting buffered partial postings lists (measured in flushPostings).
        Spilling,
        // Time spent reading a document vector and buffering its postings (measured in map).
        Parsing
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:ivory/core/index/BuildLPInvertedIndexDocSorted$MemoryFlushes.class */
    /**
     * Hadoop counter group recording why the mapper attempted to flush its in-memory postings.
     */
    public enum MemoryFlushes {
        // The used-heap fraction reached the configured map memory threshold.
        AfterMemoryFilled,
        // The processed-document count was a multiple of the configured flush interval.
        AfterNDocs,
        // Postings were still buffered when the mapper's cleanup ran.
        AtClose,
        // NOTE(review): not incremented anywhere in this file.
        Total
    }

    /* loaded from: input_file:ivory/core/index/BuildLPInvertedIndexDocSorted$MyMapper.class */
    /**
     * Mapper that buffers postings per term id in memory and emits them as partial postings
     * lists, one output record per buffered term, whenever a flush is triggered.
     *
     * <p>A flush is attempted before and after every document. It goes ahead when the used
     * fraction of the currently allocated heap reaches the configured threshold, or when the
     * number of processed documents is a multiple of the configured interval. Anything still
     * buffered is emitted in {@link #cleanup}.
     *
     * <p>The decompiler-emitted synthetic bridge {@code map(Object, Object, Mapper.Context)}
     * has been removed: the compiler generates that bridge itself, and declaring it by hand
     * clashes with the generated one.
     */
    private static class MyMapper extends Mapper<IntWritable, IntDocVector, IntWritable, PostingsListDocSortedPositional> {
        /** Reusable output key; set to the term id of the list being emitted. */
        private static final IntWritable TERM = new IntWritable();
        private static final Runtime runtime = Runtime.getRuntime();

        /** Used-heap fraction at or above which buffered postings are flushed. */
        private float mapMemoryThreshold = 0.9f;
        /**
         * Flush interval in documents.
         * NOTE(review): a configured value of 0 makes the modulo in flushPostings throw
         * ArithmeticException - confirm whether the setting is validated upstream.
         */
        private int maxDocsBeforeFlush = 50000;
        /** Collection document count, stamped on every emitted postings list. */
        private int collectionDocumentCount;
        /** Number of documents fully processed by this mapper instance. */
        private int docs = 0;
        /** Reusable output value; cleared and refilled for each term. */
        private final PostingsListDocSortedPositional postingsList = new PostingsListDocSortedPositional();
        /** Postings buffered since the last flush, keyed by term id. */
        private final HMapIV<PostingsAccumulator> partialPostings = new HMapIV<>();

        private MyMapper() {
        }

        /**
         * Reads the flush thresholds and the collection document count from the job
         * configuration. Overrides the framework's setup hook.
         *
         * @param context task context supplying the configuration
         */
        public void setup(Mapper<IntWritable, IntDocVector, IntWritable, PostingsListDocSortedPositional>.Context context) {
            Configuration configuration = context.getConfiguration();
            this.mapMemoryThreshold = configuration.getFloat(Constants.IndexingMapMemoryThreshold, 0.9f);
            this.maxDocsBeforeFlush = configuration.getInt(Constants.MaxNDocsBeforeFlush, 50000);
            this.collectionDocumentCount = configuration.getInt(Constants.CollectionDocumentCount, 0);
        }

        /**
         * Buffers the postings of one document. Overrides the framework's map hook.
         *
         * @param key     document number
         * @param doc     the document's term vector
         * @param context task context used for counters, status and output
         * @throws IOException          if emitting a partial postings list fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        public void map(IntWritable key, IntDocVector doc, Mapper<IntWritable, IntDocVector, IntWritable, PostingsListDocSortedPositional>.Context context) throws IOException, InterruptedException {
            final int docno = key.get();

            // Give the buffer a chance to drain before adding more postings to it.
            flushPostings(false, context);

            long startTime = System.currentTimeMillis();
            IntDocVector.Reader reader = doc.getReader();
            int termOccurrences = 0;
            while (reader.hasMoreTerms()) {
                int term = reader.nextTerm();
                int[] positions = reader.getPositions();
                PostingsAccumulator accumulator = this.partialPostings.get(term);
                if (accumulator == null) {
                    accumulator = new PostingsAccumulator();
                    this.partialPostings.put(term, accumulator);
                }
                accumulator.add(docno, positions);
                termOccurrences += positions.length;
            }
            context.getCounter(MapTime.Parsing).increment(System.currentTimeMillis() - startTime);
            context.getCounter(IndexedTerms.Total).increment(termOccurrences);

            this.docs++;
            flushPostings(false, context);
            context.getCounter(Docs.Total).increment(1L);
        }

        /**
         * Emits all buffered postings as partial postings lists and clears the buffer.
         *
         * @param force   when {@code true}, skip the memory and document-count checks
         * @param context task context used for counters, status and output
         * @return {@code false} if the flush was skipped because neither trigger fired,
         *         {@code true} otherwise (including when the buffer was already empty)
         * @throws IOException          if writing a postings list fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        private boolean flushPostings(boolean force, Mapper<IntWritable, IntDocVector, IntWritable, PostingsListDocSortedPositional>.Context context) throws IOException, InterruptedException {
            if (!force) {
                // Fraction of the currently allocated heap that is in use.
                float usedFraction = 1.0f - ((((float) runtime.freeMemory()) * 1.0f) / ((float) runtime.totalMemory()));
                context.setStatus("m" + usedFraction);
                boolean memoryFilled = usedFraction >= this.mapMemoryThreshold;
                if (!memoryFilled && this.docs % this.maxDocsBeforeFlush != 0) {
                    return false;
                }
                // These counters are bumped even when the buffer turns out to be empty below
                // (for example on the very first document, when docs is still 0).
                if (memoryFilled) {
                    context.getCounter(MemoryFlushes.AfterMemoryFilled).increment(1L);
                } else {
                    context.getCounter(MemoryFlushes.AfterNDocs).increment(1L);
                }
            }
            if (this.partialPostings.size() == 0) {
                return true;
            }

            TermPositions termPositions = new TermPositions();
            long startTime = System.currentTimeMillis();
            for (MapIV.Entry<PostingsAccumulator> entry : this.partialPostings.entrySet()) {
                TERM.set(entry.getKey());
                context.setStatus("t" + TERM.get());

                PostingsAccumulator accumulator = entry.getValue();
                int count = accumulator.size();
                this.postingsList.clear();
                this.postingsList.setCollectionDocumentCount(this.collectionDocumentCount);
                this.postingsList.setNumberOfPostings(count);

                int[] docnos = accumulator.getDocnos();
                int[][] positions = accumulator.getPositions();
                // Sort the docnos and the position arrays together before building the list.
                QuickSort.quicksortWithStack(positions, docnos, 0, count - 1);
                for (int i = 0; i < count; i++) {
                    termPositions.set(positions[i], (short) positions[i].length);
                    this.postingsList.add(docnos[i], termPositions.getTf(), termPositions);
                }
                context.write(TERM, this.postingsList);
            }
            context.getCounter(MapTime.Spilling).increment(System.currentTimeMillis() - startTime);
            this.partialPostings.clear();
            return true;
        }

        /**
         * Emits whatever is still buffered when the task finishes. Overrides the framework's
         * cleanup hook.
         *
         * @param context task context used for counters and output
         * @throws IOException          if writing a postings list fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        public void cleanup(Mapper<IntWritable, IntDocVector, IntWritable, PostingsListDocSortedPositional>.Context context) throws IOException, InterruptedException {
            if (this.partialPostings.size() > 0) {
                flushPostings(true, context);
                context.getCounter(MemoryFlushes.AtClose).increment(1L);
            }
        }
    }

    /* loaded from: input_file:ivory/core/index/BuildLPInvertedIndexDocSorted$MyReducer.class */
    /**
     * Reducer that combines the partial postings lists emitted for a term into a single
     * postings list.
     *
     * <p>A term with exactly one partial list is written through unchanged. Otherwise the
     * incoming lists are collected and merged in batches: a batch is merged early when the
     * used fraction of the allocated heap reaches the configured threshold, and the results
     * of all batches are merged once more at the end if there is more than one.
     *
     * <p>The decompiler-emitted synthetic bridge {@code reduce(Object, Iterable,
     * Reducer.Context)} has been removed: the compiler generates that bridge itself, and
     * declaring it by hand clashes with the generated one.
     */
    public static class MyReducer extends Reducer<IntWritable, PostingsListDocSortedPositional, IntWritable, PostingsListDocSortedPositional> {
        private static final Runtime runtime = Runtime.getRuntime();

        /** Used-heap fraction at or above which the pending batch is merged early. */
        private float reduceMemoryThreshold = 0.9f;
        /** Collection document count, passed on to merged postings lists. */
        private int collectionDocumentCount = 0;
        /** Results of the batch merges performed so far for the current term. */
        private final List<PostingsList> mergedList = new ArrayList<>();
        /** Partial lists received for the current term that are not merged yet. */
        private final List<PostingsList> incomingLists = new ArrayList<>();
        /** Reusable target for the final merge of {@link #mergedList}. */
        private final PostingsListDocSortedPositional finalPostingsList = new PostingsListDocSortedPositional();

        /**
         * Reads the merge threshold and the collection document count from the job
         * configuration. Overrides the framework's setup hook.
         *
         * @param context task context supplying the configuration
         */
        public void setup(Reducer<IntWritable, PostingsListDocSortedPositional, IntWritable, PostingsListDocSortedPositional>.Context context) {
            Configuration configuration = context.getConfiguration();
            this.reduceMemoryThreshold = configuration.getFloat(Constants.IndexingReduceMemoryThreshold, 0.9f);
            this.collectionDocumentCount = configuration.getInt(Constants.CollectionDocumentCount, 0);
        }

        /**
         * Writes the complete postings list for one term. Overrides the framework's reduce hook.
         *
         * @param term    term id
         * @param values  partial postings lists for the term; the first element is read
         *                unconditionally, so the iterable must not be empty
         * @param context task context used for counters, status and output
         * @throws IOException          if merging or writing fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        public void reduce(IntWritable term, Iterable<PostingsListDocSortedPositional> values, Reducer<IntWritable, PostingsListDocSortedPositional, IntWritable, PostingsListDocSortedPositional>.Context context) throws IOException, InterruptedException {
            context.setStatus("t" + term);
            long startTime = System.currentTimeMillis();

            Iterator<PostingsListDocSortedPositional> it = values.iterator();
            PostingsListDocSortedPositional first = it.next();
            if (!it.hasNext()) {
                // Only one partial list: nothing to merge.
                context.write(term, first);
                context.getCounter(Reduce.OnePL).increment(1L);
            } else {
                this.mergedList.clear();
                this.incomingLists.clear();
                // Each value is copied through serialize()/create() before being retained;
                // presumably because the framework may reuse the value instance between
                // iterations - verify against the Reducer documentation.
                this.incomingLists.add(PostingsListDocSortedPositional.create(first.serialize()));
                do {
                    this.incomingLists.add(PostingsListDocSortedPositional.create(it.next().serialize()));
                    mergeLists(false, this.incomingLists, this.mergedList, context);
                } while (it.hasNext());
                mergeLists(true, this.incomingLists, this.mergedList, context);

                if (this.mergedList.size() == 1) {
                    context.write(term, (PostingsListDocSortedPositional) this.mergedList.get(0));
                } else {
                    BuildLPInvertedIndexDocSorted.LOG.info("Merging the master list");
                    this.finalPostingsList.clear();
                    PostingsListDocSortedPositional.mergeList(this.finalPostingsList, this.mergedList, this.collectionDocumentCount);
                    context.write(term, this.finalPostingsList);
                }
            }
            context.getCounter(ReduceTime.Total).increment(System.currentTimeMillis() - startTime);
        }

        /**
         * Merges the pending partial lists into one list and appends it to {@code merged}.
         *
         * @param force   when {@code true}, merge regardless of memory usage
         * @param pending lists waiting to be merged; emptied when a merge happens
         * @param merged  receives the merged list (or the single pending list as-is)
         * @param context task context used for counters and status
         * @return {@code false} if nothing was pending or the merge was deferred because
         *         memory usage is below the threshold, {@code true} if a merge happened
         * @throws IOException if serializing or merging a postings list fails
         */
        private boolean mergeLists(boolean force, List<PostingsList> pending, List<PostingsList> merged, Reducer<IntWritable, PostingsListDocSortedPositional, IntWritable, PostingsListDocSortedPositional>.Context context) throws IOException {
            if (pending.size() == 0) {
                return false;
            }
            // Fraction of the currently allocated heap that is in use.
            float usedFraction = 1.0f - ((((float) runtime.freeMemory()) * 1.0f) / ((float) runtime.totalMemory()));
            context.setStatus("m" + usedFraction);
            if (!force && usedFraction < this.reduceMemoryThreshold) {
                return false;
            }

            long startTime = System.currentTimeMillis();
            BuildLPInvertedIndexDocSorted.LOG.info(">> merging a list of " + pending.size() + " partial lists");
            if (pending.size() > 1) {
                PostingsListDocSortedPositional target = new PostingsListDocSortedPositional();
                PostingsListDocSortedPositional.mergeList(target, pending, this.collectionDocumentCount);
                pending.clear();
                merged.add(PostingsListDocSortedPositional.create(target.serialize()));
                context.getCounter(Reduce.Merges).increment(1L);
            } else {
                // A single pending list needs no merge; move it across unchanged.
                PostingsList only = pending.remove(0);
                only.setCollectionDocumentCount(this.collectionDocumentCount);
                merged.add(only);
            }
            context.getCounter(ReduceTime.Merging).increment(System.currentTimeMillis() - startTime);
            return true;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:ivory/core/index/BuildLPInvertedIndexDocSorted$Reduce.class */
    /**
     * Hadoop counter group for reducer-side events.
     */
    public enum Reduce {
        // Incremented each time more than one pending partial list is merged into one.
        Merges,
        // Incremented for each term that arrived with exactly one partial postings list.
        OnePL
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:ivory/core/index/BuildLPInvertedIndexDocSorted$ReduceTime.class */
    /**
     * Hadoop counter group accumulating wall-clock milliseconds spent in the reducer.
     */
    public enum ReduceTime {
        // Time spent in each reduce call, start to finish.
        Total,
        // Time spent merging pending partial lists (measured in mergeLists).
        Merging,
        // NOTE(review): not incremented anywhere in this file.
        Spilling
    }

    /**
     * Returns the configuration keys this tool requires: {@code Constants.NumReduceTasks}
     * and {@code Constants.IndexPath}.
     *
     * @return the shared {@link #RequiredParameters} array itself, not a copy
     */
    public String[] getRequiredParameters() {
        return RequiredParameters;
    }

    /**
     * Creates the tool with the given configuration, which is handed to the
     * {@code PowerTool} superclass constructor.
     *
     * @param configuration job configuration later read by {@link #runTool()}
     */
    public BuildLPInvertedIndexDocSorted(Configuration configuration) {
        super(configuration);
    }

    /**
     * Configures and runs the MapReduce job that builds the positional inverted index from
     * the int doc vectors stored under the index path.
     *
     * <p>If the postings directory already exists, no job is run. Otherwise the job is
     * submitted and, on success, the postings type is recorded in the index metadata.
     *
     * @return {@code 0} if the postings already existed or the job succeeded; {@code -1} if
     *         the job failed (in which case the postings type is not written)
     * @throws Exception if the configured postings class cannot be loaded, or if file system
     *                   access or job submission fails
     */
    public int runTool() throws Exception {
        Configuration conf = getConf();
        FileSystem fs = FileSystem.get(conf);
        String indexPath = conf.get(Constants.IndexPath);
        RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

        String collectionName = env.readCollectionName();
        int reduceTasks = conf.getInt(Constants.NumReduceTasks, 0);
        int minSplitSize = conf.getInt(Constants.MinSplitSize, 0);
        int collectionDocCount = env.readCollectionDocumentCount();
        // Only used for logging below; the job itself always uses PostingsListDocSortedPositional.
        Class<?> postingsClass = Class.forName(conf.get(Constants.PostingsListsType, PostingsListDocSortedPositional.class.getCanonicalName()));
        float mapMemoryThreshold = conf.getFloat(Constants.IndexingMapMemoryThreshold, 0.9f);
        float reduceMemoryThreshold = conf.getFloat(Constants.IndexingReduceMemoryThreshold, 0.9f);
        int maxHeap = conf.getInt(Constants.MaxHeap, 2048);
        int maxDocsBeforeFlush = conf.getInt(Constants.MaxNDocsBeforeFlush, 50000);

        LOG.info("PowerTool: " + BuildLPInvertedIndexDocSorted.class.getSimpleName());
        LOG.info(String.format(" - %s: %s", Constants.IndexPath, indexPath));
        LOG.info(String.format(" - %s: %s", Constants.CollectionName, collectionName));
        LOG.info(String.format(" - %s: %s", Constants.CollectionDocumentCount, Integer.valueOf(collectionDocCount)));
        LOG.info(String.format(" - %s: %s", Constants.PostingsListsType, postingsClass.getCanonicalName()));
        LOG.info(String.format(" - %s: %s", Constants.NumReduceTasks, Integer.valueOf(reduceTasks)));
        LOG.info(String.format(" - %s: %s", Constants.MinSplitSize, Integer.valueOf(minSplitSize)));
        LOG.info(String.format(" - %s: %s", Constants.IndexingMapMemoryThreshold, Float.valueOf(mapMemoryThreshold)));
        LOG.info(String.format(" - %s: %s", Constants.IndexingReduceMemoryThreshold, Float.valueOf(reduceMemoryThreshold)));
        LOG.info(String.format(" - %s: %s", Constants.MaxHeap, Integer.valueOf(maxHeap)));
        LOG.info(String.format(" - %s: %s", Constants.MaxNDocsBeforeFlush, Integer.valueOf(maxDocsBeforeFlush)));

        Path indexDir = new Path(indexPath);
        if (!fs.exists(indexDir)) {
            fs.mkdirs(indexDir);
        }
        Path inputPath = new Path(env.getIntDocVectorsDirectory());
        Path postingsPath = new Path(env.getPostingsDirectory());
        if (fs.exists(postingsPath)) {
            LOG.info("Postings already exist: no indexing will be performed.");
            return 0;
        }

        conf.setInt(Constants.CollectionDocumentCount, collectionDocCount);
        conf.setInt("mapred.min.split.size", minSplitSize);
        // Size the task containers and JVM heaps from the configured MaxHeap (default 2048 MB)
        // so the value logged above is the value actually applied.
        conf.setInt("mapreduce.map.memory.mb", maxHeap);
        conf.set("mapreduce.map.java.opts", "-Xmx" + maxHeap + "m");
        conf.setInt("mapreduce.reduce.memory.mb", maxHeap);
        conf.set("mapreduce.reduce.java.opts", "-Xmx" + maxHeap + "m");

        Job job = Job.getInstance(conf, BuildLPInvertedIndexDocSorted.class.getSimpleName() + ":" + collectionName);
        job.setJarByClass(BuildLPInvertedIndexDocSorted.class);
        job.setNumReduceTasks(reduceTasks);
        FileInputFormat.setInputPaths(job, new Path[]{inputPath});
        FileOutputFormat.setOutputPath(job, postingsPath);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(PostingsListDocSortedPositional.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(PostingsListDocSortedPositional.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        long startTime = System.currentTimeMillis();
        boolean succeeded = job.waitForCompletion(true);
        LOG.info("Job Finished in " + ((System.currentTimeMillis() - startTime) / 1000.0d) + " seconds");
        if (!succeeded) {
            // Do not record a postings type for an index that was not built.
            LOG.error("Indexing job failed; postings type not written.");
            return -1;
        }

        // NOTE(review): this literal's package ("ivory.data") differs from the imported
        // ivory.core.data.index.PostingsListDocSortedPositional. Left unchanged because the
        // readers of this metadata are not visible here - confirm which name they expect.
        env.writePostingsType("ivory.data.PostingsListDocSortedPositional");
        return 0;
    }
}
