package ivory.pwsim;

import edu.umd.cloud9.io.map.HMapIFW;
import edu.umd.cloud9.util.PowerTool;
import edu.umd.cloud9.util.map.MapIF;
import ivory.core.Constants;
import ivory.core.RetrievalEnvironment;
import ivory.core.data.index.Posting;
import ivory.core.data.index.PostingsList;
import ivory.core.data.index.PostingsReader;
import ivory.core.data.stat.DocLengthTable;
import ivory.core.data.stat.DocLengthTable2B;
import ivory.pwsim.score.ScoringModel;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/pwsim/PCP.class */
public class PCP extends PowerTool {
    /** Logger used by this tool and by its nested mapper class. */
    private static final Logger sLogger = Logger.getLogger(PCP.class);
    /** Configuration keys returned by {@link #getRequiredParameters()}. */
    public static final String[] RequiredParameters = {Constants.IndexPath, "Ivory.OutputPath", Constants.NumMapTasks, Constants.NumReduceTasks, "Ivory.ScoringModel", "Ivory.DfCut", "Ivory.BlockSize", "Ivory.TopN"};

    /* loaded from: input_file:ivory/pwsim/PCP$MyMapper.class */
    /**
     * Mapper over postings lists. For every posting whose docno lies in the
     * configured block {@code [Ivory.BlockStart, Ivory.BlockEnd)} it emits that
     * docno together with a map from every other docno in the same postings
     * list to the score the scoring model assigns to the pair for this term.
     * Postings lists with more than {@code Ivory.DfCut} postings are skipped.
     */
    private static class MyMapper extends MapReduceBase implements Mapper<IntWritable, PostingsList, IntWritable, HMapIFW> {
        private DocLengthTable mDocLengthTable;
        private ScoringModel mModel;
        private int dfCut;
        private int mBlockStart;
        private int mBlockEnd;
        private int mCollectionDocCount;
        // Reused across calls so that no Posting is allocated per posting read.
        Posting e1 = new Posting();
        Posting e2 = new Posting();

        private MyMapper() {
        }

        /**
         * Loads the document-length table, reads the block/df parameters and
         * instantiates the scoring model named by {@code Ivory.ScoringModel}.
         *
         * Each initialization stage reports its own failure and keeps the
         * underlying cause, so a bad parameter is no longer reported as a
         * document-length-table error.
         *
         * @param jobConf job configuration supplied by the framework
         * @throws RuntimeException if the document lengths cannot be loaded,
         *         a block/df parameter is invalid, or the scoring model cannot
         *         be created
         */
        public void configure(JobConf jobConf) {
            this.mCollectionDocCount = jobConf.getInt(Constants.CollectionDocumentCount, -1);

            // Stage 1: document lengths.
            try {
                // Constant-first equals: an unset job tracker key must not raise a NullPointerException.
                if ("local".equals(jobConf.get("mapred.job.tracker"))) {
                    LocalFileSystem local = FileSystem.getLocal(jobConf);
                    Path doclengthsData = new RetrievalEnvironment(jobConf.get(Constants.IndexPath), local).getDoclengthsData();
                    PCP.sLogger.debug("Reading doclengths: " + doclengthsData);
                    this.mDocLengthTable = new DocLengthTable2B(doclengthsData, local);
                } else {
                    Path[] cachedFiles = DistributedCache.getLocalCacheFiles(jobConf);
                    if (cachedFiles == null || cachedFiles.length == 0) {
                        throw new IOException("No files found in the distributed cache");
                    }
                    // The first cached file is used as the document-length data.
                    this.mDocLengthTable = new DocLengthTable2B(cachedFiles[0], FileSystem.getLocal(jobConf));
                }
            } catch (Exception e) {
                throw new RuntimeException("Error initializing DocLengthTable!", e);
            }

            // Stage 2: block and df-cut parameters.
            this.dfCut = jobConf.getInt("Ivory.DfCut", -1);
            this.mBlockStart = jobConf.getInt("Ivory.BlockStart", -1);
            this.mBlockEnd = jobConf.getInt("Ivory.BlockEnd", -1);
            if (this.dfCut <= 0 || this.mBlockStart < 0 || this.mBlockEnd <= 0) {
                throw new RuntimeException("Invalid config parameter(s): dfCut=" + this.dfCut + ", blockStart=" + this.mBlockStart + ", blockEnd=" + this.mBlockEnd);
            }

            // Stage 3: scoring model, created reflectively from its class name.
            try {
                this.mModel = (ScoringModel) Class.forName(jobConf.get("Ivory.ScoringModel")).newInstance();
                this.mModel.setDocCount(this.mDocLengthTable.getDocCount());
                this.mModel.setAvgDocLength(this.mDocLengthTable.getAvgDocLength());
            } catch (Exception e) {
                throw new RuntimeException("Mappers failed to initialize!", e);
            }
        }

        /**
         * Emits, for each in-block posting of the given postings list, the
         * pairwise scores against every other posting in the same list.
         *
         * @param intWritable     input key; only used in debug output here
         * @param postingsList    postings list to expand into document pairs
         * @param outputCollector receives (docno, map of other docno to score)
         * @param reporter        unused
         * @throws IOException if the collector fails
         */
        public void map(IntWritable intWritable, PostingsList postingsList, OutputCollector<IntWritable, HMapIFW> outputCollector, Reporter reporter) throws IOException {
            // Checked once: the inner loop runs once per posting pair, so building
            // debug strings there unconditionally is expensive.
            final boolean debug = PCP.sLogger.isDebugEnabled();
            if (debug) {
                PCP.sLogger.debug(Integer.valueOf(this.mCollectionDocCount));
            }
            postingsList.setCollectionDocumentCount(this.mCollectionDocCount);
            PostingsReader outer = postingsList.getPostingsReader();
            if (outer.getNumberOfPostings() > this.dfCut) {
                return;
            }
            this.mModel.setDF(outer.getNumberOfPostings());
            while (outer.nextPosting(this.e1)) {
                final int docno = this.e1.getDocno();
                if (docno < this.mBlockStart) {
                    continue;
                }
                if (docno >= this.mBlockEnd) {
                    // Stops at the first docno past the block.
                    // NOTE(review): presumably postings are in ascending docno order - confirm.
                    return;
                }
                HMapIFW scores = new HMapIFW();
                if (debug) {
                    PCP.sLogger.debug(intWritable + ": " + this.e1);
                }
                // A second, independent reader walks the whole list for each in-block posting.
                PostingsReader inner = postingsList.getPostingsReader();
                while (inner.nextPosting(this.e2)) {
                    if (debug) {
                        PCP.sLogger.debug(intWritable + ": " + this.e1 + ", " + this.e2);
                    }
                    if (docno != this.e2.getDocno()) {
                        scores.put(this.e2.getDocno(), this.mModel.computeScore(this.e1.getTf(), this.e2.getTf(), this.mDocLengthTable.getDocLength(docno), this.mDocLengthTable.getDocLength(this.e2.getDocno())));
                    }
                }
                outputCollector.collect(new IntWritable(docno), scores);
            }
        }
    }

    /* loaded from: input_file:ivory/pwsim/PCP$MyReducer.class */
    /**
     * Reducer that sums all partial score maps received for a docno and emits
     * a single map for it, optionally truncated to the first
     * {@code Ivory.TopN} entries in the order given by
     * {@code getEntriesSortedByValue()}.
     */
    private static class MyReducer extends MapReduceBase implements Reducer<IntWritable, HMapIFW, IntWritable, HMapIFW> {
        // Both maps are reused across reduce() calls and cleared at the start of each.
        HMapIFW map = new HMapIFW();
        HMapIFW newMap = new HMapIFW();
        // Values <= 0 mean "keep every entry".
        int topN = -1;

        private MyReducer() {
        }

        /**
         * Reads the {@code Ivory.TopN} limit; defaults to -1 when unset.
         *
         * @param jobConf job configuration supplied by the framework
         */
        public void configure(JobConf jobConf) {
            this.topN = jobConf.getInt("Ivory.TopN", -1);
        }

        /**
         * Accumulates the partial maps for one key and emits the result.
         *
         * @param intWritable     the docno key, emitted unchanged
         * @param it              partial score maps for the key
         * @param outputCollector receives the key and the combined map
         * @param reporter        unused
         * @throws IOException if the collector fails
         */
        public void reduce(IntWritable intWritable, Iterator<HMapIFW> it, OutputCollector<IntWritable, HMapIFW> outputCollector, Reporter reporter) throws IOException {
            this.map.clear();
            while (it.hasNext()) {
                this.map.plus(it.next());
            }
            this.newMap.clear();
            // NOTE(review): the emptiness guard protects against getEntriesSortedByValue()
            // yielding null for an empty map, which some Cloud9 versions appear to do - confirm.
            if (!this.map.isEmpty()) {
                // One loop serves both modes: without a positive topN the limit is unbounded.
                final int limit = this.topN > 0 ? this.topN : Integer.MAX_VALUE;
                int kept = 0;
                for (MapIF.Entry entry : this.map.getEntriesSortedByValue()) {
                    if (kept >= limit) {
                        break;
                    }
                    this.newMap.put(entry.getKey(), entry.getValue());
                    kept++;
                }
            }
            outputCollector.collect(intWritable, this.newMap);
        }
    }

    /**
     * Creates the tool with the given configuration and sets this class's
     * logger level to INFO.
     *
     * @param configuration passed through to the superclass constructor
     */
    public PCP(Configuration configuration) {
        super(configuration);
        sLogger.setLevel(Level.INFO);
    }

    /**
     * Returns the configuration keys this tool declares as required.
     *
     * @return the shared {@link #RequiredParameters} array itself, not a copy
     */
    public String[] getRequiredParameters() {
        return RequiredParameters;
    }

    /**
     * Runs the computation as a sequence of MapReduce jobs, one per block of
     * docnos. Block {@code k} covers {@code [k * blockSize, (k + 1) * blockSize)},
     * except the last block, which ends at the collection document count.
     * Each job writes to {@code <OutputPath>/block<k>}.
     *
     * If the output path already exists, nothing is run and 0 is returned.
     *
     * @return 0 on every non-exceptional path
     * @throws IllegalArgumentException if {@code Ivory.BlockSize} is not positive
     * @throws Exception propagated from file-system access or job execution
     */
    public int runTool() throws Exception {
        Configuration conf = getConf();
        String indexPath = conf.get(Constants.IndexPath);
        String outputPath = conf.get("Ivory.OutputPath");
        int numMapTasks = conf.getInt(Constants.NumMapTasks, 0);
        int numReduceTasks = conf.getInt(Constants.NumReduceTasks, 0);
        int dfCut = conf.getInt("Ivory.DfCut", -1);
        int blockSize = conf.getInt("Ivory.BlockSize", -1);
        int topN = conf.getInt("Ivory.TopN", -1);

        // A zero block size would divide by zero below, and a negative one would
        // make the block loop run zero times and report success without doing work.
        if (blockSize <= 0) {
            throw new IllegalArgumentException("Ivory.BlockSize must be positive, got " + blockSize);
        }

        FileSystem fs = FileSystem.get(conf);
        RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
        String collectionName = env.readCollectionName();
        int numDocs = env.readCollectionDocumentCount();
        Path doclengthsPath = env.getDoclengthsData();
        String scoringModel = conf.get("Ivory.ScoringModel");

        sLogger.info("Characteristics of the collection:");
        sLogger.info(" - CollectionName: " + collectionName);
        sLogger.info(" - IndexPath: " + indexPath);
        sLogger.info("Characteristics of the job:");
        sLogger.info(" - NumMapTasks: " + numMapTasks);
        sLogger.info(" - NumReduceTasks: " + numReduceTasks);
        // Logs the same value that is used in the job name below.
        sLogger.info(" - DfCut: " + dfCut);
        sLogger.info(" - BlockSize: " + blockSize);
        sLogger.info(" - ScoringModel: " + scoringModel);
        sLogger.info(" - topN: " + topN);
        sLogger.info(" - OutputPath: " + outputPath);

        // Stored in the shared configuration so every per-block JobConf inherits it.
        conf.setInt(Constants.CollectionDocumentCount, numDocs);

        if (fs.exists(new Path(outputPath))) {
            System.out.println("PCP output path already exists!");
            return 0;
        }

        // NOTE(review): MyMapper treats a block as half-open [start, end) and the last
        // block ends at numDocs, so a docno equal to numDocs is never processed -
        // confirm whether docnos are 0- or 1-based.
        int numBlocks = (numDocs / blockSize) + 1;
        for (int block = 0; block < numBlocks; block++) {
            int blockStart = blockSize * block;
            boolean lastBlock = block == numBlocks - 1;
            int blockEnd = lastBlock ? numDocs : blockSize * (block + 1);

            JobConf job = new JobConf(conf, PCP.class);
            // Registers the document-length data as a distributed-cache file for the job.
            DistributedCache.addCacheFile(doclengthsPath.toUri(), job);
            sLogger.info("block " + block + ": " + blockStart + "-" + blockEnd);
            job.setInt("Ivory.BlockStart", blockStart);
            job.setInt("Ivory.BlockEnd", blockEnd);
            job.setJobName("PCP:" + collectionName + "-dfCut=" + dfCut + (topN > 0 ? "-topN" + topN : "-all") + ":Block #" + block);
            job.setNumMapTasks(numMapTasks);
            job.setNumReduceTasks(numReduceTasks);

            FileInputFormat.setInputPaths(job, new Path(env.getPostingsDirectory()));
            FileOutputFormat.setOutputPath(job, new Path(outputPath + "/block" + block));
            job.setInputFormat(SequenceFileInputFormat.class);
            job.setOutputKeyClass(IntWritable.class);
            job.setOutputValueClass(HMapIFW.class);
            job.setOutputFormat(SequenceFileOutputFormat.class);

            job.setMapperClass(MyMapper.class);
            job.setCombinerClass(IdentityReducer.class);
            job.setReducerClass(MyReducer.class);

            // Blocks are processed strictly one after another.
            JobClient.runJob(job);
        }
        return 0;
    }
}
