package ivory.core.index;

import edu.umd.cloud9.io.pair.PairOfIntLong;
import edu.umd.cloud9.util.PowerTool;
import ivory.core.Constants;
import ivory.core.RetrievalEnvironment;
import ivory.core.data.dictionary.DefaultFrequencySortedDictionary;
import ivory.core.data.index.PostingsList;
import ivory.core.data.index.PostingsListDocSortedPositional;
import ivory.core.data.stat.PrefixEncodedGlobalStats;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/core/index/DistributeGlobalStatsToPostings.class */
public class DistributeGlobalStatsToPostings extends PowerTool {
    /** Logger shared by the driver and by the nested mapper class. */
    private static final Logger sLogger = Logger.getLogger(DistributeGlobalStatsToPostings.class);
    /**
     * Configuration keys returned by {@link #getRequiredParameters()}: the index path, the
     * global-stats path and the number of map tasks. All three are read in {@code runTool()}.
     */
    public static final String[] RequiredParameters = {Constants.IndexPath, "Ivory.GlobalStatsPath", Constants.NumMapTasks};

    /* loaded from: input_file:ivory/core/index/DistributeGlobalStatsToPostings$MyMapper.class */
    private static class MyMapper extends MapReduceBase implements Mapper<IntWritable, PostingsList, IntWritable, PostingsList> {
        private PrefixEncodedGlobalStats gs;
        private DefaultFrequencySortedDictionary mTermIdMap;

        private MyMapper() {
        }

        public void configure(JobConf jobConf) {
            try {
                Path[] localCacheFiles = DistributedCache.getLocalCacheFiles(jobConf);
                DistributeGlobalStatsToPostings.sLogger.info("0: " + localCacheFiles[0]);
                DistributeGlobalStatsToPostings.sLogger.info("1: " + localCacheFiles[1]);
                DistributeGlobalStatsToPostings.sLogger.info("2: " + localCacheFiles[2]);
                DistributeGlobalStatsToPostings.sLogger.info("3: " + localCacheFiles[3]);
                DistributeGlobalStatsToPostings.sLogger.info("4: " + localCacheFiles[4]);
                DistributeGlobalStatsToPostings.sLogger.info("5: " + localCacheFiles[5]);
                FileSystem local = FileSystem.getLocal(jobConf);
                this.gs = new PrefixEncodedGlobalStats(localCacheFiles[0], local);
                this.gs.loadDFStats(localCacheFiles[1], local);
                this.gs.loadCFStats(localCacheFiles[2], local);
                DistributeGlobalStatsToPostings.sLogger.info("loading TermIdMap from " + jobConf.get(Constants.IndexPath));
                this.mTermIdMap = new DefaultFrequencySortedDictionary(localCacheFiles[3], localCacheFiles[4], localCacheFiles[5], local);
            } catch (IOException e) {
                e.printStackTrace();
                throw new RuntimeException("Error loading global term stats!");
            }
        }

        public void map(IntWritable intWritable, PostingsList postingsList, OutputCollector<IntWritable, PostingsList> outputCollector, Reporter reporter) throws IOException {
            PairOfIntLong stats = this.gs.getStats(this.mTermIdMap.getTerm(intWritable.get()));
            if (stats == null) {
                postingsList.setCf(-1L);
                postingsList.setDf(-1);
            } else {
                postingsList.setCf(stats.getRightElement());
                postingsList.setDf(stats.getLeftElement());
            }
            outputCollector.collect(intWritable, postingsList);
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((IntWritable) obj, (PostingsList) obj2, (OutputCollector<IntWritable, PostingsList>) outputCollector, reporter);
        }
    }

    /**
     * Returns the configuration keys this tool requires.
     *
     * @return the shared {@link #RequiredParameters} array (not a copy)
     */
    public String[] getRequiredParameters() {
        return DistributeGlobalStatsToPostings.RequiredParameters;
    }

    /**
     * Creates the tool with the given configuration, which is passed straight to the
     * superclass constructor.
     *
     * @param configuration configuration later read back through {@code getConf()} in
     *     {@code runTool()}
     */
    public DistributeGlobalStatsToPostings(Configuration configuration) {
        super(configuration);
    }

    /**
     * Copies collection-level statistics from the global-stats location into the index and
     * runs a map-only job that rewrites every postings list with global df/cf values.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Back up the index's own collection properties as {@code *.local} (only if no
     *       backup exists yet).</li>
     *   <li>Read collection length, document count and average document length from the
     *       global-stats location and write them into the index.</li>
     *   <li>Rename {@code postings} to {@code postings.old} and run the job with
     *       {@code postings.old} as input and {@code postings} as output.</li>
     * </ol>
     *
     * @return 0 when the job has been run
     * @throws Exception if file-system access or the job fails, or if the existing postings
     *     directory cannot be renamed out of the way
     */
    public int runTool() throws Exception {
        sLogger.info("Distributing df/cf stats...");
        JobConf conf = new JobConf(getConf(), DistributeGlobalStatsToPostings.class);
        FileSystem fs = FileSystem.get(conf);
        int numMapTasks = conf.getInt(Constants.NumMapTasks, 0);
        int numReduceTasks = conf.getInt(Constants.NumReduceTasks, 1);
        String indexPath = conf.get(Constants.IndexPath);
        String globalStatsPath = conf.get("Ivory.GlobalStatsPath");
        RetrievalEnvironment indexEnv = new RetrievalEnvironment(indexPath, fs);
        String collectionName = indexEnv.readCollectionName();
        sLogger.info(" - CollectionName: " + collectionName);
        sLogger.info(" - IndexPath: " + indexPath);
        sLogger.info(" - NumMapTasks: " + numMapTasks);
        // Logged for information only; the job below always runs with zero reduce tasks.
        sLogger.info(" - NumReduceTasks: " + numReduceTasks);

        // Keep the index-local values before they are overwritten with the global ones.
        preserveLocalProperty(fs, indexPath, "property.CollectionDocumentCount");
        preserveLocalProperty(fs, indexPath, "property.CollectionAverageDocumentLength");
        preserveLocalProperty(fs, indexPath, "property.CollectionLength");

        RetrievalEnvironment globalEnv = new RetrievalEnvironment(globalStatsPath, fs);
        long collectionLength = globalEnv.readCollectionLength();
        int documentCount = globalEnv.readCollectionDocumentCount();
        float averageDocumentLength = globalEnv.readCollectionAverageDocumentLength();
        sLogger.info("writing global stats from all index segments: ");
        sLogger.info(" - CollectionLength: " + collectionLength);
        sLogger.info(" - CollectionDocumentCount: " + documentCount);
        sLogger.info(" - AverageDocumentLength: " + averageDocumentLength);
        indexEnv.writeCollectionLength(collectionLength);
        indexEnv.writeCollectionDocumentCount(documentCount);
        indexEnv.writeCollectionAverageDocumentLength(averageDocumentLength);

        Path postingsPath = new Path(indexPath + "/postings/");
        Path oldPostingsPath = new Path(indexPath + "/postings.old/");
        if (fs.exists(postingsPath)) {
            sLogger.info("renaming " + postingsPath.getName() + " to " + oldPostingsPath.getName());
            // The job writes to postingsPath, so it cannot proceed while the old data is
            // still there; report the failed rename directly instead of ignoring it.
            if (!fs.rename(postingsPath, oldPostingsPath)) {
                throw new IOException("Failed to rename " + postingsPath + " to " + oldPostingsPath);
            }
        }

        conf.setJobName("DistributeGlobalStatsToPostings:" + collectionName);
        FileInputFormat.setInputPaths(conf, new Path[]{oldPostingsPath});
        FileOutputFormat.setOutputPath(conf, postingsPath);
        conf.setNumMapTasks(numMapTasks);
        conf.setNumReduceTasks(0);

        // Order matters: MyMapper.configure reads these entries by position (0..5).
        DistributedCache.addCacheFile(new URI(globalStatsPath + "/dict.terms"), conf);
        DistributedCache.addCacheFile(new URI(globalStatsPath + "/dict.df"), conf);
        DistributedCache.addCacheFile(new URI(globalStatsPath + "/dict.cf"), conf);
        DistributedCache.addCacheFile(new URI(indexEnv.getIndexTermsData()), conf);
        DistributedCache.addCacheFile(new URI(indexEnv.getIndexTermIdsData()), conf);
        DistributedCache.addCacheFile(new URI(indexEnv.getIndexTermIdMappingData()), conf);

        conf.set("mapred.child.java.opts", "-Xmx2048m");
        conf.setInt("mapred.map.max.attempts", 10);
        conf.setInt("mapred.reduce.max.attempts", 10);
        conf.setInt("mapred.min.split.size", 1073741824); // 1024^3 bytes
        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputKeyClass(IntWritable.class);
        conf.setOutputValueClass(PostingsListDocSortedPositional.class);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        conf.setMapperClass(MyMapper.class);
        conf.setReducerClass(IdentityReducer.class);
        JobClient.runJob(conf);
        return 0;
    }

    /**
     * Renames {@code <indexPath>/<propertyName>} to {@code <indexPath>/<propertyName>.local}
     * unless the {@code .local} file already exists. A failed rename is logged as a warning
     * and does not abort the run.
     */
    private static void preserveLocalProperty(FileSystem fs, String indexPath, String propertyName) throws IOException {
        Path property = new Path(indexPath + "/" + propertyName);
        Path localBackup = new Path(indexPath + "/" + propertyName + ".local");
        if (fs.exists(localBackup)) {
            return;
        }
        sLogger.info("preserving local " + property.getName());
        if (!fs.rename(property, localBackup)) {
            sLogger.warn("could not rename " + property + " to " + localBackup);
        }
    }

    /**
     * Command-line entry point. Expects exactly two arguments: the global-stats path followed
     * by the index path. Prints a usage line and exits with status -1 otherwise.
     *
     * <p>The number of map tasks is fixed at 100.
     *
     * @param strArr command-line arguments: {@code [global-stats] [index-path]}
     * @throws Exception if running the tool fails
     */
    public static void main(String[] strArr) throws Exception {
        boolean wrongArgCount = strArr.length != 2;
        if (wrongArgCount) {
            System.out.println("usage: [global-stats] [index-path]");
            System.exit(-1);
        }

        Configuration conf = new Configuration();
        String globalStatsPath = strArr[0];
        String indexPath = strArr[1];

        conf.set(Constants.IndexPath, indexPath);
        conf.set("Ivory.GlobalStatsPath", globalStatsPath);
        conf.setInt(Constants.NumMapTasks, 100);

        sLogger.info("Distributing global statistics to " + indexPath);
        DistributeGlobalStatsToPostings tool = new DistributeGlobalStatsToPostings(conf);
        tool.run();
    }
}
