package ivory.lsh.eval;

import edu.umd.cloud9.io.FSLineReader;
import edu.umd.cloud9.util.map.HMapII;
import ivory.core.data.document.WeightedIntDocVector;
import ivory.lsh.eval.SampleSignatures;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/lsh/eval/SampleDocVectors.class */
/**
 * Hadoop tool that copies a sample of (IntWritable, WeightedIntDocVector) records from a
 * sequence-file input to a sequence-file output.
 *
 * <p>Two sampling modes are supported, selected by the number of command-line arguments:
 * <ul>
 *   <li>4 arguments: random sampling driven by {@code sample-freq};</li>
 *   <li>5 arguments: only records whose key appears in the file at {@code sample-docnos-path}
 *       (one integer per line, shipped to the tasks through the distributed cache) are kept,
 *       and {@code sample-freq} is ignored.</li>
 * </ul>
 *
 * <p>Usage: {@code [input] [output-dir] [number-of-mappers] [sample-freq] ([sample-docnos-path])}
 */
public class SampleDocVectors extends Configured implements Tool {
    // Classes wired into the job by run(). Kept as raw, non-final statics so that any existing
    // code in this package that reads or reassigns them keeps compiling.
    static Class keyClass = IntWritable.class;
    static Class valueClass = WeightedIntDocVector.class;
    static Class inputFormat = SequenceFileInputFormat.class;
    private static final Logger sLogger = Logger.getLogger(SampleDocVectors.class);

    /** Job-configuration key used to hand the sampling frequency from the driver to the mappers. */
    private static final String SAMPLE_FREQUENCY_KEY = "SampleFrequency";

    /**
     * Mapper that forwards a record unchanged when it is selected by the active sampling mode
     * and drops it otherwise.
     */
    private static class MyMapper extends MapReduceBase implements Mapper<IntWritable, WeightedIntDocVector, IntWritable, WeightedIntDocVector> {
        /** Sampling frequency read from the job configuration; -1 when the key is absent. */
        private int sampleFreq;

        /** Keys to keep, or {@code null} when no sample file was distributed (random mode). */
        private HMapII samplesMap = null;

        /**
         * Reads the sampling frequency and, if a file is present in the distributed cache,
         * loads the set of keys to keep from the first cached file.
         *
         * @param jobConf configuration of the running job
         * @throws RuntimeException if the cache lookup or the sample file cannot be read or parsed;
         *         the original exception is attached as the cause
         */
        public void configure(JobConf jobConf) {
            sampleFreq = jobConf.getInt(SAMPLE_FREQUENCY_KEY, -1);
            try {
                Path[] localFiles = DistributedCache.getLocalCacheFiles(jobConf);
                if (localFiles != null) {
                    SampleDocVectors.sLogger.setLevel(Level.INFO);
                    samplesMap = loadSampleKeys(localFiles[0], jobConf);
                    SampleDocVectors.sLogger.info(samplesMap);
                }
            } catch (Exception e) {
                // Fail the task rather than continue with a missing or partial sample set,
                // and keep the cause so the real problem shows up in the task logs.
                throw new RuntimeException("Error reading doc vectors!", e);
            }
        }

        /**
         * Loads one integer key per line from a local file.
         *
         * @param file local path of the sample file
         * @param jobConf configuration used to obtain the local file system
         * @return map containing every parsed key (mapped to 1)
         * @throws IOException if the file cannot be opened, read or closed
         * @throws NumberFormatException if a non-blank line is not a valid integer
         */
        private static HMapII loadSampleKeys(Path file, JobConf jobConf) throws IOException {
            HMapII keys = new HMapII();
            FSLineReader reader = new FSLineReader(file, FileSystem.getLocal(jobConf));
            try {
                Text line = new Text();
                while (reader.readLine(line) != 0) {
                    // Tolerate surrounding whitespace and blank lines (e.g. a trailing newline).
                    String token = line.toString().trim();
                    if (token.length() > 0) {
                        keys.put(Integer.parseInt(token), 1);
                    }
                }
            } finally {
                // Release the reader even when reading or parsing fails.
                reader.close();
            }
            return keys;
        }

        /**
         * Emits the record unchanged if it is part of the sample.
         *
         * <p>In random mode the record is kept when {@code (int) (Math.random() * sampleFreq)} is 0,
         * i.e. roughly one record in {@code sampleFreq}; for a frequency of -1, 0 or 1 the product
         * always truncates to 0, so every record is kept. In sample-file mode the record is kept
         * when its key is in the loaded set, and a counter is incremented for each kept record.
         *
         * @param intWritable record key
         * @param weightedIntDocVector record value
         * @param outputCollector receives the sampled records
         * @param reporter used to update the sample counter
         * @throws IOException if the collector fails to accept a record
         */
        public void map(IntWritable intWritable, WeightedIntDocVector weightedIntDocVector, OutputCollector<IntWritable, WeightedIntDocVector> outputCollector, Reporter reporter) throws IOException {
            if (samplesMap == null) {
                if (((int) (Math.random() * sampleFreq)) == 0) {
                    outputCollector.collect(intWritable, weightedIntDocVector);
                }
                return;
            }
            if (samplesMap.containsKey(intWritable.get())) {
                reporter.incrCounter(SampleSignatures.mapoutput.count, 1L);
                outputCollector.collect(intWritable, weightedIntDocVector);
            }
        }
    }

    /**
     * Prints the command-line usage to standard output.
     *
     * @return -1, the value {@link #run(String[])} reports for invalid arguments
     */
    private static int printUsage() {
        System.out.println("usage: [input] [output-dir] [number-of-mappers] [sample-freq] ([sample-docnos-path])");
        return -1;
    }

    /**
     * Configures and runs the sampling job.
     *
     * @param strArr {@code [input] [output-dir] [number-of-mappers] [sample-freq]} optionally
     *        followed by {@code [sample-docnos-path]}
     * @return 0 when the job was run, -1 when the argument count is wrong
     * @throws RuntimeException if the input path does not exist
     * @throws Exception if an argument cannot be parsed or the job fails
     */
    public int run(String[] strArr) throws Exception {
        if (strArr.length != 4 && strArr.length != 5) {
            return printUsage();
        }
        String inputPath = strArr[0];
        String outputPath = strArr[1];
        int numMappers = Integer.parseInt(strArr[2]);
        int sampleFreq = Integer.parseInt(strArr[3]);

        // Build on the configuration supplied through setConf (ToolRunner does this) so that
        // generic command-line options are honoured; fall back to defaults if none was set.
        Configuration baseConf = getConf();
        JobConf jobConf = baseConf == null
                ? new JobConf(SampleDocVectors.class)
                : new JobConf(baseConf, SampleDocVectors.class);
        FileSystem fs = FileSystem.get(jobConf);

        jobConf.setJobName(getClass().getName());
        if (strArr.length == 5) {
            // A sample file takes over: the frequency is not used for selection in that mode.
            sampleFreq = -1;
            DistributedCache.addCacheFile(new URI(strArr[4]), jobConf);
        }
        if (!fs.exists(new Path(inputPath))) {
            throw new RuntimeException("Error, input path does not exist!");
        }
        sLogger.setLevel(Level.INFO);

        // Any previous output is removed so the job can write to the same directory again.
        fs.delete(new Path(outputPath), true);
        FileInputFormat.setInputPaths(jobConf, new Path(inputPath));
        FileOutputFormat.setOutputPath(jobConf, new Path(outputPath));
        FileOutputFormat.setCompressOutput(jobConf, false);

        jobConf.set("mapred.child.java.opts", "-Xmx2048m");
        jobConf.setInt("mapred.map.max.attempts", 100);
        jobConf.setInt("mapred.reduce.max.attempts", 100);
        jobConf.setInt("mapred.task.timeout", 600000000);
        jobConf.setInt(SAMPLE_FREQUENCY_KEY, sampleFreq);

        sLogger.info("Running job " + jobConf.getJobName());
        sLogger.info("Input directory: " + inputPath);
        sLogger.info("Output directory: " + outputPath);
        sLogger.info("Number of mappers: " + numMappers);
        sLogger.info("Sample frequency: " + sampleFreq);

        jobConf.setNumMapTasks(numMappers);
        // A single identity reducer gathers all sampled records into one output.
        jobConf.setNumReduceTasks(1);
        jobConf.setInputFormat(inputFormat);
        jobConf.setMapOutputKeyClass(keyClass);
        jobConf.setMapOutputValueClass(valueClass);
        jobConf.setOutputKeyClass(keyClass);
        jobConf.setOutputValueClass(valueClass);
        jobConf.setMapperClass(MyMapper.class);
        jobConf.setReducerClass(IdentityReducer.class);
        jobConf.setOutputFormat(SequenceFileOutputFormat.class);

        JobClient.runJob(jobConf);
        return 0;
    }

    /**
     * Command-line entry point; delegates to {@link ToolRunner} with a fresh configuration.
     *
     * @param strArr command-line arguments, see {@link #run(String[])}
     * @throws Exception if the tool fails
     */
    public static void main(String[] strArr) throws Exception {
        ToolRunner.run(new Configuration(), new SampleDocVectors(), strArr);
    }
}
