package ivory.lsh.pwsim.cl;

import edu.umd.cloud9.io.FSLineReader;
import edu.umd.cloud9.io.map.HMapIIW;
import edu.umd.cloud9.io.pair.PairOfInts;
import ivory.core.Constants;
import ivory.lsh.data.BitsSignatureTable;
import ivory.lsh.data.Signature;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Iterator;
import java.util.TreeSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/lsh/pwsim/cl/CLSlidingWindowPwsim.class */
public class CLSlidingWindowPwsim extends Configured implements Tool {
    private static final Logger sLogger = Logger.getLogger(CLSlidingWindowPwsim.class);
    public static final String[] RequiredParameters = {Constants.NumMapTasks, Constants.NumReduceTasks, Constants.CollectionName, "Ivory.NumOfPermutations", "Ivory.SlidingWindowSize", "Ivory.MaxHammingDistance"};

    /* loaded from: input_file:ivory/lsh/pwsim/cl/CLSlidingWindowPwsim$MyMapper.class */
    public static class MyMapper extends MapReduceBase implements Mapper<IntWritable, BitsSignatureTable, PairOfInts, IntWritable> {
        static int slidingWindowSize;
        static int maxDist;
        Path[] localFiles;
        int hammingDistance;
        Signature[] signatures = null;
        int[] docNos = null;
        HMapIIW samplesMap = null;
        PairOfInts outKey = new PairOfInts();
        IntWritable outValue = new IntWritable();
        int nSignatures = -1;

        public void configure(JobConf jobConf) {
            slidingWindowSize = jobConf.getInt("Ivory.SlidingWindowSize", -1);
            maxDist = jobConf.getInt("Ivory.MaxHammingDistance", -1);
            CLSlidingWindowPwsim.sLogger.setLevel(Level.INFO);
            CLSlidingWindowPwsim.sLogger.info("configure");
            CLSlidingWindowPwsim.sLogger.info(Integer.valueOf(maxDist));
            CLSlidingWindowPwsim.sLogger.info(Integer.valueOf(slidingWindowSize));
            try {
                this.localFiles = DistributedCache.getLocalCacheFiles(jobConf);
                if (this.localFiles == null || this.localFiles.length <= 0) {
                    CLSlidingWindowPwsim.sLogger.info("samples file does not exist");
                    return;
                }
                this.samplesMap = new HMapIIW();
                try {
                    FSLineReader fSLineReader = new FSLineReader(jobConf.get("Ivory.SampleFile"), FileSystem.get(jobConf));
                    Text text = new Text();
                    while (fSLineReader.readLine(text) != 0) {
                        int parseInt = Integer.parseInt(text.toString());
                        CLSlidingWindowPwsim.sLogger.info(parseInt + " --> sample");
                        this.samplesMap.put(parseInt, 1);
                    }
                    fSLineReader.close();
                } catch (IOException e) {
                }
                CLSlidingWindowPwsim.sLogger.info(this.samplesMap.size() + " sampled");
            } catch (Exception e2) {
                throw new RuntimeException("Error reading doc vectors!");
            }
        }

        public void map(IntWritable intWritable, BitsSignatureTable bitsSignatureTable, OutputCollector<PairOfInts, IntWritable> outputCollector, Reporter reporter) throws IOException {
            this.signatures = bitsSignatureTable.getSignatures();
            this.docNos = bitsSignatureTable.getDocNos();
            this.nSignatures = bitsSignatureTable.getNumOfSignatures();
            for (int i = 0; i < this.nSignatures; i++) {
                if ((this.docNos[i] > 1000000000 && this.samplesMap == null) || (this.samplesMap != null && this.samplesMap.containsKey(this.docNos[i]))) {
                    for (int i2 = i - 1; i2 > i - slidingWindowSize && i2 >= 0; i2--) {
                        if (this.docNos[i2] <= 1000000000) {
                            reporter.incrCounter(mapoutput.PrefixSum, this.signatures[i].getLongestPrefix(this.signatures[i2]));
                            reporter.incrCounter(mapoutput.PROCESSEDPAIRS, 1L);
                            this.hammingDistance = this.signatures[i].hammingDistance(this.signatures[i2], maxDist);
                            CLSlidingWindowPwsim.sLogger.debug(Integer.valueOf(this.hammingDistance));
                            if (this.hammingDistance <= maxDist) {
                                reporter.incrCounter(mapoutput.EMITTEDPAIRS, 1L);
                                this.outValue.set(this.hammingDistance);
                                this.outKey.set(this.docNos[i2], this.docNos[i]);
                                outputCollector.collect(this.outKey, this.outValue);
                            }
                        }
                    }
                    for (int i3 = i + 1; i3 < i + slidingWindowSize && i3 < this.nSignatures; i3++) {
                        if (this.docNos[i3] <= 1000000000) {
                            reporter.incrCounter(mapoutput.PrefixSum, this.signatures[i].getLongestPrefix(this.signatures[i3]));
                            reporter.incrCounter(mapoutput.PROCESSEDPAIRS, 1L);
                            this.hammingDistance = this.signatures[i].hammingDistance(this.signatures[i3], maxDist);
                            CLSlidingWindowPwsim.sLogger.debug(Integer.valueOf(this.hammingDistance));
                            if (this.hammingDistance <= maxDist) {
                                reporter.incrCounter(mapoutput.EMITTEDPAIRS, 1L);
                                this.outValue.set(this.hammingDistance);
                                this.outKey.set(this.docNos[i3], this.docNos[i]);
                                outputCollector.collect(this.outKey, this.outValue);
                            }
                        }
                    }
                }
            }
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((IntWritable) obj, (BitsSignatureTable) obj2, (OutputCollector<PairOfInts, IntWritable>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:ivory/lsh/pwsim/cl/CLSlidingWindowPwsim$MyReducer.class */
    public static class MyReducer extends MapReduceBase implements Reducer<PairOfInts, IntWritable, PairOfInts, IntWritable> {
        IntWritable outValue = new IntWritable();
        HashMap<String, Integer> map = new HashMap<>();

        public void reduce(PairOfInts pairOfInts, Iterator<IntWritable> it, OutputCollector<PairOfInts, IntWritable> outputCollector, Reporter reporter) throws IOException {
            outputCollector.collect(pairOfInts, it.next());
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterator it, OutputCollector outputCollector, Reporter reporter) throws IOException {
            reduce((PairOfInts) obj, (Iterator<IntWritable>) it, (OutputCollector<PairOfInts, IntWritable>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:ivory/lsh/pwsim/cl/CLSlidingWindowPwsim$MyReducerTopN.class */
    public static class MyReducerTopN extends MapReduceBase implements Reducer<IntWritable, PairOfInts, IntWritable, PairOfInts> {
        int numResults;
        TreeSet<PairOfInts> list = new TreeSet<>();

        public void configure(JobConf jobConf) {
            this.numResults = jobConf.getInt("Ivory.NumResults", -1);
            CLSlidingWindowPwsim.sLogger.info("numResults");
        }

        public void reduce(IntWritable intWritable, Iterator<PairOfInts> it, OutputCollector<IntWritable, PairOfInts> outputCollector, Reporter reporter) throws IOException {
            this.list.clear();
            while (it.hasNext()) {
                PairOfInts next = it.next();
                this.list.add(new PairOfInts(next.getLeftElement(), next.getRightElement()));
                reporter.incrCounter(mapoutput.count, 1L);
            }
            for (int i = 0; !this.list.isEmpty() && i < this.numResults; i++) {
                outputCollector.collect(intWritable, this.list.pollFirst());
            }
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterator it, OutputCollector outputCollector, Reporter reporter) throws IOException {
            reduce((IntWritable) obj, (Iterator<PairOfInts>) it, (OutputCollector<IntWritable, PairOfInts>) outputCollector, reporter);
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:ivory/lsh/pwsim/cl/CLSlidingWindowPwsim$mapoutput.class */
    /** Hadoop counters reported by the mapper and reducers of this job. */
    public enum mapoutput {
        /** Incremented once for every value consumed by MyReducerTopN.reduce. */
        count,
        /** Incremented once for every signature pair compared in MyMapper.map. */
        PROCESSEDPAIRS,
        /** Incremented once for every pair emitted because its distance is within the threshold. */
        EMITTEDPAIRS,
        /** Sum of getLongestPrefix(...) over all compared pairs; run() divides it by PROCESSEDPAIRS. */
        PrefixSum
    }

    /**
     * Writes the command-line synopsis to standard output.
     *
     * @return always -1, so callers can use it as an error exit code
     */
    private static int printUsage() {
        final String usage = "usage: [input-path] [output-path] [window-size] [max-distance] ([sample-docnos])";
        System.out.println(usage);
        return -1;
    }

    /**
     * Returns the configuration keys listed in {@link #RequiredParameters}.
     *
     * @return the shared {@code RequiredParameters} array itself (not a copy)
     */
    public String[] getRequiredParameters() {
        final String[] required = CLSlidingWindowPwsim.RequiredParameters;
        return required;
    }

    /**
     * Configures and runs the sliding-window pairwise-similarity job.
     *
     * <p>Arguments: {@code [input-path] [output-path] [window-size] [max-distance] ([sample-docnos])}.
     * When a sample file is given it is added to the distributed cache and the job writes text
     * output; otherwise it writes sequence files. If the output path already exists the job is
     * skipped. After the job finishes, the elapsed time and the average prefix length
     * (PrefixSum / PROCESSEDPAIRS) are printed to standard output.
     *
     * @param strArr command-line arguments as described above
     * @return -1 when the number of arguments is wrong, 0 otherwise
     * @throws Exception if an argument cannot be parsed or the job fails
     */
    @Override
    public int run(String[] strArr) throws Exception {
        if (strArr.length != 4 && strArr.length != 5) {
            // Stop here: continuing would index past the end of the argument array.
            return printUsage();
        }
        JobConf jobConf = new JobConf(getConf(), CLSlidingWindowPwsim.class);
        FileSystem fileSystem = FileSystem.get(jobConf);
        String inputPath = strArr[0];
        String outputPath = strArr[1];
        int numMapTasks = jobConf.getInt(Constants.NumMapTasks, 100);
        int numReduceTasks = jobConf.getInt(Constants.NumReduceTasks, 1);
        int windowSize = Integer.parseInt(strArr[2]);
        int maxDistance = Integer.parseInt(strArr[3]);
        int numResults = jobConf.getInt("Ivory.NumResults", -1);
        if (fileSystem.exists(new Path(outputPath))) {
            sLogger.info("SlidingWindowPwsim output already exists! Quitting...\nPath: " + outputPath);
            return 0;
        }

        // The optional fifth argument names a file of sample docnos.
        String sampleFile = "";
        if (strArr.length == 5) {
            sampleFile = strArr[4];
            DistributedCache.addCacheFile(new URI(sampleFile), jobConf);
        }
        boolean hasSampleFile = !sampleFile.equals("");

        jobConf.setJobName("SlidingWindowPwsim:" + jobConf.get(Constants.CollectionName) + strArr[0].replaceFirst("tables", "") + "_B=" + windowSize + "_" + numResults);
        FileInputFormat.setInputPaths(jobConf, new Path[]{new Path(inputPath)});
        FileOutputFormat.setOutputPath(jobConf, new Path(outputPath));
        FileOutputFormat.setCompressOutput(jobConf, false);
        jobConf.set("mapred.child.java.opts", "-Xmx2048m");
        jobConf.setInt("mapred.task.timeout", 60000000);
        jobConf.setInt("Ivory.SlidingWindowSize", windowSize);
        jobConf.setInt("Ivory.MaxHammingDistance", maxDistance);
        jobConf.set("Ivory.SampleFile", sampleFile);
        jobConf.setNumMapTasks(numMapTasks);
        jobConf.setNumReduceTasks(numReduceTasks);
        jobConf.setInputFormat(SequenceFileInputFormat.class);
        jobConf.setMapOutputKeyClass(PairOfInts.class);
        jobConf.setMapOutputValueClass(IntWritable.class);
        jobConf.setOutputKeyClass(PairOfInts.class);
        jobConf.setOutputValueClass(IntWritable.class);
        jobConf.setMapperClass(MyMapper.class);
        // Without a result limit every pair is kept; otherwise the top-N reducer is used.
        if (numResults == -1) {
            jobConf.setReducerClass(MyReducer.class);
        } else {
            jobConf.setReducerClass(MyReducerTopN.class);
        }
        if (hasSampleFile) {
            jobConf.setOutputFormat(TextOutputFormat.class);
            sLogger.info("text output");
        } else {
            jobConf.setOutputFormat(SequenceFileOutputFormat.class);
        }

        sLogger.info("Running job " + jobConf.getJobName() + "...");
        sLogger.info("Input path: " + inputPath);
        sLogger.info("Output path: " + outputPath);
        sLogger.info("Window size: " + windowSize);
        sLogger.info("Threshold: " + maxDistance);
        sLogger.info("Sample file?: " + hasSampleFile);
        sLogger.info("Number of results: " + (numResults == -1 ? "all" : Integer.valueOf(numResults)));

        long startTime = System.currentTimeMillis();
        RunningJob runJob = JobClient.runJob(jobConf);
        System.out.println("Job finished in " + (System.currentTimeMillis() - startTime) + " milliseconds");
        Counters counters = runJob.getCounters();
        float prefixSum = (float) counters.findCounter(mapoutput.PrefixSum).getCounter();
        float processedPairs = (float) counters.findCounter(mapoutput.PROCESSEDPAIRS).getCounter();
        System.out.println("Avg prefix length = " + (prefixSum / processedPairs));
        return 0;
    }

    /**
     * Command-line entry point: runs this tool through {@link ToolRunner} with a fresh
     * {@link Configuration}.
     *
     * @param strArr command-line arguments, passed on to the tool
     * @throws Exception if the tool fails
     */
    public static void main(String[] strArr) throws Exception {
        Configuration conf = new Configuration();
        Tool tool = new CLSlidingWindowPwsim();
        ToolRunner.run(conf, tool, strArr);
    }
}
