package ivory.lsh.eval;

import edu.umd.cloud9.io.map.HMapIIW;
import edu.umd.cloud9.io.pair.PairOfInts;
import ivory.lsh.driver.PwsimEnvironment;
import java.io.IOException;
import java.net.URI;
import java.util.Iterator;
import java.util.TreeSet;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/lsh/eval/FilterResults.class */
public class FilterResults extends Configured implements Tool {
    /** Logger shared by the driver and its nested mapper/reducer classes. */
    private static final Logger sLogger = Logger.getLogger(FilterResults.class);

    // Command-line option names (the string is what the user types after the dash).
    private static final String WORKDIR_PATH_OPTION = "index";
    private static final String INPUT_PATH_OPTION = "input";
    private static final String OUTPUT_PATH_OPTION = "output";
    private static final String THRESHOLD_OPTION = "T";
    private static final String SAMPLEDOCNOS_OPTION = "docnos";
    private static final String WINDOWSIZE_OPTION = "B";
    private static final String SIGNLENG_OPTION = "num_bits";
    private static final String NUMPERMS_OPTION = "Q";
    // NOTE(review): "overlap" is registered as an option in parseArgs but its value is never read there.
    private static final String OVERLAPSIZE_OPTION = "overlap";
    private static final String SIGNTYPE_OPTION = "type";
    private static final String TOPN_OPTION = "topN";
    private static final String LIBJARS_OPTION = "libjars";

    /** Option set built by parseArgs and printed by printUsage. */
    private Options options;

    // Values filled in by parseArgs. Numeric options default to -1 and string
    // options to null when they are not given on the command line.
    private int numOfPermutations; // -Q
    private int windowSize;        // -B
    private int maxDist;           // -T
    private int numResults;        // -topN
    private int numOfBits;         // -num_bits
    private String signatureType;  // -type
    private String sampleDocnosFile; // -docnos (required option)
    private String workDir;        // -index
    // -input / -output; when null, run() derives them through PwsimEnvironment.
    private String inputPath;
    private String outputPath;

    /* loaded from: input_file:ivory/lsh/eval/FilterResults$MyMapper.class */
    public static class MyMapper extends MapReduceBase implements Mapper<PairOfInts, IntWritable, PairOfInts, IntWritable> {
        static Path[] localFiles;
        int maxDist;
        HMapIIW samplesMap = null;
        IntWritable outValue = new IntWritable();
        PairOfInts outKey = new PairOfInts();

        public void configure(JobConf jobConf) {
            FilterResults.sLogger.setLevel(Level.INFO);
            this.maxDist = jobConf.getInt("Ivory.MaxHammingDistance", -1);
            String str = jobConf.get("Ivory.SampleFile");
            try {
                this.samplesMap = FilterResults.readSamplesFromCache(FilterResults.getFilename(str), jobConf);
            } catch (IOException e) {
                e.printStackTrace();
                throw new RuntimeException("I/O error in " + str);
            } catch (NumberFormatException e2) {
                e2.printStackTrace();
                throw new RuntimeException("Incorrect format in " + str);
            } catch (Exception e3) {
                e3.printStackTrace();
                throw new RuntimeException("Error reading sample file " + str);
            }
        }

        public void map(PairOfInts pairOfInts, IntWritable intWritable, OutputCollector<PairOfInts, IntWritable> outputCollector, Reporter reporter) throws IOException {
            int leftElement = pairOfInts.getLeftElement();
            int rightElement = pairOfInts.getRightElement();
            if (this.samplesMap == null || this.samplesMap.containsKey(rightElement)) {
                if (this.maxDist == -1 || intWritable.get() <= this.maxDist) {
                    this.outKey.set(leftElement, rightElement);
                    this.outValue.set(intWritable.get());
                    outputCollector.collect(this.outKey, this.outValue);
                }
            }
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((PairOfInts) obj, (IntWritable) obj2, (OutputCollector<PairOfInts, IntWritable>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:ivory/lsh/eval/FilterResults$MyMapperTopN.class */
    public static class MyMapperTopN extends MapReduceBase implements Mapper<PairOfInts, IntWritable, IntWritable, PairOfInts> {
        static Path[] localFiles;
        HMapIIW samplesMap = null;
        int maxDist;

        public void configure(JobConf jobConf) {
            FilterResults.sLogger.setLevel(Level.INFO);
            this.maxDist = jobConf.getInt("Ivory.MaxHammingDistance", -1);
            String str = jobConf.get("Ivory.SampleFile");
            try {
                this.samplesMap = FilterResults.readSamplesFromCache(FilterResults.getFilename(str), jobConf);
            } catch (IOException e) {
                e.printStackTrace();
                throw new RuntimeException("I/O error in " + str);
            } catch (NumberFormatException e2) {
                e2.printStackTrace();
                throw new RuntimeException("Incorrect format in " + str);
            } catch (Exception e3) {
                e3.printStackTrace();
                throw new RuntimeException("Error reading sample file " + str);
            }
        }

        public void map(PairOfInts pairOfInts, IntWritable intWritable, OutputCollector<IntWritable, PairOfInts> outputCollector, Reporter reporter) throws IOException {
            int leftElement = pairOfInts.getLeftElement();
            int rightElement = pairOfInts.getRightElement();
            FilterResults.sLogger.debug(Integer.valueOf(rightElement));
            if (this.samplesMap == null || this.samplesMap.containsKey(rightElement)) {
                if (this.maxDist == -1 || intWritable.get() <= this.maxDist) {
                    outputCollector.collect(new IntWritable(rightElement), new PairOfInts(intWritable.get(), leftElement));
                }
            }
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((PairOfInts) obj, (IntWritable) obj2, (OutputCollector<IntWritable, PairOfInts>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:ivory/lsh/eval/FilterResults$MyReducerTopN.class */
    public static class MyReducerTopN extends MapReduceBase implements Reducer<IntWritable, PairOfInts, IntWritable, PairOfInts> {
        int numResults;
        TreeSet<PairOfInts> list = new TreeSet<>();

        public void configure(JobConf jobConf) {
            this.numResults = jobConf.getInt("Ivory.NumResults", -1);
            FilterResults.sLogger.info("numResults");
        }

        public void reduce(IntWritable intWritable, Iterator<PairOfInts> it, OutputCollector<IntWritable, PairOfInts> outputCollector, Reporter reporter) throws IOException {
            this.list.clear();
            while (it.hasNext()) {
                PairOfInts next = it.next();
                this.list.add(new PairOfInts(next.getLeftElement(), next.getRightElement()));
                reporter.incrCounter(mapoutput.count, 1L);
            }
            for (int i = 0; !this.list.isEmpty() && i < this.numResults; i++) {
                outputCollector.collect(intWritable, this.list.pollFirst());
            }
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterator it, OutputCollector outputCollector, Reporter reporter) throws IOException {
            reduce((IntWritable) obj, (Iterator<PairOfInts>) it, (OutputCollector<IntWritable, PairOfInts>) outputCollector, reporter);
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:ivory/lsh/eval/FilterResults$mapoutput.class */
    /** Counter names reported through the Hadoop Reporter by MyReducerTopN. */
    public enum mapoutput {
        /** Incremented once for every value MyReducerTopN.reduce reads. */
        count
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Loads the sample docnos from the distributed-cache file whose local path
     * contains {@code str}.
     *
     * The file is read line by line; every non-blank line is parsed as an integer
     * and stored as a key (with value 1) in the returned map. If several cache
     * files match, the map built from the last match is returned.
     *
     * @param str     file name (or path fragment) to look for among the local cache files
     * @param jobConf job configuration used to list the cache files and open the local file system
     * @return map whose keys are the docnos read from the file
     * @throws IOException           if the file cannot be opened or read
     * @throws NumberFormatException if a non-blank line is not an integer
     * @throws RuntimeException      if no local cache file matches {@code str}
     */
    public static HMapIIW readSamplesFromCache(String str, JobConf jobConf) throws IOException {
        HMapIIW samples = null;
        // getLocalCacheFiles returns null when nothing was added to the cache.
        Path[] cacheFiles = DistributedCache.getLocalCacheFiles(jobConf);
        if (cacheFiles != null) {
            for (Path path : cacheFiles) {
                if (!path.toString().contains(str)) {
                    continue;
                }
                samples = new HMapIIW();
                LineReader lineReader = new LineReader(FileSystem.getLocal(jobConf).open(path));
                try {
                    Text text = new Text();
                    while (lineReader.readLine(text) != 0) {
                        // Tolerate Windows line endings, padding and blank lines.
                        String line = text.toString().trim();
                        if (line.isEmpty()) {
                            continue;
                        }
                        int docno = Integer.parseInt(line);
                        sLogger.info(docno + " --> sample");
                        samples.put(docno, 1);
                    }
                } finally {
                    // Release the stream even when a line fails to parse.
                    lineReader.close();
                }
                sLogger.info(samples.size() + " sampled");
            }
        }
        if (samples == null) {
            throw new RuntimeException("Not found in local cache: " + str);
        }
        return samples;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Returns the part of {@code str} that follows its last '/' character,
     * or {@code str} itself when it contains no '/'.
     *
     * @param str a slash-separated path; must not be null
     * @return the text after the last slash (empty when the path ends with '/')
     */
    public static String getFilename(String str) {
        final int lastSlash = str.lastIndexOf('/');
        if (lastSlash < 0) {
            return str;
        }
        return str.substring(lastSlash + 1);
    }

    /**
     * Parses the arguments, configures the filtering job and runs it.
     *
     * When {@code -input} / {@code -output} are not given, the paths are derived
     * through {@code PwsimEnvironment}. If the output path already exists the job
     * is skipped. A positive top-N value selects MyMapperTopN/MyReducerTopN;
     * otherwise MyMapper with an identity reducer is used.
     *
     * @param strArr command-line arguments
     * @return -1 when the arguments are invalid, 0 otherwise
     * @throws Exception if job setup or execution fails
     */
    public int run(String[] strArr) throws Exception {
        final int parseStatus = parseArgs(strArr);
        if (parseStatus < 0) {
            printUsage();
            return -1;
        }

        final boolean topNMode = this.numResults > 0;
        final String topLabel = topNMode ? String.valueOf(this.numResults) : "all";

        JobConf job = new JobConf(getConf(), FilterResults.class);
        job.setInt("Ivory.MaxHammingDistance", this.maxDist);
        job.setInt("Ivory.NumResults", this.numResults);
        job.setJobName("FilterResults_sample=" + getFilename(this.sampleDocnosFile) + "_top=" + topLabel);

        FileSystem fs = FileSystem.get(job);

        // Fall back to the conventional directory layout when paths were not given explicitly.
        if (this.inputPath == null) {
            this.inputPath = PwsimEnvironment.getPwsimDir(this.workDir, this.signatureType, this.maxDist, this.numOfBits, this.numOfPermutations, this.windowSize);
        }
        if (this.outputPath == null) {
            this.outputPath = PwsimEnvironment.getFilteredPwsimDir(this.workDir, this.signatureType, this.maxDist, this.numOfBits, this.numOfPermutations, this.windowSize, this.sampleDocnosFile, this.numResults);
        }

        final Path outDir = new Path(this.outputPath);
        if (fs.exists(outDir)) {
            sLogger.info("FilteredPwsim output already exists! Quitting...");
            return 0;
        }

        FileInputFormat.setInputPaths(job, new Path(this.inputPath));
        FileOutputFormat.setOutputPath(job, outDir);
        FileOutputFormat.setCompressOutput(job, false);

        job.set("mapred.child.java.opts", "-Xmx2048m");
        job.setInt("mapred.map.max.attempts", 10);
        job.setInt("mapred.reduce.max.attempts", 10);
        job.setInt("mapred.task.timeout", 6000000);

        // The mappers find the sample file in the distributed cache by its file name.
        job.set("Ivory.SampleFile", this.sampleDocnosFile);
        DistributedCache.addCacheFile(new URI(this.sampleDocnosFile), job);

        sLogger.info("Running job " + job.getJobName());
        sLogger.info("Input directory: " + this.inputPath);
        sLogger.info("Output directory: " + this.outputPath);
        sLogger.info("Samples file: " + this.sampleDocnosFile);

        if (!topNMode) {
            sLogger.info("Number of results = all");
            job.setMapperClass(MyMapper.class);
            job.setReducerClass(IdentityReducer.class);
            job.setMapOutputKeyClass(PairOfInts.class);
            job.setMapOutputValueClass(IntWritable.class);
        } else {
            sLogger.info("Number of results = " + this.numResults);
            job.setMapperClass(MyMapperTopN.class);
            job.setReducerClass(MyReducerTopN.class);
            job.setMapOutputKeyClass(IntWritable.class);
            job.setMapOutputValueClass(PairOfInts.class);
        }

        job.setJarByClass(FilterResults.class);
        job.setNumMapTasks(300);
        job.setNumReduceTasks(1);
        job.setInputFormat(SequenceFileInputFormat.class);

        JobClient.runJob(job);
        return 0;
    }

    /** Prints the command-line help for the options built by parseArgs. */
    private void printUsage() {
        final String commandName = getClass().getCanonicalName();
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(commandName, this.options);
    }

    /**
     * Builds the option set, parses the command line and stores the values in
     * the instance fields.
     *
     * Either the full set {@code -index -num_bits -type -Q -B -T} (all numeric
     * values positive) or both {@code -input} and {@code -output} must be given;
     * {@code -docnos} is always required.
     *
     * @param strArr command-line arguments
     * @return 0 on success, -1 when the arguments are missing, malformed or inconsistent
     */
    private int parseArgs(String[] strArr) {
        this.options = new Options();
        addArgOption(WORKDIR_PATH_OPTION, "path", "path to index directory", false);
        addArgOption(INPUT_PATH_OPTION, "path", "path to input directory (used instead of the path derived from -index)", false);
        addArgOption(OUTPUT_PATH_OPTION, "path", "path to output directory (used instead of the path derived from -index)", false);
        addArgOption(SAMPLEDOCNOS_OPTION, "path to sample docnos file", "only keep pairs that match these docnos", true);
        addArgOption(THRESHOLD_OPTION, "threshold", "hamming distance threshold for similar pairs", false);
        addArgOption(TOPN_OPTION, "N", "keep only N results for each source document", false);
        addArgOption(SIGNLENG_OPTION, "number of bits", "length of signature", false);
        addArgOption(WINDOWSIZE_OPTION, "window", "sliding window size", false);
        addArgOption(SIGNTYPE_OPTION, "random|minhash|simhash", "type of signature", false);
        addArgOption(NUMPERMS_OPTION, "permutations", "number of permutations (tables)", false);
        addArgOption(OVERLAPSIZE_OPTION, "overlap size", "size of overlap between chunks (default: window size)", false);
        addArgOption(LIBJARS_OPTION, "jar packages", "Hadoop option to load external jars", false);

        try {
            CommandLine parse = new GnuParser().parse(this.options, strArr);

            // getOptionValue yields null for an option that was not supplied.
            this.workDir = parse.getOptionValue(WORKDIR_PATH_OPTION);
            this.inputPath = parse.getOptionValue(INPUT_PATH_OPTION);
            this.outputPath = parse.getOptionValue(OUTPUT_PATH_OPTION);
            this.signatureType = parse.getOptionValue(SIGNTYPE_OPTION);
            this.numOfBits = intOption(parse, SIGNLENG_OPTION);
            this.numOfPermutations = intOption(parse, NUMPERMS_OPTION);
            this.maxDist = intOption(parse, THRESHOLD_OPTION);
            this.windowSize = intOption(parse, WINDOWSIZE_OPTION);

            boolean indexParamsComplete = this.workDir != null
                    && this.numOfBits > 0
                    && this.numOfPermutations > 0
                    && this.windowSize > 0
                    && this.signatureType != null
                    && this.maxDist > 0;
            boolean explicitPaths = this.inputPath != null && this.outputPath != null;
            if (!indexParamsComplete && !explicitPaths) {
                System.err.println("Either options -index and -num_bits and -type and -Q and -B and -T or options -input and -output should be specified!");
                return -1;
            }

            try {
                // Result is discarded; the call is kept only so that a path problem is
                // reported here rather than later in run().
                // NOTE(review): this also runs when only -input/-output were given
                // (workDir may be null) — confirm getPwsimDir tolerates that.
                PwsimEnvironment.getPwsimDir(this.workDir, this.signatureType, this.maxDist, this.numOfBits, this.numOfPermutations, this.windowSize);
            } catch (IOException e) {
                System.err.println("Error with path names: " + e.getMessage());
                return -1;
            }

            this.sampleDocnosFile = parse.getOptionValue(SAMPLEDOCNOS_OPTION);
            this.numResults = intOption(parse, TOPN_OPTION);
            return 0;
        } catch (ParseException e) {
            System.err.println("Error parsing command line: " + e.getMessage());
            return -1;
        } catch (NumberFormatException e) {
            // A non-numeric value for a numeric option is a usage error, not a crash.
            System.err.println("Error parsing numeric option value: " + e.getMessage());
            return -1;
        }
    }

    /** Registers one single-argument option on {@code this.options}. */
    private void addArgOption(String name, String argName, String description, boolean required) {
        // OptionBuilder accumulates state statically and resets it in create().
        OptionBuilder.withDescription(description);
        OptionBuilder.withArgName(argName);
        OptionBuilder.hasArg();
        if (required) {
            OptionBuilder.isRequired();
        }
        this.options.addOption(OptionBuilder.create(name));
    }

    /** Returns the integer value of an option, or -1 when the option is absent. */
    private static int intOption(CommandLine parse, String name) {
        return parse.hasOption(name) ? Integer.parseInt(parse.getOptionValue(name)) : -1;
    }

    /**
     * Command-line entry point: runs the tool through {@code ToolRunner}.
     *
     * A non-zero status from {@code run} becomes the process exit code, so
     * calling scripts can detect a usage error. On success the method returns
     * normally.
     *
     * @param strArr command-line arguments
     * @throws Exception if the job fails
     */
    public static void main(String[] strArr) throws Exception {
        int status = ToolRunner.run(new FilterResults(), strArr);
        if (status != 0) {
            System.exit(status);
        }
    }
}
