package ivory.lsh.eval;

import edu.umd.cloud9.io.SequenceFileUtils;
import edu.umd.cloud9.io.map.HMapSFW;
import edu.umd.cloud9.io.pair.PairOfFloatInt;
import edu.umd.cloud9.io.pair.PairOfInts;
import edu.umd.cloud9.io.pair.PairOfWritables;
import ivory.core.data.document.WeightedIntDocVector;
import ivory.core.util.CLIRUtils;
import ivory.lsh.data.Signature;
import java.io.IOException;
import java.net.URI;
import java.text.NumberFormat;
import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/lsh/eval/BruteForcePwsim.class */
public class BruteForcePwsim extends Configured implements Tool {
    // Shared log4j logger; the nested mapper/reducer classes set its level to INFO in configure().
    private static final Logger sLogger = Logger.getLogger(BruteForcePwsim.class);

    // Command-line option names registered and read in parseArgs().
    private static final String INPUT_PATH_OPTION = "input";
    private static final String OUTPUT_PATH_OPTION = "output";
    private static final String INPTYPE_OPTION = "type";
    // Named "cosineT", but also carries the Hamming-distance threshold when the input type is a signature.
    private static final String THRESHOLD_OPTION = "cosineT";
    private static final String SAMPLE_OPTION = "sample";
    private static final String TOPN_OPTION = "topN";
    // Registered so the parser accepts it; its value is not read anywhere in this class.
    private static final String LIBJARS_OPTION = "libjars";

    // Option definitions; rebuilt on every parseArgs() call and printed by printUsage().
    private Options options;
    // Parsed value of THRESHOLD_OPTION.
    private float threshold;
    // Parsed value of TOPN_OPTION, or -1 when the option is absent (meaning: keep all results).
    private int numResults;
    // Parsed value of SAMPLE_OPTION; shipped to the tasks through the DistributedCache in run().
    private String sampleFile;
    // Parsed value of INPUT_PATH_OPTION.
    private String inputPath;
    // Parsed value of OUTPUT_PATH_OPTION; run() deletes this path before starting the job.
    private String outputPath;
    // Parsed value of INPTYPE_OPTION; run() picks the mapper by substring ("signature", "vector", "term").
    private String inputType;

    /* loaded from: input_file:ivory/lsh/eval/BruteForcePwsim$MyMapperDocVectors.class */
    public static class MyMapperDocVectors extends MapReduceBase implements Mapper<IntWritable, WeightedIntDocVector, IntWritable, PairOfFloatInt> {
        static List<PairOfWritables<WritableComparable, Writable>> vectors;
        float threshold;

        private String getFilename(String str) {
            return str.substring(str.lastIndexOf("/") + 1);
        }

        public void configure(JobConf jobConf) {
            BruteForcePwsim.sLogger.setLevel(Level.INFO);
            this.threshold = jobConf.getFloat("Ivory.CosineThreshold", -1.0f);
            BruteForcePwsim.sLogger.info("Threshold = " + this.threshold);
            String str = jobConf.get("Ivory.SampleFile");
            try {
                str = getFilename(str);
                for (Path path : DistributedCache.getLocalCacheFiles(jobConf)) {
                    if (path.toString().contains(str)) {
                        vectors = SequenceFileUtils.readFile(path, FileSystem.getLocal(jobConf));
                    }
                }
                if (vectors == null) {
                    throw new RuntimeException("Sample file not found at " + str);
                }
                BruteForcePwsim.sLogger.info("Read " + vectors.size() + " sample doc vectors");
            } catch (Exception e) {
                e.printStackTrace();
                throw new RuntimeException("Error reading doc vectors from " + str);
            }
        }

        public void map(IntWritable intWritable, WeightedIntDocVector weightedIntDocVector, OutputCollector<IntWritable, PairOfFloatInt> outputCollector, Reporter reporter) throws IOException {
            for (int i = 0; i < vectors.size(); i++) {
                IntWritable leftElement = vectors.get(i).getLeftElement();
                float cosine = CLIRUtils.cosine(weightedIntDocVector.getWeightedTerms(), ((WeightedIntDocVector) vectors.get(i).getRightElement()).getWeightedTerms());
                if (cosine >= this.threshold) {
                    outputCollector.collect(new IntWritable(leftElement.get()), new PairOfFloatInt(cosine, intWritable.get()));
                }
            }
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((IntWritable) obj, (WeightedIntDocVector) obj2, (OutputCollector<IntWritable, PairOfFloatInt>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:ivory/lsh/eval/BruteForcePwsim$MyMapperSignature.class */
    public static class MyMapperSignature extends MapReduceBase implements Mapper<IntWritable, Signature, IntWritable, PairOfFloatInt> {
        static List<PairOfWritables<WritableComparable, Writable>> signatures;
        int maxDist;

        public void configure(JobConf jobConf) {
            BruteForcePwsim.sLogger.setLevel(Level.INFO);
            this.maxDist = (int) jobConf.getFloat("Ivory.MaxHammingDistance", -1.0f);
            BruteForcePwsim.sLogger.info("Threshold = " + this.maxDist);
            String str = jobConf.get("Ivory.SampleFile");
            try {
                for (Path path : DistributedCache.getLocalCacheFiles(jobConf)) {
                    if (path.toString().contains(str)) {
                        signatures = SequenceFileUtils.readFile(path, FileSystem.getLocal(jobConf));
                    }
                }
                if (signatures == null) {
                    throw new RuntimeException("Sample file not found at " + str);
                }
                BruteForcePwsim.sLogger.info(Integer.valueOf(signatures.size()));
            } catch (Exception e) {
                e.printStackTrace();
                throw new RuntimeException("Error reading sample signatures!");
            }
        }

        public void map(IntWritable intWritable, Signature signature, OutputCollector<IntWritable, PairOfFloatInt> outputCollector, Reporter reporter) throws IOException {
            for (int i = 0; i < signatures.size(); i++) {
                reporter.incrCounter(Pairs.Total, 1L);
                IntWritable leftElement = signatures.get(i).getLeftElement();
                if (signature.hammingDistance((Signature) signatures.get(i).getRightElement(), this.maxDist) <= this.maxDist) {
                    outputCollector.collect(new IntWritable(leftElement.get()), new PairOfFloatInt(-r0, intWritable.get()));
                    reporter.incrCounter(Pairs.Emitted, 1L);
                }
            }
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((IntWritable) obj, (Signature) obj2, (OutputCollector<IntWritable, PairOfFloatInt>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:ivory/lsh/eval/BruteForcePwsim$MyMapperTermDocVectors.class */
    public static class MyMapperTermDocVectors extends MapReduceBase implements Mapper<IntWritable, HMapSFW, IntWritable, PairOfFloatInt> {
        static List<PairOfWritables<WritableComparable, Writable>> vectors;
        float threshold;

        public void configure(JobConf jobConf) {
            BruteForcePwsim.sLogger.setLevel(Level.INFO);
            this.threshold = jobConf.getFloat("Ivory.CosineThreshold", -1.0f);
            BruteForcePwsim.sLogger.info("Threshold = " + this.threshold);
            String str = jobConf.get("Ivory.SampleFile");
            try {
                for (Path path : DistributedCache.getLocalCacheFiles(jobConf)) {
                    if (path.toString().contains(str)) {
                        vectors = SequenceFileUtils.readFile(path, FileSystem.getLocal(jobConf));
                    }
                }
                if (vectors == null) {
                    throw new RuntimeException("Sample file not found at " + str);
                }
                BruteForcePwsim.sLogger.info("Read " + vectors.size() + " sample doc vectors");
            } catch (Exception e) {
                throw new RuntimeException("Error reading doc vectors from " + str);
            }
        }

        public void map(IntWritable intWritable, HMapSFW hMapSFW, OutputCollector<IntWritable, PairOfFloatInt> outputCollector, Reporter reporter) throws IOException {
            for (int i = 0; i < vectors.size(); i++) {
                reporter.incrCounter(Pairs.Total, 1L);
                IntWritable leftElement = vectors.get(i).getLeftElement();
                HMapSFW rightElement = vectors.get(i).getRightElement();
                float cosine = CLIRUtils.cosine(hMapSFW, rightElement);
                if (cosine >= this.threshold) {
                    BruteForcePwsim.sLogger.debug(leftElement + "," + rightElement + "\n" + rightElement.length());
                    BruteForcePwsim.sLogger.debug(intWritable + "," + hMapSFW + "\n" + hMapSFW.length());
                    BruteForcePwsim.sLogger.debug(Float.valueOf(cosine));
                    reporter.incrCounter(Pairs.Emitted, 1L);
                    outputCollector.collect(new IntWritable(leftElement.get()), new PairOfFloatInt(cosine, intWritable.get()));
                }
            }
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((IntWritable) obj, (HMapSFW) obj2, (OutputCollector<IntWritable, PairOfFloatInt>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:ivory/lsh/eval/BruteForcePwsim$MyReducer.class */
    public static class MyReducer extends MapReduceBase implements Reducer<IntWritable, PairOfFloatInt, PairOfInts, Text> {
        int numResults;
        TreeSet<PairOfFloatInt> list = new TreeSet<>();
        PairOfInts keyOut = new PairOfInts();
        Text valOut = new Text();
        NumberFormat nf;

        public void configure(JobConf jobConf) {
            BruteForcePwsim.sLogger.setLevel(Level.INFO);
            this.numResults = jobConf.getInt("Ivory.NumResults", Integer.MAX_VALUE);
            this.nf = NumberFormat.getInstance();
            this.nf.setMaximumFractionDigits(3);
            this.nf.setMinimumFractionDigits(3);
        }

        public void reduce(IntWritable intWritable, Iterator<PairOfFloatInt> it, OutputCollector<PairOfInts, Text> outputCollector, Reporter reporter) throws IOException {
            this.list.clear();
            while (it.hasNext()) {
                PairOfFloatInt next = it.next();
                if (this.list.add(new PairOfFloatInt(next.getLeftElement(), next.getRightElement()))) {
                    BruteForcePwsim.sLogger.debug("Added: " + next);
                } else {
                    BruteForcePwsim.sLogger.debug("Not added: " + next);
                }
                reporter.incrCounter(Pairs.Total, 1L);
            }
            BruteForcePwsim.sLogger.debug(Integer.valueOf(this.list.size()));
            for (int i = 0; !this.list.isEmpty() && i < this.numResults; i++) {
                PairOfFloatInt pollLast = this.list.pollLast();
                BruteForcePwsim.sLogger.debug("output " + i + "=" + pollLast);
                this.keyOut.set(pollLast.getRightElement(), intWritable.get());
                this.valOut.set(this.nf.format(pollLast.getLeftElement()));
                outputCollector.collect(this.keyOut, this.valOut);
            }
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterator it, OutputCollector outputCollector, Reporter reporter) throws IOException {
            reduce((IntWritable) obj, (Iterator<PairOfFloatInt>) it, (OutputCollector<PairOfInts, Text>) outputCollector, reporter);
        }
    }

    /**
     * Hadoop counters reported by the mappers and the reducer of this job.
     *
     * <p>Decompiler note (JADX): the access modifier of this enum was changed from
     * package-private.
     */
    public enum Pairs {
        /** Incremented once per (input, sample) comparison in the signature and term-vector mappers, and once per value in the reducer. */
        Total,
        /** Incremented once per pair emitted by the signature and term-vector mappers. */
        Emitted,
        /** Not referenced in this file. */
        DEBUG,
        /** Not referenced in this file. */
        DEBUG2,
        /** Not referenced in this file. */
        Total2
    }

    /** Counter group for the sample; not referenced anywhere in this file. */
    enum Sample {
        /** Presumably the number of sample entries -- TODO confirm against other users of this enum. */
        Size
    }

    /**
     * Parses the command line, configures the brute-force pairwise-similarity job and
     * runs it to completion.
     *
     * <p>The mapper is chosen from the {@code -type} value: any value containing
     * "signature" selects {@link MyMapperSignature}; otherwise a value containing
     * "vector" selects {@link MyMapperTermDocVectors} if it also contains "term", and
     * {@link MyMapperDocVectors} if not. Any existing output directory is deleted.
     *
     * @param strArr command-line arguments
     * @return 0 on success, -1 (after printing usage) if the arguments are invalid
     * @throws Exception if the job cannot be configured or fails
     */
    public int run(String[] strArr) throws Exception {
        if (parseArgs(strArr) < 0) {
            return printUsage();
        }

        // Reject an unknown type up front, before the output directory is deleted and a
        // job without a matching mapper is submitted.
        boolean signatureInput = this.inputType.contains("signature");
        if (!signatureInput && !this.inputType.contains("vector")) {
            System.err.println("Unsupported input type: " + this.inputType);
            return printUsage();
        }

        JobConf jobConf = new JobConf(getConf(), BruteForcePwsim.class);
        FileSystem.get(jobConf).delete(new Path(this.outputPath), true);

        FileInputFormat.setInputPaths(jobConf, new Path(this.inputPath));
        FileOutputFormat.setOutputPath(jobConf, new Path(this.outputPath));
        FileOutputFormat.setCompressOutput(jobConf, false);

        jobConf.set("mapred.child.java.opts", "-Xmx2048m");
        jobConf.setInt("mapred.map.max.attempts", 10);
        jobConf.setInt("mapred.reduce.max.attempts", 10);
        jobConf.setInt("mapred.task.timeout", 6000000);
        jobConf.setNumMapTasks(100);
        jobConf.setNumReduceTasks(1);

        jobConf.setInputFormat(SequenceFileInputFormat.class);
        jobConf.setMapOutputKeyClass(IntWritable.class);
        jobConf.setMapOutputValueClass(PairOfFloatInt.class);
        jobConf.setOutputKeyClass(PairOfInts.class);
        // MyReducer emits Text values, so declare Text here.
        jobConf.setOutputValueClass(Text.class);

        // The mappers locate the sample in the DistributedCache via this property.
        jobConf.set("Ivory.SampleFile", this.sampleFile);
        DistributedCache.addCacheFile(new URI(this.sampleFile), jobConf);

        if (signatureInput) {
            jobConf.setMapperClass(MyMapperSignature.class);
            jobConf.setFloat("Ivory.MaxHammingDistance", this.threshold);
        } else {
            if (this.inputType.contains("term")) {
                jobConf.setMapperClass(MyMapperTermDocVectors.class);
            } else {
                jobConf.setMapperClass(MyMapperDocVectors.class);
            }
            jobConf.setFloat("Ivory.CosineThreshold", this.threshold);
        }

        boolean limited = this.numResults > 0;
        jobConf.setJobName("BruteForcePwsim_type=" + this.inputType + "_cosine=" + this.threshold + "_top=" + (limited ? String.valueOf(this.numResults) : "all"));
        if (limited) {
            // When unset, the reducer falls back to its own default of "no limit".
            jobConf.setInt("Ivory.NumResults", this.numResults);
        }
        jobConf.setReducerClass(MyReducer.class);

        sLogger.info("Running job " + jobConf.getJobName());
        JobClient.runJob(jobConf);
        return 0;
    }

    /**
     * Prints the command-line help for the options built by {@code parseArgs}.
     *
     * @return always -1, so callers can return it directly as a failure status
     */
    private int printUsage() {
        HelpFormatter formatter = new HelpFormatter();
        String commandName = getClass().getCanonicalName();
        formatter.printHelp(commandName, this.options);
        return -1;
    }

    /**
     * Builds the option definitions and parses the command line into the instance
     * fields ({@code inputPath}, {@code outputPath}, {@code threshold},
     * {@code sampleFile}, {@code inputType}, {@code numResults}).
     *
     * @param strArr command-line arguments
     * @return 0 on success; -1 if a required option is missing, an option is unknown,
     *         or the threshold / topN value is not a valid number
     */
    private int parseArgs(String[] strArr) {
        this.options = new Options();
        registerOption(INPUT_PATH_OPTION, "path", "path to input doc vectors or signatures", true);
        registerOption(OUTPUT_PATH_OPTION, "path", "path to output directory", true);
        registerOption(THRESHOLD_OPTION, "threshold", "cosine similarity threshold when type=*docvector, hamming distance threshold when type=signature", true);
        registerOption(SAMPLE_OPTION, "path", "path to file with sample doc vectors or signatures", true);
        registerOption(INPTYPE_OPTION, "signature|intdocvector|termdocvector", "type of input", true);
        registerOption(TOPN_OPTION, "N", "keep only N results for each source document", false);
        registerOption(LIBJARS_OPTION, "jar packages", "Hadoop option to load external jars", false);

        try {
            CommandLine parsed = new GnuParser().parse(this.options, strArr);
            this.inputPath = parsed.getOptionValue(INPUT_PATH_OPTION);
            this.outputPath = parsed.getOptionValue(OUTPUT_PATH_OPTION);
            this.threshold = Float.parseFloat(parsed.getOptionValue(THRESHOLD_OPTION));
            this.sampleFile = parsed.getOptionValue(SAMPLE_OPTION);
            this.inputType = parsed.getOptionValue(INPTYPE_OPTION);
            // -1 means "no limit"; run() only forwards positive values to the job.
            this.numResults = parsed.hasOption(TOPN_OPTION) ? Integer.parseInt(parsed.getOptionValue(TOPN_OPTION)) : -1;
            return 0;
        } catch (ParseException | NumberFormatException e) {
            // A malformed number is a usage error too, not a crash.
            System.err.println("Error parsing command line: " + e.getMessage());
            return -1;
        }
    }

    /** Adds a single-argument option with the given name, argument label and description to {@code options}. */
    private void registerOption(String name, String argName, String description, boolean required) {
        // OptionBuilder accumulates settings in static state that create() consumes and resets.
        OptionBuilder.withDescription(description);
        OptionBuilder.withArgName(argName);
        OptionBuilder.hasArg();
        OptionBuilder.isRequired(required);
        this.options.addOption(OptionBuilder.create(name));
    }

    /**
     * Command-line entry point: runs the tool through {@link ToolRunner} so that generic
     * Hadoop options are handled, and reports a failure through the process exit status.
     *
     * @param strArr command-line arguments
     * @throws Exception if the job fails
     */
    public static void main(String[] strArr) throws Exception {
        int status = ToolRunner.run(new BruteForcePwsim(), strArr);
        // Propagate a failure (e.g. bad arguments) to the shell; leave the success path as it was.
        if (status != 0) {
            System.exit(status);
        }
    }
}
