package ivory.lsh.projection;

import edu.umd.cloud9.io.SequenceFileUtils;
import edu.umd.cloud9.io.array.ArrayListOfIntsWritable;
import edu.umd.cloud9.io.map.HMapIFW;
import edu.umd.cloud9.util.PowerTool;
import ivory.core.Constants;
import ivory.core.RetrievalEnvironment;
import ivory.core.data.document.WeightedIntDocVector;
import ivory.lsh.data.MinhashSignature;
import ivory.lsh.data.Permutation;
import ivory.lsh.data.PermutationByBit;
import ivory.lsh.driver.PwsimEnvironment;
import java.io.IOException;
import java.net.URI;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/lsh/projection/ComputeSignaturesMinhash.class */
public class ComputeSignaturesMinhash extends PowerTool {
    public static final String[] RequiredParameters = new String[0];
    private static final Logger sLogger = Logger.getLogger(ComputeSignaturesMinhash.class);

    /* loaded from: input_file:ivory/lsh/projection/ComputeSignaturesMinhash$Maps.class */
    /**
     * Named constants declared by this tool. They are not referenced anywhere in
     * the visible code; presumably intended as Hadoop counter names (TODO confirm).
     */
    protected enum Maps {
        ALL,
        ONES,
        ZEROS,
        EMPTY;

        /**
         * Returns a newly allocated array holding every constant in declaration
         * order. Equivalent to {@link #values()}; the distinct name exists only to
         * avoid clashing with the compiler-generated enum method.
         *
         * @return a fresh array of all {@code Maps} constants
         */
        public static Maps[] valuesCustom() {
            // values() already hands out a private copy; clone it once more so the
            // caller still receives an array distinct from that one.
            return values().clone();
        }
    }

    /* loaded from: input_file:ivory/lsh/projection/ComputeSignaturesMinhash$MyMapper.class */
    /**
     * Mapper that converts each weighted document vector into a minhash
     * signature of {@code D} entries and emits it under the input key.
     *
     * <p>For every random ordering loaded in {@link #configure(JobConf)}, the
     * signature entry is the first term id of that ordering that is present in
     * the document vector.
     *
     * <p>State is kept in static fields, so it is shared by all mapper instances
     * living in the same JVM.
     */
    public static class MyMapper extends MapReduceBase implements Mapper<IntWritable, WeightedIntDocVector, IntWritable, MinhashSignature> {
        /** Local distributed-cache paths as reported for the current task. */
        static Path[] localFiles;
        /** Expected number of signature entries, read from "Ivory.NumOfBits". */
        static int D;
        /** Reusable output buffer; cleared and refilled on every map() call. */
        static MinhashSignature signature;
        /** Values read from the cached file; one ordering per signature entry. */
        static List<Writable> randomOrderings;

        /** Returns the part of {@code str} after the last '/', or all of it if there is none. */
        private String getFilename(String str) {
            return str.substring(str.lastIndexOf("/") + 1);
        }

        /**
         * Reads the job parameters and loads the random orderings from the
         * distributed cache.
         *
         * @param jobConf job configuration; must define "Ivory.NumOfBits" and
         *                "InCache" (path whose file name identifies the cached file)
         * @throws RuntimeException if a parameter is missing, the cached file
         *                          cannot be located or read, or it does not
         *                          contain exactly {@code D} orderings
         */
        public void configure(JobConf jobConf) {
            D = jobConf.getInt("Ivory.NumOfBits", -1);
            if (D == -1) {
                throw new RuntimeException("Could not read parameters!");
            }
            String cachePath = jobConf.get("InCache");
            if (cachePath == null) {
                // Fail with a precise message instead of a masked NullPointerException.
                throw new RuntimeException("Could not read parameters! Job parameter InCache is not set.");
            }
            String cacheName = getFilename(cachePath);

            // Load into a local first so that stale static state left behind by an
            // earlier task in a reused JVM can never satisfy the checks below.
            List<Writable> loaded = null;
            try {
                localFiles = DistributedCache.getLocalCacheFiles(jobConf);
                if (localFiles != null) {
                    for (Path path : localFiles) {
                        if (path.toString().contains(cacheName)) {
                            loaded = SequenceFileUtils.readValues(path, FileSystem.getLocal(jobConf));
                        }
                    }
                }
            } catch (Exception e) {
                // Keep the original failure as the cause so it shows up in task logs.
                throw new RuntimeException("Error reading random orderings from " + cacheName, e);
            }
            randomOrderings = loaded;

            // Validation happens outside the try block so these specific messages
            // are not replaced by the generic read-error message.
            if (loaded == null) {
                throw new RuntimeException("File not found in local cache: " + cacheName);
            }
            if (loaded.size() != D) {
                throw new RuntimeException("No of random orderings not correct. Something is wrong!");
            }
            signature = new MinhashSignature(D);
        }

        /**
         * Builds the minhash signature for one document and emits it.
         *
         * @param intWritable          input key, passed through unchanged as the output key
         * @param weightedIntDocVector document vector whose weighted terms are hashed
         * @param outputCollector      receives the (key, signature) pair
         * @param reporter             unused
         * @throws IOException if the collector fails
         * @throws RuntimeException if the document contains none of the terms of
         *                          some ordering
         */
        public void map(IntWritable intWritable, WeightedIntDocVector weightedIntDocVector, OutputCollector<IntWritable, MinhashSignature> outputCollector, Reporter reporter) throws IOException {
            HMapIFW weightedTerms = weightedIntDocVector.getWeightedTerms();
            signature.clear();
            for (Writable ordering : randomOrderings) {
                signature.add(getMinHashTerm(weightedTerms, (ArrayListOfIntsWritable) ordering));
            }
            // Guarded: building this message stringifies the whole document vector,
            // which is wasted work for every record when debug logging is off.
            if (ComputeSignaturesMinhash.sLogger.isDebugEnabled()) {
                ComputeSignaturesMinhash.sLogger.debug("Doc vector " + weightedTerms + " mapped to \nBitsSignature: " + intWritable + "\n" + signature);
            }
            outputCollector.collect(intWritable, signature);
        }

        /**
         * Returns the first term id in {@code arrayListOfIntsWritable} that is a
         * key of {@code hMapIFW}.
         *
         * @throws RuntimeException if no element of the ordering is a key of the map
         */
        private int getMinHashTerm(HMapIFW hMapIFW, ArrayListOfIntsWritable arrayListOfIntsWritable) {
            for (int i = 0; i < arrayListOfIntsWritable.size(); i++) {
                int term = arrayListOfIntsWritable.get(i);
                if (hMapIFW.containsKey(term)) {
                    return term;
                }
            }
            throw new RuntimeException("No terms in doc vector. Something is wrong!");
        }

        // The decompiler-emitted synthetic bridge map(Object, Object, ...) is
        // intentionally not declared here: the compiler generates it, and declaring
        // it by hand clashes by erasure with Mapper.map.
    }

    // Runs once at class load: fixes this class's logger at INFO, so the
    // sLogger.debug(...) calls in this file are not emitted unless the level is
    // changed elsewhere.
    static {
        sLogger.setLevel(Level.INFO);
    }

    /**
     * Creates the tool around the given configuration, which is handed to the
     * {@code PowerTool} superclass and later read back via {@code getConf()} in
     * {@link #runTool()}.
     *
     * @param configuration configuration carrying the job parameters
     */
    public ComputeSignaturesMinhash(Configuration configuration) {
        super(configuration);
    }

    /**
     * Writes the expected argument list to standard output.
     *
     * @return always {@code -1}
     */
    private static int printUsage() {
        final String usage = "usage: [index-path] [num-of-bits] [type-of-computation] ([batch-size])";
        System.out.println(usage);
        return -1;
    }

    /**
     * Configures and runs the MapReduce job that computes minhash signatures
     * for the int document vectors of an index.
     *
     * <p>Parameters are read from the tool's configuration:
     * <ul>
     *   <li>{@code Ivory.NumOfBits} - number of entries per signature (required, non-negative)</li>
     *   <li>{@code NumBatch} - number of reduce tasks; 0 (the default) runs map-only</li>
     *   <li>{@code Constants.IndexPath} - index directory (required)</li>
     * </ul>
     *
     * <p>If the signatures output directory already exists, nothing is run. The
     * random permutations file is created when missing and shipped to the
     * mappers through the distributed cache.
     *
     * @return 0 when the job finished or the output already existed
     * @throws RuntimeException if a required parameter is missing or negative
     * @throws Exception if file-system access or the job itself fails
     */
    public int runTool() throws Exception {
        Configuration conf = getConf();
        int numOfBits = conf.getInt("Ivory.NumOfBits", -1);
        int numBatch = conf.getInt("NumBatch", 0);
        boolean batchMode = numBatch != 0;
        String indexPath = conf.get(Constants.IndexPath);
        // A missing index path is rejected here rather than failing later with an
        // opaque error inside the retrieval environment.
        if (numOfBits < 0 || numBatch < 0 || indexPath == null) {
            throw new RuntimeException("Parameters not read properly! Quitting...");
        }

        JobConf jobConf = new JobConf(conf, ComputeSignaturesMinhash.class);
        FileSystem fileSystem = FileSystem.get(jobConf);
        int collectionTermCount = new RetrievalEnvironment(indexPath, fileSystem).readCollectionTermCount();
        jobConf.setJobName("ComputeSignatures_minhash");

        String inputPath = PwsimEnvironment.getIntDocvectorsFile(indexPath, fileSystem);
        String outputPath = PwsimEnvironment.getSignaturesDir(indexPath, numOfBits, "minhash");
        if (fileSystem.exists(new Path(outputPath))) {
            sLogger.info("Signatures output path already exists! Quitting...");
            return 0;
        }

        String permutationsFile = PwsimEnvironment.getPermutationsFile(indexPath, fileSystem, collectionTermCount, numOfBits);
        if (fileSystem.exists(new Path(permutationsFile))) {
            sLogger.info("Random permutations output path already exists!");
        } else {
            Permutation.writeToFile(new PermutationByBit(collectionTermCount), numOfBits, fileSystem, jobConf, permutationsFile);
        }
        DistributedCache.addCacheFile(new URI(permutationsFile), jobConf);
        // MyMapper.configure() locates the cached file through the "InCache"
        // parameter and fails when it is absent. Default it to the file shipped
        // above; a value supplied by the caller is left untouched.
        if (jobConf.get("InCache") == null) {
            jobConf.set("InCache", permutationsFile);
        }

        sLogger.info("Computing signatures...");
        sLogger.info("Type of computation: Minhash");
        sLogger.info("Total number of ints: " + numOfBits);
        sLogger.info("random perms file: " + permutationsFile);
        sLogger.info("InputPath: " + inputPath);
        sLogger.info("outputPath: " + outputPath);
        sLogger.info("Batch?: " + batchMode);

        FileInputFormat.setInputPaths(jobConf, new Path[]{new Path(inputPath)});
        FileOutputFormat.setOutputPath(jobConf, new Path(outputPath));
        FileOutputFormat.setCompressOutput(jobConf, false);

        jobConf.set("mapred.child.java.opts", "-Xmx2048m");
        jobConf.setInt("mapred.map.max.attempts", 10);
        jobConf.setInt("mapred.reduce.max.attempts", 10);
        jobConf.setInt("mapred.task.timeout", 6000000);
        jobConf.setNumMapTasks(300);

        jobConf.setInputFormat(SequenceFileInputFormat.class);
        jobConf.setMapOutputKeyClass(IntWritable.class);
        jobConf.setMapOutputValueClass(MinhashSignature.class);
        jobConf.setOutputKeyClass(IntWritable.class);
        jobConf.setOutputValueClass(MinhashSignature.class);
        jobConf.setMapperClass(MyMapper.class);
        jobConf.setOutputFormat(SequenceFileOutputFormat.class);

        if (batchMode) {
            // Batch mode: identity reducers only regroup the signatures into
            // numBatch output partitions.
            jobConf.setNumReduceTasks(numBatch);
            jobConf.setReducerClass(IdentityReducer.class);
        } else {
            jobConf.setNumReduceTasks(0);
        }

        long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        System.out.println("Job finished in " + ((System.currentTimeMillis() - startTime) / 1000.0d) + " seconds");
        return 0;
    }

    /**
     * Lists the configuration parameters this tool declares as required.
     *
     * @return a new, empty array on every call
     */
    public String[] getRequiredParameters() {
        final String[] none = new String[] {};
        return none;
    }
}
