package ivory.lsh.eval;

import edu.umd.cloud9.collection.wikipedia.WikipediaPage;
import edu.umd.cloud9.io.array.ArrayListOfIntsWritable;
import edu.umd.cloud9.io.map.HMapIIW;
import edu.umd.cloud9.io.pair.PairOfIntString;
import edu.umd.cloud9.io.pair.PairOfInts;
import edu.umd.cloud9.util.map.HMapIV;
import java.io.IOException;
import java.net.URI;
import java.util.Iterator;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/lsh/eval/Docnos2Titles.class */
public class Docnos2Titles extends Configured implements Tool {
    /** Logger shared by the driver and by the nested mapper and reducer classes. */
    private static final Logger sLogger = Logger.getLogger(Docnos2Titles.class);
    /** Command-line option definitions; (re)built on every call to parseArgs and read by printUsage. */
    private static Options options;
    /** Required option: source-side (F) raw collection path. */
    private static final String FCOLLECTION_OPTION = "f_collection";
    /** Required option: target-side (E) raw collection path. */
    private static final String ECOLLECTION_OPTION = "e_collection";
    /** Required option: two-letter code for the F language. */
    private static final String FLANG_OPTION = "f_lang";
    /** Required option: two-letter code for the E language. */
    private static final String ELANG_OPTION = "e_lang";
    /** Required option: path to the output of the pwsim algorithm. */
    private static final String PWSIM_OPTION = "pwsim_output";
    /** Output path option name. NOTE(review): parseArgs and run use the literal "output" rather than this constant. */
    private static final String OUTPUT_PATH_OPTION = "output";
    /** Optional option: path to a file of sample docnos; only pairs matching these docnos are kept. */
    private static final String SAMPLEDOCNOS_OPTION = "docnos";
    /** Optional option, described in parseArgs as the Hadoop option to load external jars. */
    private static final String LIBJARS_OPTION = "libjars";

    /* loaded from: input_file:ivory/lsh/eval/Docnos2Titles$MyMapper.class */
    private static class MyMapper extends MapReduceBase implements Mapper<IntWritable, WikipediaPage, PairOfInts, PairOfIntString> {
        private HMapIV<ArrayListOfIntsWritable> pwsimMapping;
        private JobConf mJob;
        private ArrayListOfIntsWritable similarDocnos;
        private String srcLang;
        private PairOfIntString valOut;
        private PairOfInts keyOut;
        private HMapIIW samplesMap = null;

        private MyMapper() {
        }

        public void configure(JobConf jobConf) {
            Docnos2Titles.sLogger.setLevel(Level.INFO);
            this.srcLang = jobConf.get("fLang");
            this.mJob = jobConf;
            this.pwsimMapping = new HMapIV<>();
            this.valOut = new PairOfIntString();
            this.keyOut = new PairOfInts();
            String str = jobConf.get("Ivory.SampleFile");
            if (str != null) {
                try {
                    this.samplesMap = readSamplesFromCache(getFilename(str), jobConf);
                } catch (IOException e) {
                    e.printStackTrace();
                    throw new RuntimeException("I/O error in " + str);
                } catch (NumberFormatException e2) {
                    e2.printStackTrace();
                    throw new RuntimeException("Incorrect format in " + str);
                } catch (Exception e3) {
                    e3.printStackTrace();
                    throw new RuntimeException("Error reading sample file: " + str);
                }
            }
        }

        private static String getFilename(String str) {
            return str.substring(str.lastIndexOf("/") + 1);
        }

        private static void loadPairs(HMapIV<ArrayListOfIntsWritable> hMapIV, int i, JobConf jobConf, Reporter reporter) {
            try {
                Path[] localCacheFiles = DistributedCache.getLocalCacheFiles(jobConf);
                String str = jobConf.get("PwsimPairs");
                for (Path path : localCacheFiles) {
                    if (path.toString().contains(getFilename(str))) {
                        SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.getLocal(jobConf), path, jobConf);
                        PairOfInts pairOfInts = (PairOfInts) reader.getKeyClass().newInstance();
                        int i2 = 0;
                        for (IntWritable intWritable = (IntWritable) reader.getValueClass().newInstance(); reader.next(pairOfInts, intWritable); intWritable = (IntWritable) reader.getValueClass().newInstance()) {
                            int rightElement = pairOfInts.getRightElement();
                            int leftElement = pairOfInts.getLeftElement();
                            if ((leftElement == 6127 && rightElement == 1000000074) || (leftElement == 6127 && rightElement == 1000000071)) {
                                Docnos2Titles.sLogger.info(pairOfInts);
                            }
                            if (i == -1) {
                                if (!hMapIV.containsKey(leftElement)) {
                                    hMapIV.put(leftElement, new ArrayListOfIntsWritable());
                                }
                                ((ArrayListOfIntsWritable) hMapIV.get(leftElement)).add(rightElement);
                            } else {
                                if (!hMapIV.containsKey(rightElement)) {
                                    hMapIV.put(rightElement, new ArrayListOfIntsWritable());
                                }
                                ((ArrayListOfIntsWritable) hMapIV.get(rightElement)).add(leftElement);
                            }
                            i2++;
                            pairOfInts = (PairOfInts) reader.getKeyClass().newInstance();
                        }
                        reader.close();
                        Docnos2Titles.sLogger.info(hMapIV.size() + "," + i2 + " pairs loaded from " + path);
                    }
                }
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }

        private HMapIIW readSamplesFromCache(String str, JobConf jobConf) throws IOException {
            HMapIIW hMapIIW = null;
            for (Path path : DistributedCache.getLocalCacheFiles(jobConf)) {
                if (path.toString().contains(str)) {
                    hMapIIW = new HMapIIW();
                    LineReader lineReader = new LineReader(FileSystem.getLocal(jobConf).open(path));
                    Text text = new Text();
                    while (lineReader.readLine(text) != 0) {
                        int parseInt = Integer.parseInt(text.toString());
                        Docnos2Titles.sLogger.info(parseInt + " --> sample");
                        hMapIIW.put(parseInt, 1);
                    }
                    lineReader.close();
                    Docnos2Titles.sLogger.info(hMapIIW.size() + " sampled");
                }
            }
            if (hMapIIW == null) {
                throw new RuntimeException("Not found in local cache: " + str);
            }
            return hMapIIW;
        }

        public void map(IntWritable intWritable, WikipediaPage wikipediaPage, OutputCollector<PairOfInts, PairOfIntString> outputCollector, Reporter reporter) throws IOException {
            int i = intWritable.get();
            String title = wikipediaPage.getTitle();
            int i2 = wikipediaPage.getLanguage().equals(this.srcLang) ? 1 : -1;
            if (i2 == 1) {
                i += 1000000000;
                if (this.samplesMap != null && !this.samplesMap.containsKey(i)) {
                    return;
                }
            }
            if (this.pwsimMapping.isEmpty()) {
                loadPairs(this.pwsimMapping, i2, this.mJob, reporter);
                Docnos2Titles.sLogger.info("Mapping loaded: " + this.pwsimMapping.size());
            }
            if (this.pwsimMapping.containsKey(i)) {
                this.similarDocnos = (ArrayListOfIntsWritable) this.pwsimMapping.get(i);
                Iterator it = this.similarDocnos.iterator();
                while (it.hasNext()) {
                    int intValue = ((Integer) it.next()).intValue();
                    if (i2 != -1) {
                        this.keyOut.set(i, intValue);
                    } else if (this.samplesMap == null || this.samplesMap.containsKey(intValue)) {
                        this.keyOut.set(intValue, i);
                    }
                    this.valOut.set(i2, title);
                    outputCollector.collect(this.keyOut, this.valOut);
                }
            }
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((IntWritable) obj, (WikipediaPage) obj2, (OutputCollector<PairOfInts, PairOfIntString>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:ivory/lsh/eval/Docnos2Titles$MyReducer.class */
    private static class MyReducer extends MapReduceBase implements Reducer<PairOfInts, PairOfIntString, Text, Text> {
        private Text fTitle;
        private Text eTitle;

        private MyReducer() {
        }

        public void configure(JobConf jobConf) {
            this.fTitle = new Text();
            this.eTitle = new Text();
        }

        public void reduce(PairOfInts pairOfInts, Iterator<PairOfIntString> it, OutputCollector<Text, Text> outputCollector, Reporter reporter) throws IOException {
            this.eTitle.clear();
            this.fTitle.clear();
            Docnos2Titles.sLogger.info(pairOfInts);
            int i = 0;
            while (it.hasNext()) {
                PairOfIntString next = it.next();
                Docnos2Titles.sLogger.info(next);
                if (next.getLeftElement() == -1) {
                    this.eTitle.set(next.getRightElement());
                    i++;
                } else {
                    if (next.getLeftElement() != 1) {
                        throw new RuntimeException("Unknown language ID: " + next.getLeftElement());
                    }
                    this.fTitle.set(next.getRightElement());
                    i++;
                }
            }
            if (i == 2) {
                outputCollector.collect(this.fTitle, this.eTitle);
            } else {
                Docnos2Titles.sLogger.info("Incomplete data for " + pairOfInts + ":" + this.fTitle + "," + this.eTitle);
            }
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterator it, OutputCollector outputCollector, Reporter reporter) throws IOException {
            reduce((PairOfInts) obj, (Iterator<PairOfIntString>) it, (OutputCollector<Text, Text>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:ivory/lsh/eval/Docnos2Titles$Pairs.class */
    /**
     * Counter-style enum declared by this job.
     * NOTE(review): none of these constants is referenced in the visible code; presumably they
     * were meant as Hadoop counters - confirm before removing or renaming.
     */
    enum Pairs {
        COUNT,
        COUNT2,
        COUNT3,
        COUNTE,
        COUNTF,
        COUNT4,
        COUNT3x
    }

    /** Prints the command-line help for the current option set, then terminates the JVM with status -1. */
    private static void printUsage() {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("Docnos2Titles", options);
        System.exit(-1);
    }

    /**
     * Parses the command line, configures the Docnos2Titles job and runs it to completion.
     *
     * <p>The pwsim output file is always shipped through the distributed cache. The sample
     * docnos file is optional: it is shipped, and "Ivory.SampleFile" is set, only when the
     * "docnos" option was given.
     *
     * @param strArr command-line arguments
     * @return 0 when the job finishes; -1 if the arguments cannot be parsed
     * @throws Exception if the job cannot be configured or fails
     */
    @Override
    public int run(String[] strArr) throws Exception {
        JobConf jobConf = new JobConf(getConf(), Docnos2Titles.class);
        CommandLine parseArgs = parseArgs(strArr);
        if (parseArgs == null) {
            printUsage();
            // printUsage exits the JVM; returning keeps this method safe if that ever changes.
            return -1;
        }
        String eCollectionPath = parseArgs.getOptionValue(ECOLLECTION_OPTION);
        String fCollectionPath = parseArgs.getOptionValue(FCOLLECTION_OPTION);
        String pwsimPairsPath = parseArgs.getOptionValue(PWSIM_OPTION);
        String outputPath = parseArgs.getOptionValue("output");
        String eLang = parseArgs.getOptionValue(ELANG_OPTION);
        String fLang = parseArgs.getOptionValue(FLANG_OPTION);
        // Optional: null when the "docnos" option is not given.
        String sampleDocnosFile = parseArgs.getOptionValue(SAMPLEDOCNOS_OPTION);
        jobConf.setJobName("Docnos2Titles_" + fLang + "-" + eLang);
        FileInputFormat.addInputPaths(jobConf, eCollectionPath);
        FileInputFormat.addInputPaths(jobConf, fCollectionPath);
        FileOutputFormat.setOutputPath(jobConf, new Path(outputPath));
        DistributedCache.addCacheFile(new URI(pwsimPairsPath), jobConf);
        if (sampleDocnosFile != null) {
            // Without this guard an omitted sample file fails in new URI(null) / set(key, null).
            DistributedCache.addCacheFile(new URI(sampleDocnosFile), jobConf);
            jobConf.set("Ivory.SampleFile", sampleDocnosFile);
        }
        jobConf.set("eLang", eLang);
        jobConf.set("fLang", fLang);
        jobConf.set("PwsimPairs", pwsimPairsPath);
        jobConf.setInt("mapred.task.timeout", 60000000);
        jobConf.set("mapreduce.map.memory.mb", "3000");
        jobConf.set("mapreduce.map.java.opts", "-Xmx3000m");
        jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
        jobConf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
        jobConf.setNumMapTasks(100);
        jobConf.setNumReduceTasks(1);
        jobConf.setInt("mapred.min.split.size", 2000000000);
        jobConf.setFloat("mapred.reduce.slowstart.completed.maps", 0.9f);
        jobConf.setInputFormat(SequenceFileInputFormat.class);
        jobConf.setOutputFormat(TextOutputFormat.class);
        jobConf.setMapOutputKeyClass(PairOfInts.class);
        jobConf.setMapOutputValueClass(PairOfIntString.class);
        jobConf.setOutputKeyClass(Text.class);
        jobConf.setOutputValueClass(Text.class);
        jobConf.setMapperClass(MyMapper.class);
        jobConf.setReducerClass(MyReducer.class);
        sLogger.info("Running job " + jobConf.getJobName() + "...");
        sLogger.info("E-collection path: " + eCollectionPath);
        sLogger.info("F-collection path: " + fCollectionPath);
        sLogger.info("Pwsim output path: " + pwsimPairsPath);
        sLogger.info("Output path: " + outputPath);
        sLogger.info("Sample file?: " + (sampleDocnosFile != null ? sampleDocnosFile : "none"));
        long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        System.out.println("Job finished in " + ((System.currentTimeMillis() - startTime) / 1000.0d) + " seconds");
        return 0;
    }

    /**
     * Rebuilds the option set and parses the given arguments against it.
     *
     * <p>Required options: pwsim output path, output path, F and E collection paths, F and E
     * language codes. Optional: sample docnos file and libjars.
     *
     * @param strArr raw command-line arguments
     * @return the parsed command line, or null (after printing the error to stderr) if parsing fails
     */
    @SuppressWarnings("static-access")
    private CommandLine parseArgs(String[] strArr) throws Exception {
        options = new Options();
        options.addOption(OptionBuilder.withDescription("path to output of pwsim algorithm")
                .withArgName("path").hasArg().isRequired().create(PWSIM_OPTION));
        options.addOption(OptionBuilder.withDescription("path to output")
                .withArgName("path").hasArg().isRequired().create("output"));
        options.addOption(OptionBuilder.withDescription("source-side raw collection path")
                .withArgName("path").hasArg().isRequired().create(FCOLLECTION_OPTION));
        options.addOption(OptionBuilder.withDescription("target-side raw collection path")
                .withArgName("path").hasArg().isRequired().create(ECOLLECTION_OPTION));
        options.addOption(OptionBuilder.withDescription("two-letter code for f-language")
                .withArgName("en|de|tr|cs|zh|ar|es").hasArg().isRequired().create(FLANG_OPTION));
        options.addOption(OptionBuilder.withDescription("two-letter code for e-language")
                .withArgName("en|de|tr|cs|zh|ar|es").hasArg().isRequired().create(ELANG_OPTION));
        options.addOption(OptionBuilder.withDescription("only keep pairs that match these docnos")
                .withArgName("path to sample docnos file").hasArg().create(SAMPLEDOCNOS_OPTION));
        options.addOption(OptionBuilder.withDescription("Hadoop option to load external jars")
                .withArgName("jar packages").hasArg().create(LIBJARS_OPTION));

        CommandLine parsed;
        try {
            parsed = new GnuParser().parse(options, strArr);
        } catch (ParseException parseError) {
            System.err.println("Error parsing command line: " + parseError.getMessage());
            parsed = null;
        }
        return parsed;
    }

    /** Command-line entry point: runs the tool through ToolRunner and exits with its return code. */
    public static void main(String[] strArr) throws Exception {
        Docnos2Titles tool = new Docnos2Titles();
        int exitCode = ToolRunner.run(tool, strArr);
        System.exit(exitCode);
    }
}
