package ivory.integration.wikipedia;

import edu.umd.cloud9.io.map.HMapSFW;
import ivory.core.data.document.WeightedIntDocVector;
import java.io.IOException;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/integration/wikipedia/SearchSequenceFiles.class */
public class SearchSequenceFiles extends Configured implements Tool {
    // Class-wide log4j logger; run() raises its level to INFO before logging job details.
    private static final Logger LOG = Logger.getLogger(SearchSequenceFiles.class);
    // Command-line option definitions. Rebuilt on every parseArgs() call and read by printUsage().
    private static Options options;
    // Required option: path to the input SequenceFiles.
    private static final String IN_OPTION = "input";
    // Required option: path to the job output directory.
    private static final String OUT_OPTION = "output";
    // Required option: name of the value class stored in the input SequenceFiles.
    private static final String VALUECLASS_OPTION = "valueclass";
    // Required option: comma-separated integer keys to extract. The same name is also used
    // as the JobConf property through which run() passes the key list to the mappers.
    private static final String KEYS_OPTION = "keys";
    // Optional option registered in parseArgs(); its value is never read in this class.
    private static final String LIBJARS_OPTION = "libjars";

    /* loaded from: input_file:ivory/integration/wikipedia/SearchSequenceFiles$MyMapperInt.class */
    /**
     * Mapper over {@code <IntWritable, WeightedIntDocVector>} records that emits a record
     * unchanged when its key equals one of the requested keys and drops it otherwise.
     *
     * <p>The requested keys are read from the {@code "keys"} job property (set by
     * {@code SearchSequenceFiles.run}) as a comma-separated list of integers.
     */
    static class MyMapperInt extends MapReduceBase implements Mapper<IntWritable, WeightedIntDocVector, IntWritable, WeightedIntDocVector> {
        /** Requested keys, parsed once per task in {@link #configure(JobConf)}. */
        private int[] keys;

        MyMapperInt() {
        }

        /**
         * Parses the comma-separated key list from the job configuration.
         *
         * <p>Parsing happens here rather than in {@code map} so that it is done once per
         * task instead of once per record. Whitespace around each key is ignored.
         *
         * @param jobConf job configuration carrying the {@code "keys"} property
         * @throws NumberFormatException if any entry is not a valid integer
         */
        public void configure(JobConf jobConf) {
            String[] tokens = jobConf.get(SearchSequenceFiles.KEYS_OPTION).split(",");
            int[] parsed = new int[tokens.length];
            for (int i = 0; i < tokens.length; i++) {
                parsed[i] = Integer.parseInt(tokens[i].trim());
            }
            this.keys = parsed;
        }

        /**
         * Emits the record once for every requested key equal to the record key.
         *
         * <p>A key listed more than once in the configuration therefore yields the record
         * more than once, as before.
         *
         * @param intWritable record key
         * @param weightedIntDocVector record value, forwarded unchanged
         * @param outputCollector sink for matching records
         * @param reporter unused
         * @throws IOException if the collector fails to accept a record
         */
        public void map(IntWritable intWritable, WeightedIntDocVector weightedIntDocVector, OutputCollector<IntWritable, WeightedIntDocVector> outputCollector, Reporter reporter) throws IOException {
            int recordKey = intWritable.get();
            for (int wanted : this.keys) {
                if (wanted == recordKey) {
                    outputCollector.collect(intWritable, weightedIntDocVector);
                }
            }
        }
    }

    /* loaded from: input_file:ivory/integration/wikipedia/SearchSequenceFiles$MyMapperTerm.class */
    /**
     * Mapper over {@code <IntWritable, HMapSFW>} records that emits a record unchanged
     * when its key equals one of the requested keys and drops it otherwise.
     *
     * <p>The requested keys are read from the {@code "keys"} job property (set by
     * {@code SearchSequenceFiles.run}) as a comma-separated list of integers.
     */
    static class MyMapperTerm extends MapReduceBase implements Mapper<IntWritable, HMapSFW, IntWritable, HMapSFW> {
        /** Requested keys, parsed once per task in {@link #configure(JobConf)}. */
        private int[] keys;

        MyMapperTerm() {
        }

        /**
         * Parses the comma-separated key list from the job configuration.
         *
         * <p>Parsing happens here rather than in {@code map} so that it is done once per
         * task instead of once per record. Whitespace around each key is ignored.
         *
         * @param jobConf job configuration carrying the {@code "keys"} property
         * @throws NumberFormatException if any entry is not a valid integer
         */
        public void configure(JobConf jobConf) {
            String[] tokens = jobConf.get(SearchSequenceFiles.KEYS_OPTION).split(",");
            int[] parsed = new int[tokens.length];
            for (int i = 0; i < tokens.length; i++) {
                parsed[i] = Integer.parseInt(tokens[i].trim());
            }
            this.keys = parsed;
        }

        /**
         * Emits the record once for every requested key equal to the record key.
         *
         * <p>A key listed more than once in the configuration therefore yields the record
         * more than once, as before.
         *
         * @param intWritable record key
         * @param hMapSFW record value, forwarded unchanged
         * @param outputCollector sink for matching records
         * @param reporter unused
         * @throws IOException if the collector fails to accept a record
         */
        public void map(IntWritable intWritable, HMapSFW hMapSFW, OutputCollector<IntWritable, HMapSFW> outputCollector, Reporter reporter) throws IOException {
            int recordKey = intWritable.get();
            for (int wanted : this.keys) {
                if (wanted == recordKey) {
                    outputCollector.collect(intWritable, hMapSFW);
                }
            }
        }
    }

    /* loaded from: input_file:ivory/integration/wikipedia/SearchSequenceFiles$mapoutput.class */
    /**
     * Single-constant enum. Nothing in this file references it; presumably intended as a
     * Hadoop counter group for emitted records -- confirm before relying on it.
     */
    enum mapoutput {
        count;
    }

    /**
     * Prints the command-line help for the currently registered options and then
     * terminates the JVM with exit status -1. This method never returns normally.
     */
    private static void printUsage() {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("SearchSequenceFiles", options);
        System.exit(-1);
    }

    /**
     * Configures and runs a MapReduce job that copies, from the input SequenceFiles, only
     * the records whose integer key appears in the {@code -keys} list.
     *
     * <p>Any existing output directory is deleted recursively before the job starts. The
     * mapper and value class are chosen from the {@code -valueclass} argument: if it
     * contains {@code "HMapSFW"} the term-vector mapper is used, otherwise the
     * {@code WeightedIntDocVector} mapper. Matching records pass through an identity
     * reducer into a single uncompressed SequenceFile output.
     *
     * @param strArr command-line arguments (see {@code parseArgs} for the options)
     * @return 0 once the job has run; -1 if the arguments could not be parsed
     * @throws Exception if file system access or job execution fails
     */
    public int run(String[] strArr) throws Exception {
        CommandLine cmdline = parseArgs(strArr);
        if (cmdline == null) {
            printUsage();
            // printUsage() currently exits the JVM; returning here keeps this path
            // null-safe even if that ever changes.
            return -1;
        }
        String inputPath = cmdline.getOptionValue(IN_OPTION);
        String outputPath = cmdline.getOptionValue(OUT_OPTION);
        String keys = cmdline.getOptionValue(KEYS_OPTION);
        String valueClassName = cmdline.getOptionValue(VALUECLASS_OPTION);

        JobConf jobConf = new JobConf(getConf(), SearchSequenceFiles.class);
        jobConf.setJobName("SearchSequenceFiles");

        // Resolve the file system from the output path itself so that an output URI on a
        // non-default file system is cleaned up in the right place.
        Path outputDir = new Path(outputPath);
        outputDir.getFileSystem(jobConf).delete(outputDir, true);

        FileInputFormat.setInputPaths(jobConf, inputPath);
        FileOutputFormat.setOutputPath(jobConf, outputDir);
        FileOutputFormat.setCompressOutput(jobConf, false);

        jobConf.set("mapred.child.java.opts", "-Xmx2048m");
        jobConf.setInt("mapred.map.max.attempts", 100);
        jobConf.setInt("mapred.reduce.max.attempts", 100);
        jobConf.setInt("mapred.task.timeout", 600000000);
        // Hand the requested keys to the mappers, which read them back in configure().
        jobConf.set(KEYS_OPTION, keys);

        LOG.setLevel(Level.INFO);
        LOG.info("Running job " + jobConf.getJobName());
        LOG.info("Input directory: " + inputPath);
        LOG.info("Output directory: " + outputPath);
        LOG.info("Value class: " + valueClassName);

        if (valueClassName.contains("HMapSFW")) {
            jobConf.setMapperClass(MyMapperTerm.class);
            jobConf.setMapOutputValueClass(HMapSFW.class);
            jobConf.setOutputValueClass(HMapSFW.class);
        } else {
            jobConf.setMapperClass(MyMapperInt.class);
            jobConf.setMapOutputValueClass(WeightedIntDocVector.class);
            jobConf.setOutputValueClass(WeightedIntDocVector.class);
        }
        jobConf.setReducerClass(IdentityReducer.class);
        jobConf.setMapOutputKeyClass(IntWritable.class);
        jobConf.setOutputKeyClass(IntWritable.class);
        jobConf.setNumReduceTasks(1);
        jobConf.setInputFormat(SequenceFileInputFormat.class);
        jobConf.setOutputFormat(SequenceFileOutputFormat.class);

        JobClient.runJob(jobConf);
        return 0;
    }

    /**
     * Registers the supported command-line options and parses the given arguments.
     *
     * <p>{@code input}, {@code output}, {@code valueclass} and {@code keys} are required
     * and take one argument each; {@code libjars} is optional and takes one argument.
     * The static {@code options} field is replaced on every call.
     *
     * @param strArr raw command-line arguments
     * @return the parsed command line, or {@code null} if parsing failed (the parser's
     *         message is written to standard error in that case)
     */
    @SuppressWarnings("static-access")
    private static CommandLine parseArgs(String[] strArr) {
        options = new Options();
        options.addOption(OptionBuilder
                .withDescription("path to input <IntWritable, V> SequenceFiles")
                .withArgName("path")
                .hasArg()
                .isRequired()
                .create(IN_OPTION));
        options.addOption(OptionBuilder
                .withDescription("path to output")
                .withArgName("path")
                .hasArg()
                .isRequired()
                .create(OUT_OPTION));
        options.addOption(OptionBuilder
                .withDescription("Class of Value objects in SequenceFiles")
                .withArgName("class")
                .hasArg()
                .isRequired()
                .create(VALUECLASS_OPTION));
        options.addOption(OptionBuilder
                .withDescription("Integer keys to output, comma-separated")
                .withArgName("comma-separated integers")
                .hasArg()
                .isRequired()
                .create(KEYS_OPTION));
        options.addOption(OptionBuilder
                .withDescription("Hadoop option to load external jars")
                .withArgName("jar packages")
                .hasArg()
                .create(LIBJARS_OPTION));

        GnuParser parser = new GnuParser();
        try {
            return parser.parse(options, strArr);
        } catch (ParseException e) {
            System.err.println("Error parsing command line: " + e.getMessage());
            return null;
        }
    }

    /**
     * Command-line entry point: runs this tool through {@link ToolRunner} and exits the
     * JVM with the status code the tool returns, so that callers such as shell scripts
     * can detect failure.
     *
     * @param strArr command-line arguments, including any generic Hadoop options
     * @throws Exception if the tool fails with an exception
     */
    public static void main(String[] strArr) throws Exception {
        int exitCode = ToolRunner.run(new SearchSequenceFiles(), strArr);
        System.exit(exitCode);
    }
}
