package ivory.ptc;

import edu.umd.cloud9.io.array.ArrayListOfIntsWritable;
import edu.umd.cloud9.io.array.ArrayListWritable;
import edu.umd.cloud9.util.PowerTool;
import edu.umd.cloud9.webgraph.data.AnchorText;
import ivory.core.Constants;
import ivory.ptc.data.AnchorTextTarget;
import ivory.ptc.judgments.weighting.WeightingScheme;
import java.io.IOException;
import java.net.URI;
import java.util.Collections;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/ptc/AnchorTextInvertedIndex.class */
public class AnchorTextInvertedIndex extends PowerTool {
    /** Logger for this tool; its level is set to INFO in the static initializer. */
    private static final Logger LOG = Logger.getLogger(AnchorTextInvertedIndex.class);

    /**
     * Delimiter between the entries of the "Ivory.WeightingSchemeParameters" value
     * (runTool splits that value on ","). The misspelled name is part of the public API.
     */
    public static final String PARAMETER_SEPARATER = ",";

    /** Configuration keys returned by {@link #getRequiredParameters()}; populated in the static initializer. */
    public static final String[] RequiredParameters;

    /* loaded from: input_file:ivory/ptc/AnchorTextInvertedIndex$MyMapper.class */
    private static class MyMapper extends MapReduceBase implements Mapper<IntWritable, ArrayListWritable<AnchorText>, Text, AnchorTextTarget> {
        private static final AnchorTextTarget anchorTextTarget = new AnchorTextTarget();
        private static final Text keyOut = new Text();
        private static WeightingScheme weightingScheme;

        private MyMapper() {
        }

        public void configure(JobConf jobConf) {
            try {
                Path[] localCacheFiles = DistributedCache.getLocalCacheFiles(jobConf);
                String[] strArr = new String[localCacheFiles.length];
                for (int i = 0; i < strArr.length; i++) {
                    strArr[i] = localCacheFiles[i].toString();
                }
                try {
                    weightingScheme = (WeightingScheme) Class.forName(jobConf.get("Ivory.WeightingScheme")).newInstance();
                    weightingScheme.initialize(FileSystem.getLocal(jobConf), strArr);
                } catch (Exception e) {
                    throw new RuntimeException("Mapper failed to initialize the weighting scheme: " + jobConf.get("Ivory.WeightingScheme") + " with parameters: " + jobConf.get("Ivory.WeightingSchemeParameters"));
                }
            } catch (IOException e2) {
                throw new RuntimeException("Local cache files not read properly.");
            }
        }

        public void map(IntWritable intWritable, ArrayListWritable<AnchorText> arrayListWritable, OutputCollector<Text, AnchorTextTarget> outputCollector, Reporter reporter) throws IOException {
            anchorTextTarget.setTarget(intWritable.get());
            Iterator it = arrayListWritable.iterator();
            while (it.hasNext()) {
                AnchorText anchorText = (AnchorText) it.next();
                if (anchorText.isExternalInLink() || anchorText.isInternalInLink()) {
                    keyOut.set(anchorText.getText());
                    anchorTextTarget.setSources(new ArrayListOfIntsWritable(anchorText.getDocuments()));
                    anchorTextTarget.setWeight(weightingScheme.getWeight(intWritable.get(), anchorText));
                    outputCollector.collect(keyOut, anchorTextTarget);
                }
            }
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((IntWritable) obj, (ArrayListWritable<AnchorText>) obj2, (OutputCollector<Text, AnchorTextTarget>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:ivory/ptc/AnchorTextInvertedIndex$MyReducer.class */
    private static class MyReducer extends MapReduceBase implements Reducer<Text, AnchorTextTarget, Text, ArrayListWritable<AnchorTextTarget>> {
        private static final ArrayListWritable<AnchorTextTarget> outList = new ArrayListWritable<>();

        private MyReducer() {
        }

        public void reduce(Text text, Iterator<AnchorTextTarget> it, OutputCollector<Text, ArrayListWritable<AnchorTextTarget>> outputCollector, Reporter reporter) throws IOException {
            outList.clear();
            while (it.hasNext()) {
                outList.add(new AnchorTextTarget(it.next()));
            }
            Collections.sort(outList);
            outputCollector.collect(text, outList);
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterator it, OutputCollector outputCollector, Reporter reporter) throws IOException {
            reduce((Text) obj, (Iterator<AnchorTextTarget>) it, (OutputCollector<Text, ArrayListWritable<AnchorTextTarget>>) outputCollector, reporter);
        }
    }

    /**
     * Returns the configuration keys this tool declares as required.
     *
     * @return the shared {@link #RequiredParameters} array (not a copy)
     */
    public String[] getRequiredParameters() {
        return RequiredParameters;
    }

    /**
     * Creates the tool with the given configuration, which is passed on to the superclass.
     *
     * @param configuration the configuration from which runTool later reads its settings
     */
    public AnchorTextInvertedIndex(Configuration configuration) {
        super(configuration);
    }

    /**
     * Configures and runs the "BuildAnchorTextInvertedIndex" MapReduce job.
     *
     * <p>Reads the input/output paths, task counts and weighting scheme settings from the tool's
     * configuration, registers every weighting scheme parameter as a distributed-cache file,
     * deletes any existing output directory and then runs the job to completion.
     *
     * @return 0 once the job has finished
     * @throws IllegalArgumentException if "Ivory.WeightingSchemeParameters" is not set
     * @throws Exception if a parameter is not a valid URI, or the file system or job fails
     */
    public int runTool() throws Exception {
        JobConf jobConf = new JobConf(getConf(), AnchorTextInvertedIndex.class);
        FileSystem fileSystem = FileSystem.get(jobConf);

        String inputPathName = jobConf.get("Ivory.InputPath");
        String outputPathName = jobConf.get("Ivory.OutputPath");
        Path inputPath = new Path(inputPathName);
        Path outputPath = new Path(outputPathName);
        int numMapTasks = jobConf.getInt(Constants.NumMapTasks, 1);
        int numReduceTasks = jobConf.getInt(Constants.NumReduceTasks, 100);
        String weightingSchemeParameters = jobConf.get("Ivory.WeightingSchemeParameters");

        LOG.info("BuildAnchorTextInvertedIndex");
        LOG.info(" - input path: " + inputPathName);
        LOG.info(" - output path: " + outputPathName);
        LOG.info(" - number of reducers: " + numReduceTasks);
        LOG.info(" - weighting scheme: " + jobConf.get("Ivory.WeightingScheme"));
        LOG.info(" - weighting scheme parameters: " + weightingSchemeParameters);

        // Fail with a descriptive message instead of a bare NullPointerException on split().
        if (weightingSchemeParameters == null) {
            throw new IllegalArgumentException("Missing required parameter: Ivory.WeightingSchemeParameters");
        }
        for (String rawParameter : weightingSchemeParameters.split(PARAMETER_SEPARATER)) {
            // Tolerate "a, b" and stray separators: padded or empty entries are not usable URIs.
            String parameter = rawParameter.trim();
            if (parameter.isEmpty()) {
                continue;
            }
            DistributedCache.addCacheFile(new URI(parameter), jobConf);
        }

        jobConf.setJobName("BuildAnchorTextInvertedIndex");
        jobConf.setNumMapTasks(numMapTasks);
        jobConf.setNumReduceTasks(numReduceTasks);
        jobConf.set("mapred.child.java.opts", "-Xmx4096m");
        jobConf.setInt("mapred.task.timeout", 60000000);

        FileInputFormat.setInputPaths(jobConf, inputPath);
        FileOutputFormat.setOutputPath(jobConf, outputPath);
        jobConf.setInputFormat(SequenceFileInputFormat.class);
        jobConf.setOutputFormat(SequenceFileOutputFormat.class);
        jobConf.setMapOutputKeyClass(Text.class);
        jobConf.setMapOutputValueClass(AnchorTextTarget.class);
        jobConf.setOutputKeyClass(Text.class);
        jobConf.setOutputValueClass(ArrayListWritable.class);
        jobConf.setMapperClass(MyMapper.class);
        jobConf.setReducerClass(MyReducer.class);

        // Remove output left over from a previous run; the explicit recursive flag replaces the
        // deprecated single-argument delete(Path).
        fileSystem.delete(outputPath, true);
        JobClient.runJob(jobConf);
        return 0;
    }

    // Class initialization: publish the required configuration keys and set the logger to INFO.
    static {
        RequiredParameters = new String[] {
            Constants.NumMapTasks,
            Constants.NumReduceTasks,
            "Ivory.InputPath",
            "Ivory.OutputPath",
            "Ivory.WeightingScheme",
            "Ivory.WeightingSchemeParameters"
        };
        LOG.setLevel(Level.INFO);
    }
}
