package edu.umd.cloud9.webgraph;

import edu.umd.cloud9.io.array.ArrayListWritable;
import edu.umd.cloud9.util.PowerTool;
import edu.umd.cloud9.util.array.ArrayListOfInts;
import edu.umd.cloud9.webgraph.data.AnchorText;
import edu.umd.cloud9.webgraph.data.AnchorTextConstants;
import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/umd/cloud9/webgraph/BuildReverseWebGraph.class */
public class BuildReverseWebGraph extends PowerTool {
    /** Logger used by {@code runTool()} for its progress messages. */
    private static final Logger LOG = Logger.getLogger(BuildReverseWebGraph.class);
    /**
     * Configuration keys returned by {@code getRequiredParameters()}; {@code runTool()} reads
     * each of these four keys from the job configuration.
     */
    public static final String[] RequiredParameters = {"Cloud9.InputPath", "Cloud9.OutputPath", "Cloud9.Mappers", "Cloud9.Reducers"};

    /* loaded from: input_file:edu/umd/cloud9/webgraph/BuildReverseWebGraph$Reduce.class */
    public static class Reduce extends MapReduceBase implements Reducer<Text, ArrayListWritable<AnchorText>, IntWritable, ArrayListWritable<AnchorText>> {
        private static ArrayListWritable<AnchorText> packet;
        private static boolean pushed;
        private int indegree;
        private static final IntWritable keyWord = new IntWritable();
        private static final ArrayListWritable<AnchorText> arrayList = new ArrayListWritable<>();
        private static final ArrayListOfInts docnos = new ArrayListOfInts();

        /* JADX WARN: Multi-variable type inference failed */
        public void reduce(Text text, Iterator<ArrayListWritable<AnchorText>> it, OutputCollector<IntWritable, ArrayListWritable<AnchorText>> outputCollector, Reporter reporter) throws IOException {
            docnos.clear();
            arrayList.clear();
            this.indegree = 0;
            while (it.hasNext()) {
                packet = it.next();
                Iterator<E> it2 = packet.iterator();
                while (it2.hasNext()) {
                    AnchorText anchorText = (AnchorText) it2.next();
                    if (anchorText.isDocnoField()) {
                        Iterator<Integer> it3 = anchorText.iterator();
                        while (it3.hasNext()) {
                            docnos.add(it3.next().intValue());
                        }
                    } else {
                        pushed = false;
                        this.indegree += anchorText.getSize();
                        int i = 0;
                        while (true) {
                            if (i >= arrayList.size()) {
                                break;
                            }
                            if (((AnchorText) arrayList.get(i)).equalsIgnoreSources(anchorText)) {
                                ((AnchorText) arrayList.get(i)).addDocumentsFrom(anchorText);
                                pushed = true;
                                break;
                            }
                            i++;
                        }
                        if (!pushed) {
                            arrayList.add(anchorText.m303clone());
                        }
                    }
                }
            }
            arrayList.add(new AnchorText(AnchorTextConstants.Type.IN_DEGREE.val, "", this.indegree));
            arrayList.add(new AnchorText(AnchorTextConstants.Type.URL_FIELD.val, text.toString()));
            Collections.sort(arrayList);
            Iterator<Integer> it4 = docnos.iterator();
            while (it4.hasNext()) {
                keyWord.set(it4.next().intValue());
                outputCollector.collect(keyWord, arrayList);
            }
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterator it, OutputCollector outputCollector, Reporter reporter) throws IOException {
            reduce((Text) obj, (Iterator<ArrayListWritable<AnchorText>>) it, (OutputCollector<IntWritable, ArrayListWritable<AnchorText>>) outputCollector, reporter);
        }
    }

    /**
     * Returns the {@code RequiredParameters} array.
     *
     * @return the shared static array itself, not a copy
     */
    @Override // edu.umd.cloud9.util.PowerTool
    public String[] getRequiredParameters() {
        return RequiredParameters;
    }

    /**
     * Creates the tool, handing {@code configuration} to the {@code PowerTool} constructor.
     *
     * @param configuration configuration forwarded to the superclass
     */
    public BuildReverseWebGraph(Configuration configuration) {
        super(configuration);
    }

    /**
     * Sets up the "ReverseWebGraph" job (identity mapper, {@link Reduce} reducer, block-compressed
     * sequence-file output) from the {@code Cloud9.*} configuration keys and runs it through
     * {@code JobClient.runJob}.
     *
     * <p>If the output path already exists the job is not run; a message is logged instead.
     *
     * @return always 0, whether the job was run or skipped
     * @throws Exception if file-system access or the job itself fails
     */
    @Override
    public int runTool() throws Exception {
        JobConf conf = new JobConf(getConf(), BuildReverseWebGraph.class);
        FileSystem fs = FileSystem.get(conf);

        int numMappers = conf.getInt("Cloud9.Mappers", 1);
        int numReducers = conf.getInt("Cloud9.Reducers", DriverUtil.DEFAULT_REDUCERS);
        String inputPath = conf.get("Cloud9.InputPath");
        String outputPath = conf.get("Cloud9.OutputPath");

        // General job settings.
        conf.setJobName("ReverseWebGraph");
        conf.set("mapred.child.java.opts", "-Xmx4096m");
        conf.setInt("mapred.task.timeout", 60000000);
        conf.setNumMapTasks(numMappers);
        conf.setNumReduceTasks(numReducers);

        // Map/reduce classes and their key/value types.
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(Reduce.class);
        conf.setOutputKeyClass(IntWritable.class);
        conf.setOutputValueClass(ArrayListWritable.class);
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(ArrayListWritable.class);

        // Sequence files in, block-compressed sequence files out.
        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setCompressOutput(conf, true);
        SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);
        SequenceFileInputFormat.setInputPaths(conf, inputPath);
        Path outputDir = new Path(outputPath);
        FileOutputFormat.setOutputPath(conf, outputDir);

        LOG.info("BuildReverseWebGraph");
        LOG.info(" - input path: " + inputPath);
        LOG.info(" - output path: " + outputPath);

        if (fs.exists(outputDir)) {
            // Existing output is taken to mean this step was already done.
            LOG.info(outputPath + " already exists! Skipping this step...");
        } else {
            JobClient.runJob(conf);
        }
        return 0;
    }
}
