package edu.umd.cloud9.webgraph.driver;

import edu.umd.cloud9.webgraph.BuildReverseWebGraph;
import edu.umd.cloud9.webgraph.BuildWebGraph;
import edu.umd.cloud9.webgraph.CollectHostnames;
import edu.umd.cloud9.webgraph.CollectionConfigurationManager;
import edu.umd.cloud9.webgraph.ComputeWeight;
import edu.umd.cloud9.webgraph.DriverUtil;
import edu.umd.cloud9.webgraph.TrecExtractLinks;
import edu.umd.cloud9.webgraph.normalizer.AnchorTextNormalizer;
import java.io.File;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/* loaded from: input_file:edu/umd/cloud9/webgraph/driver/TrecDriver.class */
/**
 * Command-line driver that chains the webgraph jobs for a TREC web collection.
 *
 * <p>The stages are run in this order, each one consuming the previous stage's
 * output directory under the output base:
 * <ol>
 *   <li>{@link TrecExtractLinks}</li>
 *   <li>{@link BuildReverseWebGraph}</li>
 *   <li>{@link BuildWebGraph}</li>
 *   <li>{@link CollectHostnames} (only with the compute-weights flag)</li>
 *   <li>{@link ComputeWeight} (only with the compute-weights flag)</li>
 * </ol>
 * The driver stops at the first stage that returns a non-zero status.
 */
public class TrecDriver extends Configured implements Tool {
    // Configuration keys written by this driver before each stage is launched.
    private static final String KEY_MAPPERS = "Cloud9.Mappers";
    private static final String KEY_REDUCERS = "Cloud9.Reducers";
    private static final String KEY_INPUT_PATH = "Cloud9.InputPath";
    private static final String KEY_OUTPUT_PATH = "Cloud9.OutputPath";

    private String inputBase;
    private String outputBase;
    private boolean includeInternalLinks = false;
    private boolean computeAnchorWeights = false;
    // Default normalizer, used when the normalizer option is not given.
    private String normalizer = "edu.umd.cloud9.webgraph.normalizer.AnchorTextBasicNormalizer";
    private Configuration conf;
    private CollectionConfigurationManager configer;

    /**
     * Parses the command line and runs the pipeline stages in sequence.
     *
     * @param strArr command-line arguments (after generic Hadoop options were
     *               handled by {@link ToolRunner})
     * @return 0 when every executed stage returned 0; -1 when the arguments are
     *         invalid (usage is printed) or any stage returned non-zero
     * @throws Exception propagated from the underlying stages
     */
    public int run(String[] strArr) throws Exception {
        this.conf = getConf();
        this.configer = new CollectionConfigurationManager();
        if (!readInput(strArr)) {
            printUsage();
            return -1;
        }
        this.configer.applyConfig(this.conf);
        this.conf.setBoolean("Cloud9.IncludeInternalLinks", this.includeInternalLinks);
        this.conf.set("Cloud9.AnchorTextNormalizer", this.normalizer);

        // Stage directories, all rooted at the output base.
        String extractedLinks = this.outputBase + "/" + DriverUtil.OUTPUT_EXTRACT_LINKS;
        String reverseGraph = this.outputBase + "/" + DriverUtil.OUTPUT_REVERSE_WEBGRAPH + "/";
        String webGraph = this.outputBase + "/" + DriverUtil.OUTPUT_WEBGRAPH + "/";
        String hostNames = this.outputBase + "/" + DriverUtil.OUTPUT_HOST_NAMES + "/";
        String weightedReverseGraph =
                this.outputBase + "/" + DriverUtil.OUTPUT_WEGIHTED_REVERSE_WEBGRAPH + "/";

        // Stage 1: extract links from the raw collection.
        this.conf.setInt(KEY_MAPPERS, 2000);
        configureStage(this.inputBase, extractedLinks);
        if (new TrecExtractLinks(this.conf, this.configer).run() != 0) {
            return -1;
        }

        // Stage 2: reverse web graph. The mapper count is intentionally not
        // touched here, matching the original flow.
        configureStage(extractedLinks, reverseGraph);
        if (new BuildReverseWebGraph(this.conf).run() != 0) {
            return -1;
        }

        // Stage 3: forward web graph built from the reverse graph.
        this.conf.setInt(KEY_MAPPERS, 1);
        configureStage(reverseGraph, webGraph);
        if (new BuildWebGraph(this.conf).run() != 0) {
            return -1;
        }

        if (!this.computeAnchorWeights) {
            return 0;
        }

        // Stage 4: host names collected from the forward web graph.
        this.conf.setInt(KEY_MAPPERS, 1);
        configureStage(webGraph, hostNames);
        if (new CollectHostnames(this.conf).run() != 0) {
            return -1;
        }

        // Stage 5: weights; the input is a comma-separated pair of directories
        // (reverse graph and host names).
        this.conf.setInt(KEY_MAPPERS, 1);
        configureStage(reverseGraph + "," + hostNames, weightedReverseGraph);
        return new ComputeWeight(this.conf).run() != 0 ? -1 : 0;
    }

    /** Sets the input path, output path and reducer count for the next stage. */
    private void configureStage(String inputPath, String outputPath) {
        this.conf.set(KEY_INPUT_PATH, inputPath);
        this.conf.set(KEY_OUTPUT_PATH, outputPath);
        this.conf.setInt(KEY_REDUCERS, DriverUtil.DEFAULT_REDUCERS);
    }

    /**
     * Program entry point; delegates to {@link ToolRunner} and exits with the
     * status returned by {@link #run(String[])}.
     */
    public static void main(String[] strArr) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new TrecDriver(), strArr));
    }

    /**
     * Prints the command-line usage followed by the generic Hadoop options.
     *
     * @return always -1
     */
    private static int printUsage() {
        System.out.println("\nusage:[-input collection-path][-output output-base][-collection {trecweb|gov2|wt10g}] [-inputFormat userSpecifiedInputFormatClass] [-docnoClass userSpecifiedDocnoMappingClass] -docno userSpecifiedDocnoMappingFile [-il] [-caw] [-normalizer normalizerClass] ");
        System.out.println("Help:");
        System.out.println("[-input collection-path]\n\tinput directory");
        System.out.println("[-output output-base]\n\toutput directory");
        System.out.println("-collection {trecweb|gov2|wt10g}\n\tname the collection name, if it is supported, automatic configuration will be applied");
        System.out.println("-inputFormat userSpecifiedInputFormatClass\n\tspecify the class work as FileInputFormat; Required when -collection is not specified");
        System.out.println("-docnoClass userSpecifiedDocnoMappingClass\n\tspecify the class work as DocnoMapping;Required when -collection is not specified. It should implement GenericDocnoMapping interface.");
        System.out.println("-docno userSpecifiedDocnoMappingFile\n\tspecify the File work as input to specified DocnoMapping class.");
        System.out.println("-il\n\tinclude internal links, without this option we will not include internal links");
        System.out.println("-caw\n\tcompute default anchor weights, without this option we will not compute default anchor weights");
        System.out.println("-normalizer normalizerClass\n\ta normalizer class used to normalize the lines of anchor text, must extend edu.umd.cloud9.webgraph.normalizer.AnchorTextNormalizer.");
        System.out.println();
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    /**
     * Parses the command line into this driver's fields and configuration.
     *
     * <p>Must be called after {@code conf} and {@code configer} are set, since
     * it writes to both.
     *
     * @param strArr command-line arguments
     * @return true when all arguments were accepted; false (after printing a
     *         diagnostic) when something is missing or invalid
     */
    private boolean readInput(String[] strArr) {
        if (strArr.length < 6) {
            System.out.println("More arguments needed.");
            return false;
        }
        // These three values are used unconditionally below, so fail with a
        // readable message instead of passing a missing value downstream.
        if (!DriverUtil.argExists(strArr, DriverUtil.CL_INPUT)
                || !DriverUtil.argExists(strArr, DriverUtil.CL_OUTPUT)
                || !DriverUtil.argExists(strArr, DriverUtil.CL_DOCNO_MAPPING)) {
            System.out.println("Missing required argument: input, output and docno mapping file must all be specified.");
            return false;
        }
        this.inputBase = new File(DriverUtil.argValue(strArr, DriverUtil.CL_INPUT)).getAbsolutePath();
        this.outputBase = new File(DriverUtil.argValue(strArr, DriverUtil.CL_OUTPUT)).getAbsolutePath();

        if (DriverUtil.argExists(strArr, DriverUtil.CL_COLLECTION)) {
            String collection = DriverUtil.argValue(strArr, DriverUtil.CL_COLLECTION);
            if (!this.configer.setConfByCollection(collection)) {
                System.out.println("Collection \"" + collection + "\" not supported, please specify inputformat and docnomapping class, or contact developer.");
                return false;
            }
        } else {
            // Without a named collection the user must supply both classes.
            String inputFormatClass = DriverUtil.argValue(strArr, DriverUtil.CL_INPUT_FORMAT);
            if (!this.configer.setUserSpecifiedInputFormat(inputFormatClass)) {
                System.out.println("class \"" + inputFormatClass + "\" doesn't exist or not sub-class of FileInputFormat");
                return false;
            }
            String docnoMappingClass = DriverUtil.argValue(strArr, DriverUtil.CL_DOCNO_MAPPING_CLASS);
            if (!this.configer.setUserSpecifiedDocnoMappingClass(docnoMappingClass)) {
                System.out.println("class \"" + docnoMappingClass + "\" doesn't exist or not implemented DocnoMapping");
                return false;
            }
        }

        this.conf.set("Cloud9.DocnoMappingFile", DriverUtil.argValue(strArr, DriverUtil.CL_DOCNO_MAPPING));
        this.includeInternalLinks = DriverUtil.argExists(strArr, DriverUtil.CL_INCLUDE_INTERNAL_LINKS);
        this.computeAnchorWeights = DriverUtil.argExists(strArr, DriverUtil.CL_COMPUTE_WEIGHTS);

        // The normalizer is optional: keep the default unless one was supplied.
        if (!DriverUtil.argExists(strArr, DriverUtil.CL_NORMALIZER)) {
            return true;
        }
        String normalizerClass = DriverUtil.argValue(strArr, DriverUtil.CL_NORMALIZER);
        try {
            if (!AnchorTextNormalizer.class.isAssignableFrom(Class.forName(normalizerClass))) {
                System.out.println("Invalid arguments; Normalizer class must implement AnchorTextNormalizer interface.");
                return false;
            }
        } catch (ClassNotFoundException e) {
            System.out.println("Invalid arguments; Specified Normalizer class doesn't exist");
            return false;
        }
        this.normalizer = normalizerClass;
        return true;
    }
}
