package edu.umd.cloud9.integration.webgraph;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import edu.umd.cloud9.integration.IntegrationUtils;
import edu.umd.cloud9.io.array.ArrayListWritable;
import edu.umd.cloud9.webgraph.DriverUtil;
import edu.umd.cloud9.webgraph.data.AnchorText;
import edu.umd.cloud9.webgraph.driver.TrecDriver;
import edu.umd.cloud9.webgraph.normalizer.AnchorTextBasicNormalizer;
import java.util.ArrayList;
import java.util.Map;
import java.util.Random;
import junit.framework.JUnit4TestAdapter;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.junit.Assert;
import org.junit.Test;

/* loaded from: input_file:edu/umd/cloud9/integration/webgraph/VerifyGov2Webgraph.class */
/**
 * Integration test that runs {@link TrecDriver} over one GOV2 segment and then checks selected
 * anchor-text weights and source documents in the weighted reverse webgraph it writes.
 *
 * <p>{@link #verifyAnchors()} reads files under the same output directory that
 * {@link #runTrecDriver()} passes to the driver, so it presumably has to run after it.
 * NOTE(review): JUnit 4 does not guarantee method execution order; confirm how the suite is run.
 */
public class VerifyGov2Webgraph {
    private static final String collectionPath = "/shared/collections/gov2/collection.raw/gov2-corpus/GX000";
    private static final String docnoMapping = "/shared/indexes/gov2/docno-mapping.dat";

    // Expected anchor weights and source documents for the second record of part-00000.
    private final ImmutableMap<String, Float> anchorList1 = ImmutableMap.of(
            "mine safety health administration", 5.5f,
            "mine safety health administration msha", 1.25f,
            "msha", 1.25f,
            "safety health mining", 0.25f);
    private final ImmutableMap<String, ImmutableSet<Integer>> anchorSources1 = ImmutableMap.of(
            "mine safety health administration", ImmutableSet.of(28502, 11970, 11445, 65562, 67427, 6338),
            "mine safety health administration msha", ImmutableSet.of(25765, 24550, 14962, 82536, 68902, 46419, 35554, 6461, 17709),
            "msha", ImmutableSet.of(25765, 1050, 35317),
            "safety health mining", ImmutableSet.of(29107));

    // Expected anchor weights and source documents for the second record of part-00010.
    private final ImmutableMap<String, Float> anchorList2 = ImmutableMap.of(
            "hanford", 3.5f,
            "richland operations office rl", 0.5f);
    private final ImmutableMap<String, ImmutableSet<Integer>> anchorSources2 = ImmutableMap.of(
            "hanford", ImmutableSet.of(55133, 89334, 51706, 52487, 44864, 39214),
            "richland operations office rl", ImmutableSet.of(51706));

    private static final Random rand = new Random();
    // Randomised suffix so that separate JVM runs are unlikely to share an output directory.
    private static final String tmp = "/tmp/tmp-" + VerifyGov2Webgraph.class.getSimpleName() + rand.nextInt(10000);
    private static final String collectionOutput = tmp + "/webgraph-gov2";

    /**
     * Deletes any previous output directory and launches {@link TrecDriver} through
     * {@code hadoop jar}, passing the dependency jars via {@code -libjars}.
     *
     * @throws Exception if the file system cannot be reached or the command fails
     */
    @Test
    public void runTrecDriver() throws Exception {
        FileSystem fs = FileSystem.get(IntegrationUtils.getBespinConfiguration());
        Assert.assertTrue(fs.exists(new Path(collectionPath)));
        fs.delete(new Path(collectionOutput), true);

        String cloud9Jar = IntegrationUtils.getJar("dist", "cloud9");

        ArrayList<String> jars = Lists.newArrayList();
        jars.add(cloud9Jar);
        jars.add(IntegrationUtils.getJar("lib", "guava-13"));
        jars.add(IntegrationUtils.getJar("lib", "dsiutils"));
        jars.add(IntegrationUtils.getJar("lib", "fastutil"));
        jars.add(IntegrationUtils.getJar("lib", "sux4j"));
        jars.add(IntegrationUtils.getJar("lib", "commons-collections"));
        jars.add(IntegrationUtils.getJar("lib", "commons-lang"));
        jars.add(IntegrationUtils.getJar("lib", "tools"));
        jars.add(IntegrationUtils.getJar("lib", "htmlparser"));
        jars.add(IntegrationUtils.getJar("lib", "pcj"));
        String libJars = String.format("-libjars=%s", Joiner.on(",").join(jars));

        String[] command = {
            "hadoop jar", cloud9Jar, TrecDriver.class.getCanonicalName(), libJars,
            DriverUtil.CL_INPUT, collectionPath,
            DriverUtil.CL_OUTPUT, collectionOutput,
            DriverUtil.CL_COLLECTION, "gov2",
            DriverUtil.CL_DOCNO_MAPPING, docnoMapping,
            DriverUtil.CL_COMPUTE_WEIGHTS,
            DriverUtil.CL_NORMALIZER, AnchorTextBasicNormalizer.class.getCanonicalName()
        };
        IntegrationUtils.exec(Joiner.on(" ").join(command));
    }

    /**
     * Checks the second record of {@code part-00000} and of {@code part-00010} in the weighted
     * reverse webgraph against the expected anchor weights and source documents.
     *
     * @throws Exception if the output files cannot be opened or read
     */
    @Test
    public void verifyAnchors() throws Exception {
        FileSystem fs = FileSystem.get(IntegrationUtils.getBespinConfiguration());
        verifySecondRecord(fs, "part-00000", this.anchorList1, this.anchorSources1);
        verifySecondRecord(fs, "part-00010", this.anchorList2, this.anchorSources2);
    }

    /**
     * Opens one part file of the weighted reverse webgraph, skips its first record, and verifies
     * the second record. The reader is closed even when an assertion fails.
     *
     * @param fs file system holding the driver output
     * @param partName file name of the part to read, e.g. {@code part-00000}
     * @param expectedWeights expected weight per anchor text
     * @param expectedSources expected source documents per anchor text
     * @throws Exception if the part file cannot be opened or read
     */
    private void verifySecondRecord(FileSystem fs, String partName, Map<String, Float> expectedWeights,
            Map<String, ImmutableSet<Integer>> expectedSources) throws Exception {
        Path partPath = new Path(collectionOutput + "/" + DriverUtil.OUTPUT_WEGIHTED_REVERSE_WEBGRAPH + "/" + partName);
        IntWritable key = new IntWritable();
        ArrayListWritable<AnchorText> anchors = new ArrayListWritable<AnchorText>();
        try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, partPath, fs.getConf())) {
            // Fail loudly on a short file; otherwise the checks below would pass on an empty list.
            Assert.assertTrue("no first record in " + partPath, reader.next(key, anchors));
            Assert.assertTrue("no second record in " + partPath, reader.next(key, anchors));
            verifyWeights(expectedWeights, anchors);
            verifySources(expectedSources, anchors);
        }
    }

    /**
     * Asserts that every anchor whose text is a key of {@code map} carries the mapped weight
     * (tolerance 1e-5). Anchors with other texts are ignored, and expected texts that never
     * occur in the list are not reported.
     *
     * @param map expected weight per anchor text
     * @param arrayListWritable anchors read from the webgraph
     */
    private void verifyWeights(Map<String, Float> map, ArrayListWritable<AnchorText> arrayListWritable) {
        for (int i = 0; i < arrayListWritable.size(); i++) {
            AnchorText anchor = arrayListWritable.get(i);
            Float expected = map.get(anchor.getText());
            if (expected != null) {
                Assert.assertEquals(expected.floatValue(), anchor.getWeight(), 1.0E-5d);
            }
        }
    }

    /**
     * Asserts that every anchor whose text is a key of {@code map} has as many documents as the
     * mapped set has elements, and that each of its documents is in that set. Anchors with other
     * texts are ignored, and expected texts that never occur in the list are not reported.
     *
     * @param map expected source documents per anchor text
     * @param arrayListWritable anchors read from the webgraph
     */
    private void verifySources(Map<String, ImmutableSet<Integer>> map, ArrayListWritable<AnchorText> arrayListWritable) {
        for (int i = 0; i < arrayListWritable.size(); i++) {
            AnchorText anchor = arrayListWritable.get(i);
            ImmutableSet<Integer> expected = map.get(anchor.getText());
            if (expected == null) {
                continue;
            }
            int[] documents = anchor.getDocuments();
            Assert.assertEquals(expected.size(), documents.length);
            for (int document : documents) {
                Assert.assertTrue(expected.contains(Integer.valueOf(document)));
            }
        }
    }

    /**
     * Wraps this JUnit 4 test class in a {@link JUnit4TestAdapter} for JUnit 3 style runners.
     *
     * @return an adapter over this class
     */
    public static junit.framework.Test suite() {
        return new JUnit4TestAdapter(VerifyGov2Webgraph.class);
    }
}
