package com.digitalpebble.storm.crawler.bolt;

import backtype.storm.metric.api.MultiCountMetric;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import com.digitalpebble.storm.crawler.Constants;
import com.digitalpebble.storm.crawler.Metadata;
import com.digitalpebble.storm.crawler.util.ConfUtils;
import crawlercommons.url.PaidLevelDomain;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/digitalpebble/storm/crawler/bolt/URLPartitionerBolt.class */
/**
 * Bolt that computes a partition key for each incoming URL and emits
 * {@code (url, key, metadata)} tuples.
 *
 * <p>The key depends on the partition mode read from the topology configuration
 * ({@code Constants.PARTITION_MODEParamName}):
 * <ul>
 *   <li>{@code byHost} (default) - the host part of the URL;</li>
 *   <li>{@code byDomain} - the value returned by {@code PaidLevelDomain.getPLD(host)};</li>
 *   <li>{@code byIP} - the first {@code "ip"} value found in the metadata if it is not blank,
 *       otherwise the address obtained by resolving the host.</li>
 * </ul>
 *
 * <p>Tuples whose URL cannot be parsed, or whose host cannot be resolved in {@code byIP} mode,
 * are acked without emitting anything.
 */
public class URLPartitionerBolt extends BaseRichBolt {
    private static final Logger LOG = LoggerFactory.getLogger(URLPartitionerBolt.class);

    /** Maximum number of host-to-IP entries kept in the resolution cache. */
    private static final int MAX_CACHE_ENTRIES = 500;

    private OutputCollector _collector;
    private MultiCountMetric eventCounter;
    /** Host name to IP address cache, only used in {@code byIP} mode. */
    private Map<String, String> cache;
    private String mode = "byHost";

    /**
     * Computes the partition key for the URL carried by {@code tuple} and emits it together with
     * the URL and its metadata, anchored on the input tuple. The input tuple is always acked,
     * whether or not something was emitted.
     *
     * @param tuple input tuple; must have a {@code "url"} string field and may have a
     *              {@code "metadata"} field holding a {@code Metadata} instance (possibly null)
     */
    public void execute(Tuple tuple) {
        String url = tuple.getStringByField("url");

        Metadata metadata = null;
        if (tuple.contains("metadata")) {
            metadata = (Metadata) tuple.getValueByField("metadata");
        }
        // The field may be missing altogether, or present but holding null.
        if (metadata == null) {
            metadata = Metadata.empty;
        }

        String partitionKey = null;
        String host = "";

        // In byIP mode an address supplied in the metadata takes precedence over any lookup.
        if (this.mode.equalsIgnoreCase("byIP")) {
            String providedIp = metadata.getFirstValue("ip");
            if (StringUtils.isNotBlank(providedIp)) {
                partitionKey = providedIp;
                this.eventCounter.scope("provided").incrBy(1L);
            }
        }

        // The host is only needed when the key has not been determined yet.
        if (partitionKey == null) {
            try {
                host = new URL(url).getHost();
            } catch (MalformedURLException e) {
                this.eventCounter.scope("Invalid URL").incrBy(1L);
                LOG.warn("Invalid URL: {}", url);
                // Ack the tuple so that the unusable URL is not processed again.
                this._collector.ack(tuple);
                return;
            }
        }

        if (this.mode.equalsIgnoreCase("byHost")) {
            partitionKey = host;
        } else if (this.mode.equalsIgnoreCase("byDomain")) {
            partitionKey = PaidLevelDomain.getPLD(host);
        }

        if (this.mode.equalsIgnoreCase("byIP") && partitionKey == null) {
            partitionKey = resolveIp(host, url);
            if (partitionKey == null) {
                // Failure has already been counted and logged by resolveIp.
                this._collector.ack(tuple);
                return;
            }
        }

        LOG.debug("Partition Key for: {} > {}", url, partitionKey);
        this._collector.emit(tuple, new Values(url, partitionKey, metadata));
        this._collector.ack(tuple);
    }

    /**
     * Returns the IP address for {@code host}, using the cache when possible and storing newly
     * resolved addresses in it.
     *
     * @param host host name to resolve
     * @param url  URL the host was taken from, used for logging only
     * @return the textual IP address, or {@code null} if the host is blank or the lookup failed;
     *         in both failure cases the "Unable to resolve IP" counter is incremented
     */
    private String resolveIp(String host, String url) {
        // InetAddress.getByName returns the loopback address for an empty host name, which
        // would silently partition (and cache) host-less URLs under the local machine's address.
        if (StringUtils.isBlank(host)) {
            this.eventCounter.scope("Unable to resolve IP").incrBy(1L);
            LOG.warn("Unable to resolve IP for: {}", url);
            return null;
        }

        String cachedIp = this.cache.get(host);
        if (cachedIp != null) {
            this.eventCounter.scope("from cache").incrBy(1L);
            return cachedIp;
        }

        try {
            long start = System.currentTimeMillis();
            String ip = InetAddress.getByName(host).getHostAddress();
            long elapsed = System.currentTimeMillis() - start;
            LOG.debug("Resolved IP {} in {} msec for : {}",
                    new Object[] {ip, Long.valueOf(elapsed), url});
            this.cache.put(host, ip);
            return ip;
        } catch (Exception e) {
            // Deliberately broad: any failure while resolving means the URL is dropped by the
            // caller rather than failing the bolt.
            this.eventCounter.scope("Unable to resolve IP").incrBy(1L);
            LOG.warn("Unable to resolve IP for: {}", host);
            return null;
        }
    }

    /**
     * Declares the output fields {@code "url"}, {@code "key"} and {@code "metadata"}, in the
     * order in which {@link #execute(Tuple)} emits them.
     *
     * @param outputFieldsDeclarer declarer supplied by Storm
     */
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("url", "key", "metadata"));
    }

    /**
     * Reads the partition mode from the configuration, registers the {@code "URLPartitioner"}
     * metric and creates the host-to-IP cache.
     *
     * <p>The mode must be exactly {@code byIP}, {@code byDomain} or {@code byHost}; any other
     * value is logged as an error and replaced by {@code byHost}.
     *
     * @param map             topology configuration
     * @param topologyContext context used to register the metric
     * @param outputCollector collector used to emit and ack tuples
     */
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        this.mode = ConfUtils.getString(map, Constants.PARTITION_MODEParamName, "byHost");
        if (!this.mode.equals("byIP") && !this.mode.equals("byDomain") && !this.mode.equals("byHost")) {
            LOG.error("Unknown partition mode : {} - forcing to byHost", this.mode);
            this.mode = "byHost";
        }
        LOG.info("Using partition mode : {}", this.mode);

        this._collector = outputCollector;
        this.eventCounter = topologyContext.registerMetric("URLPartitioner", new MultiCountMetric(), 10);

        // Access-ordered LinkedHashMap that drops its eldest (least recently accessed) entry
        // once it grows beyond MAX_CACHE_ENTRIES.
        Map<String, String> lru = new LinkedHashMap<String, String>(MAX_CACHE_ENTRIES + 1, 0.75f, true) {
            @Override
            protected boolean removeEldestEntry(Map.Entry<String, String> eldest) {
                return size() > MAX_CACHE_ENTRIES;
            }
        };
        this.cache = Collections.synchronizedMap(lru);
    }
}
