package com.digitalpebble.stormcrawler.filtering.robots;

import com.digitalpebble.stormcrawler.Metadata;
import com.digitalpebble.stormcrawler.filtering.URLFilter;
import com.digitalpebble.stormcrawler.protocol.HttpRobotRulesParser;
import com.digitalpebble.stormcrawler.protocol.ProtocolFactory;
import com.fasterxml.jackson.databind.JsonNode;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Map;
import org.apache.storm.Config;

/* loaded from: input_file:com/digitalpebble/stormcrawler/filtering/robots/RobotsFilter.class */
/**
 * URL filter that drops candidate URLs rejected by the robot rules returned by
 * {@link HttpRobotRulesParser} for the candidate's protocol.
 *
 * <p>The optional boolean parameter {@code limitToSameHost} (default {@code false}) restricts
 * the robots check to candidates whose host equals the source URL's host, compared
 * case-insensitively. Candidates on a different host are then passed through unchecked.
 */
public class RobotsFilter implements URLFilter {
    private HttpRobotRulesParser robots;
    private ProtocolFactory factory;
    private boolean limitToSameHost = false;

    /**
     * Decides whether the candidate URL is kept.
     *
     * @param url the URL of the page on which the candidate was found; may be {@code null}
     * @param metadata metadata of the source page (not used by this filter)
     * @param str the candidate URL to check
     * @return {@code str} if the candidate is kept, or {@code null} if it is malformed or
     *     disallowed by the robot rules
     */
    @Override
    public String filter(URL url, Metadata metadata, String str) {
        try {
            URL target = new URL(str);

            if (this.limitToSameHost) {
                // Without a source URL the "same host" condition cannot be established, so the
                // candidate is treated like one on a different host: kept without a robots
                // check. Previously this path threw a NullPointerException.
                if (url == null || !target.getHost().equalsIgnoreCase(url.getHost())) {
                    return str;
                }
            }

            boolean allowed =
                    this.robots
                            .getRobotRulesSet(this.factory.getProtocol(target), str)
                            .isAllowed(str);
            return allowed ? str : null;
        } catch (MalformedURLException ignored) {
            // A candidate that cannot be parsed as a URL is filtered out.
            return null;
        }
    }

    /**
     * Builds the protocol factory and robot rules parser from the given configuration and reads
     * the optional {@code limitToSameHost} parameter.
     *
     * @param map configuration entries, copied into a fresh {@link Config}
     * @param jsonNode filter parameters; if {@code null}, or if {@code limitToSameHost} is
     *     absent or not a boolean, the current setting is left unchanged
     */
    @Override
    public void configure(Map map, JsonNode jsonNode) {
        Config config = new Config();
        config.putAll(map);
        this.factory = new ProtocolFactory(config);
        this.robots = new HttpRobotRulesParser(config);

        JsonNode sameHostNode = jsonNode == null ? null : jsonNode.get("limitToSameHost");
        if (sameHostNode != null && sameHostNode.isBoolean()) {
            this.limitToSameHost = sameHostNode.booleanValue();
        }
    }
}
