package com.digitalpebble.stormcrawler.filtering.robots;

import com.digitalpebble.stormcrawler.Metadata;
import com.digitalpebble.stormcrawler.filtering.URLFilter;
import com.digitalpebble.stormcrawler.protocol.HttpRobotRulesParser;
import com.digitalpebble.stormcrawler.protocol.ProtocolFactory;
import com.fasterxml.jackson.databind.JsonNode;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Map;
import org.apache.storm.Config;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

/* loaded from: input_file:com/digitalpebble/stormcrawler/filtering/robots/RobotsFilter.class */
/**
 * URL filter that keeps a URL only when the robots rules resolved for it allow it.
 *
 * <p>By default the rules are looked up from the parser's cache only; setting the boolean
 * {@code fromCacheOnly} parameter to {@code false} in the filter's JSON configuration makes the
 * filter resolve the rules through the protocol implementation obtained for the URL instead.
 */
public class RobotsFilter extends URLFilter {
    /** Source of the robots rules; created in {@link #configure(Map, JsonNode)}. */
    private HttpRobotRulesParser robots;

    /** Supplies the protocol handed to the rules parser when the cache-only mode is off. */
    private ProtocolFactory factory;

    /** When {@code true} (the default), rules are taken from the parser's cache lookup only. */
    private boolean fromCacheOnly = true;

    /**
     * Checks the candidate URL against the robots rules.
     *
     * @param url source URL; not used by this filter
     * @param metadata source metadata; not used by this filter
     * @param str the URL to check
     * @return {@code str} unchanged when the rules allow it; {@code null} when they do not or
     *     when {@code str} cannot be parsed as a URL
     */
    @Override
    @Nullable
    public String filter(@Nullable URL url, @Nullable Metadata metadata, @NotNull String str) {
        try {
            URL target = new URL(str);
            final boolean allowed;
            if (this.fromCacheOnly) {
                allowed = this.robots.getRobotRulesSetFromCache(target).isAllowed(str);
            } else {
                allowed =
                        this.robots
                                .getRobotRulesSet(this.factory.getProtocol(target), target)
                                .isAllowed(str);
            }
            return allowed ? str : null;
        } catch (MalformedURLException e) {
            // An unparseable URL is filtered out rather than reported.
            return null;
        }
    }

    /**
     * Builds the protocol factory and robots parser from the given configuration and reads the
     * optional {@code fromCacheOnly} flag.
     *
     * @param map configuration entries, copied into a fresh {@link Config}
     * @param jsonNode filter parameters; a boolean {@code fromCacheOnly} entry overrides the
     *     default, anything else (missing or non-boolean) is ignored
     */
    @Override
    public void configure(@NotNull Map<String, Object> map, @NotNull JsonNode jsonNode) {
        Config stormConf = new Config();
        stormConf.putAll(map);
        this.factory = ProtocolFactory.getInstance(stormConf);
        this.robots = new HttpRobotRulesParser(stormConf);

        JsonNode cacheOnlyNode = jsonNode.get("fromCacheOnly");
        if (cacheOnlyNode != null && cacheOnlyNode.isBoolean()) {
            this.fromCacheOnly = cacheOnlyNode.booleanValue();
        }
    }
}
