package com.digitalpebble.stormcrawler.protocol;

import com.digitalpebble.stormcrawler.util.ConfUtils;
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import crawlercommons.robots.BaseRobotRules;
import crawlercommons.robots.SimpleRobotRules;
import crawlercommons.robots.SimpleRobotRulesParser;
import java.net.URL;
import java.util.ArrayList;
import java.util.StringTokenizer;
import org.apache.storm.Config;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/digitalpebble/stormcrawler/protocol/RobotRulesParser.class */
/**
 * Base class for protocol-specific robots.txt handling.
 *
 * <p>It resolves the agent names handed to the robots.txt parser from the configuration, builds
 * the two Caffeine caches exposed to subclasses, and delegates the actual parsing to a shared
 * crawler-commons {@link SimpleRobotRulesParser}. Fetching robots.txt for a given URL is left to
 * subclasses via {@link #getRobotRulesSet(Protocol, URL)}.
 */
public abstract class RobotRulesParser {

    public static final Logger LOG = LoggerFactory.getLogger(RobotRulesParser.class);

    /** Configuration key holding the Caffeine spec string used to build {@link #CACHE}. */
    public static final String cacheConfigParamName = "robots.cache.spec";

    /** Configuration key holding the Caffeine spec string used to build {@link #ERRORCACHE}. */
    public static final String errorcacheConfigParamName = "robots.error.cache.spec";

    /** Rule set created in {@code ALLOW_ALL} mode. */
    public static final BaseRobotRules EMPTY_RULES =
            new SimpleRobotRules(SimpleRobotRules.RobotRulesMode.ALLOW_ALL);

    /** Rule set created in {@code ALLOW_NONE} mode. */
    public static final BaseRobotRules FORBID_ALL_RULES =
            new SimpleRobotRules(SimpleRobotRules.RobotRulesMode.ALLOW_NONE);

    /**
     * Cache built by {@link #setConf(Config)} from the {@value #cacheConfigParamName} spec. This
     * class only creates it; it is not read or written here.
     */
    protected static Cache<String, RobotRules> CACHE;

    /**
     * Cache built by {@link #setConf(Config)} from the {@value #errorcacheConfigParamName} spec.
     * This class only creates it; it is not read or written here.
     */
    protected static Cache<String, RobotRules> ERRORCACHE;

    /** Spec used for {@link #CACHE} when the configuration does not provide one. */
    private static final String DEFAULT_CACHE_SPEC = "maximumSize=10000,expireAfterWrite=6h";

    /** Spec used for {@link #ERRORCACHE} when the configuration does not provide one. */
    private static final String DEFAULT_ERROR_CACHE_SPEC = "maximumSize=10000,expireAfterWrite=1h";

    /** Parser shared by all instances; every {@link #parseRules} call goes through it. */
    private static final SimpleRobotRulesParser robotParser = new SimpleRobotRulesParser();

    static {
        // NOTE(review): presumably lifts the parser's own crawl-delay ceiling so that large
        // Crawl-delay values are left for callers to judge - confirm against crawler-commons.
        robotParser.setMaxCrawlDelay(Long.MAX_VALUE);
    }

    /**
     * Comma-separated agent names assembled by {@link #setConf(Config)}; always starts with the
     * value of {@code http.agent.name}.
     */
    protected String agentNames;

    /**
     * Reads the agent names from the configuration and (re)builds both caches.
     *
     * <p>NOTE(review): {@link #CACHE} and {@link #ERRORCACHE} are static but are replaced on every
     * call, so configuring a second instance discards whatever the first one had cached. Left
     * as-is because subclasses outside this file may rely on it.
     *
     * @param config configuration providing {@code http.agent.name} (mandatory),
     *     {@code http.robots.agents} (optional, comma-separated) and the two cache specs
     * @throws RuntimeException if {@code http.agent.name} is not set
     */
    public void setConf(Config config) {
        String agentName = ConfUtils.getString(config, "http.agent.name");
        if (agentName == null) {
            throw new RuntimeException("Agent name not configured!");
        }
        String configuredAgents = ConfUtils.getString(config, "http.robots.agents", "");
        this.agentNames = buildAgentNames(agentName, configuredAgents);

        CACHE = Caffeine.from(ConfUtils.getString(config, cacheConfigParamName, DEFAULT_CACHE_SPEC)).build();
        ERRORCACHE = Caffeine.from(ConfUtils.getString(config, errorcacheConfigParamName, DEFAULT_ERROR_CACHE_SPEC)).build();
    }

    /**
     * Combines the advertised agent name with the additional agents listed for robots parsing.
     *
     * <p>The advertised name always comes first. If the list already starts with it (compared
     * case-insensitively) that entry is not repeated. Blank entries such as the middle one in
     * {@code "a, ,b"} are ignored so that they cannot end up as empty agent names.
     *
     * @param agentName value of {@code http.agent.name}, never {@code null}
     * @param configuredAgents raw comma-separated value of {@code http.robots.agents}
     * @return the agent names joined with {@code ", "}
     */
    private static String buildAgentNames(String agentName, String configuredAgents) {
        ArrayList<String> agents = new ArrayList<>();
        StringTokenizer tokens = new StringTokenizer(configuredAgents, ",");
        while (tokens.hasMoreTokens()) {
            String agent = tokens.nextToken().trim();
            if (!agent.isEmpty()) {
                agents.add(agent);
            }
        }

        if (agents.isEmpty()) {
            LOG.info("No agents listed in 'http.robots.agents' property! Using http.agent.name [{}]", agentName);
            return agentName;
        }

        int start = 0;
        if (agents.get(0).equalsIgnoreCase(agentName)) {
            // Already covered by the leading agentName below; do not list it twice.
            start = 1;
        } else {
            LOG.info("Agent we advertise ({}) not listed first in 'http.robots.agents' property!", agentName);
        }

        StringBuilder combined = new StringBuilder(agentName);
        for (int i = start; i < agents.size(); i++) {
            combined.append(", ").append(agents.get(i));
        }
        return combined.toString();
    }

    /**
     * Parses robots.txt content by forwarding all four arguments, in order, to
     * {@link SimpleRobotRulesParser#parseContent} on the shared parser.
     *
     * @param str first argument of {@code parseContent} - presumably the robots.txt URL (TODO
     *     confirm against crawler-commons)
     * @param bArr second argument - presumably the raw robots.txt bytes
     * @param str2 third argument - presumably the content type of the response
     * @param str3 fourth argument - presumably the agent name(s) to match
     * @return whatever rule set the crawler-commons parser produces
     */
    public BaseRobotRules parseRules(String str, byte[] bArr, String str2, String str3) {
        return robotParser.parseContent(str, bArr, str2, str3);
    }

    /**
     * Convenience overload taking the URL as a string.
     *
     * <p>This is best-effort: if the string cannot be turned into a {@link URL}, or the URL-based
     * overload throws, {@link #EMPTY_RULES} is returned instead of propagating the failure. The
     * failure is logged at debug level so it is no longer invisible.
     *
     * @param protocol protocol implementation handed on to the URL-based overload
     * @param str URL as a string
     * @return the rules from {@link #getRobotRulesSet(Protocol, URL)}, or {@link #EMPTY_RULES} on
     *     any exception
     */
    public BaseRobotRules getRobotRulesSet(Protocol protocol, String str) {
        try {
            return getRobotRulesSet(protocol, new URL(str));
        } catch (Exception e) {
            LOG.debug("Could not get robot rules for [{}], falling back to EMPTY_RULES", str, e);
            return EMPTY_RULES;
        }
    }

    /**
     * Returns the robots rules applying to the given URL.
     *
     * @param protocol protocol implementation made available to the subclass
     * @param url URL whose robots rules are requested
     * @return the applicable rule set
     */
    public abstract BaseRobotRules getRobotRulesSet(Protocol protocol, URL url);
}
