package com.digitalpebble.stormcrawler.protocol;

import com.digitalpebble.stormcrawler.Metadata;
import com.digitalpebble.stormcrawler.proxy.ProxyManager;
import com.digitalpebble.stormcrawler.util.ConfUtils;
import com.digitalpebble.stormcrawler.util.InitialisationUtil;
import com.digitalpebble.stormcrawler.util.StringTabScheme;
import crawlercommons.robots.BaseRobotRules;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.Options;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.Config;
import org.apache.storm.utils.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/digitalpebble/stormcrawler/protocol/AbstractHttpProtocol.class */
public abstract class AbstractHttpProtocol implements Protocol {
    private static final Logger LOG = LoggerFactory.getLogger(AbstractHttpProtocol.class);
    // Robots rules parser; instantiated in configure() and queried by getRobotRules().
    private HttpRobotRulesParser robots;
    // Populated in configure() from the "http.protocol.versions" configuration list.
    protected List<String> protocolVersions;
    // Header-name constants; not referenced in this class, presumably used by
    // concrete protocol implementations (TODO confirm against subclasses).
    protected static final String RESPONSE_COOKIES_HEADER = "set-cookie";
    protected static final String SET_HEADER_BY_REQUEST = "set-header";
    // Assigned in configure() only when a proxy manager class is configured, or
    // when "http.proxy.host" is set; otherwise it is left unassigned (null).
    public ProxyManager proxyManager;
    // Set from "http.robots.file.skip"; when true getRobotRules() returns EMPTY_RULES.
    protected boolean skipRobots = false;
    // Set from "http.store.headers" in configure().
    protected boolean storeHTTPHeaders = false;
    // Set from "http.use.cookies" in configure().
    protected boolean useCookies = false;
    // Set from ProtocolResponse.PROTOCOL_MD_PREFIX_PARAM; defaults to the empty string.
    protected String protocolMDprefix = "";
    // One entry per item of the "http.custom.headers" list, appended by configure().
    protected final List<KeyValue> customHeaders = new LinkedList();

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:com/digitalpebble/stormcrawler/protocol/AbstractHttpProtocol$KeyValue.class */
    /**
     * Immutable key/value pair; {@code configure()} builds one per entry of the
     * {@code http.custom.headers} configuration list.
     */
    public static class KeyValue {
        private final String k;
        private final String v;

        /**
         * Creates a pair from an already separated key and value.
         *
         * @param str the key, stored as given
         * @param str2 the value, stored as given
         */
        public KeyValue(String str, String str2) {
            this.k = str;
            this.v = str2;
        }

        /** @return the key of this pair */
        public String getKey() {
            return this.k;
        }

        /** @return the value of this pair */
        public String getValue() {
            return this.v;
        }

        /**
         * Parses a {@code key=value} string, splitting at the first {@code '='}.
         * Both parts are trimmed. A string without {@code '='} becomes a key
         * with an empty value.
         *
         * <p>A trailing separator ({@code "key="}) yields the key {@code "key"}
         * with an empty value; the separator is never kept as part of the key.
         *
         * @param str the raw {@code key=value} text; must not be {@code null}
         * @return the parsed pair
         */
        public static KeyValue build(String str) {
            int separator = str.indexOf('=');
            if (separator == -1) {
                return new KeyValue(str.trim(), "");
            }
            // Always split at the separator so that "key=" does not leak the
            // '=' into the key; the value is simply empty in that case.
            String key = str.substring(0, separator).trim();
            String value = str.substring(separator + 1).trim();
            return new KeyValue(key, value);
        }
    }

    /**
     * Reads the HTTP-related settings from the configuration: robots handling,
     * header storage, cookies, protocol versions, custom headers, the metadata
     * prefix and the optional proxy manager.
     *
     * <p>A failure to instantiate or configure the proxy manager is logged and
     * does not abort configuration.
     *
     * @param config the configuration to read from
     */
    @Override // com.digitalpebble.stormcrawler.protocol.Protocol
    public void configure(Config config) {
        this.skipRobots = ConfUtils.getBoolean(config, "http.robots.file.skip", false);
        this.storeHTTPHeaders = ConfUtils.getBoolean(config, "http.store.headers", false);
        this.useCookies = ConfUtils.getBoolean(config, "http.use.cookies", false);
        this.protocolVersions = ConfUtils.loadListFromConf("http.protocol.versions", config);

        List<String> rawHeaders = ConfUtils.loadListFromConf("http.custom.headers", config);
        for (String rawHeader : rawHeaders) {
            this.customHeaders.add(KeyValue.build(rawHeader));
        }

        this.robots = new HttpRobotRulesParser(config);
        this.protocolMDprefix = ConfUtils.getString(config, ProtocolResponse.PROTOCOL_MD_PREFIX_PARAM, this.protocolMDprefix);

        // A configured proxy host implies the single-proxy manager unless an
        // explicit manager class is named.
        String fallbackManager = null;
        if (ConfUtils.getString(config, "http.proxy.host", null) != null) {
            fallbackManager = "com.digitalpebble.stormcrawler.proxy.SingleProxyManager";
        }
        String managerClass = ConfUtils.getString(config, "http.proxy.manager", fallbackManager);
        if (managerClass == null) {
            return;
        }

        try {
            this.proxyManager = (ProxyManager) InitialisationUtil.initializeFromQualifiedName(managerClass, ProxyManager.class, new Class[0]);
            this.proxyManager.configure(config);
        } catch (Exception e) {
            LOG.error("Failed to create proxy manager `" + managerClass + "`", e);
        }
    }

    /**
     * Returns the robots rules applying to the given URL.
     *
     * @param str the URL to look up
     * @return {@code RobotRulesParser.EMPTY_RULES} when robots handling is
     *     skipped, otherwise whatever the robots parser returns for the URL
     */
    @Override // com.digitalpebble.stormcrawler.protocol.Protocol
    public BaseRobotRules getRobotRules(String str) {
        if (this.skipRobots) {
            return RobotRulesParser.EMPTY_RULES;
        }
        return this.robots.getRobotRulesSet(this, str);
    }

    /**
     * Does nothing in this base class. The command-line {@code main} helper of
     * this class calls it once all fetches have completed.
     */
    @Override // com.digitalpebble.stormcrawler.protocol.Protocol
    public void cleanup() {
    }

    /**
     * Resolves the user agent string from the configuration.
     *
     * <p>A non-empty {@code http.agent} value is returned verbatim. Otherwise
     * the string is assembled from {@code http.agent.name},
     * {@code http.agent.version}, {@code http.agent.description},
     * {@code http.agent.url} and {@code http.agent.email}.
     *
     * @param config the configuration to read from
     * @return the user agent string
     */
    public static String getAgentString(Config config) {
        String explicitAgent = ConfUtils.getString(config, "http.agent");
        if (explicitAgent != null && !explicitAgent.isEmpty()) {
            return explicitAgent;
        }
        String name = ConfUtils.getString(config, "http.agent.name");
        String version = ConfUtils.getString(config, "http.agent.version");
        String description = ConfUtils.getString(config, "http.agent.description");
        String url = ConfUtils.getString(config, "http.agent.url");
        String email = ConfUtils.getString(config, "http.agent.email");
        return getAgentString(name, version, description, url, email);
    }

    /**
     * Assembles a user agent string of the form
     * {@code name/version (description; url; email)}.
     *
     * <p>The version and each parenthesised part are included only when not
     * blank; the parentheses are omitted when all three parts are blank. The
     * name is appended unconditionally.
     *
     * @param str the agent name
     * @param str2 the agent version, may be blank
     * @param str3 the agent description, may be blank
     * @param str4 the agent URL, may be blank
     * @param str5 the agent e-mail address, may be blank
     * @return the assembled user agent string
     */
    private static String getAgentString(String str, String str2, String str3, String str4, String str5) {
        StringBuilder agent = new StringBuilder();
        agent.append(str);
        if (StringUtils.isNotBlank(str2)) {
            agent.append("/").append(str2);
        }

        // Collect the non-blank optional parts in order, separated by "; ".
        StringBuilder details = new StringBuilder();
        String[] optionalParts = {str3, str4, str5};
        for (String part : optionalParts) {
            if (!StringUtils.isNotBlank(part)) {
                continue;
            }
            if (details.length() > 0) {
                details.append("; ");
            }
            details.append(part);
        }

        if (details.length() > 0) {
            agent.append(" (").append(details).append(")");
        }
        return agent.toString();
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Command-line helper that fetches each URL given as an argument with the
     * supplied protocol instance and prints a short report per URL.
     *
     * <p>The configuration is loaded from {@code crawler-default.yaml} and, if
     * the {@code -c} option is present, overlaid with that file. Each remaining
     * argument is decoded with {@link StringTabScheme} into a URL and its
     * metadata and fetched on its own thread. Once every fetch thread has
     * terminated, {@code cleanup()} is called and the JVM exits with status 0.
     *
     * @param abstractHttpProtocol the protocol implementation to exercise
     * @param strArr command-line arguments: optional {@code -c <file>} followed
     *     by the URLs to fetch
     * @throws Exception if the configuration or the arguments cannot be read,
     *     or if the calling thread is interrupted while waiting
     */
    public static void main(AbstractHttpProtocol abstractHttpProtocol, String[] strArr) throws Exception {
        Config config = new Config();
        // Start from the packaged defaults, then overlay the optional user file.
        config.putAll(ConfUtils.extractConfigElement(Utils.findAndReadConfigFile("crawler-default.yaml", false)));
        Options options = new Options();
        options.addOption("c", true, "configuration file");
        CommandLine parse = new DefaultParser().parse(options, strArr);
        if (parse.hasOption("c")) {
            ConfUtils.loadConf(parse.getOptionValue("c"), config);
        }
        abstractHttpProtocol.configure(config);

        // Explicitly final so the local class below can capture it.
        final AbstractHttpProtocol protocol = abstractHttpProtocol;

        /** Fetches a single URL and prints the outcome. */
        class Fetchable implements Runnable {
            private final String url;
            private final Metadata md;

            Fetchable(String line) {
                List<Object> tuple = new StringTabScheme().deserialize(ByteBuffer.wrap(line.getBytes(StandardCharsets.UTF_8)));
                this.url = (String) tuple.get(0);
                this.md = (Metadata) tuple.get(1);
            }

            @Override
            public void run() {
                StringBuilder sb = new StringBuilder();
                sb.append(this.url).append("\n");
                // The robots lookup sits inside the try block as well, so a
                // failure there is reported like any other fetch error.
                try {
                    if (!protocol.skipRobots) {
                        BaseRobotRules robotRules = protocol.getRobotRules(this.url);
                        sb.append("robots allowed: ").append(robotRules.isAllowed(this.url)).append("\n");
                        if (robotRules instanceof RobotRules) {
                            sb.append("robots requests: ").append(((RobotRules) robotRules).getContentLengthFetched().length).append("\n");
                        }
                        sb.append("sitemaps identified: ").append(robotRules.getSitemaps().size()).append("\n");
                    }
                    long start = System.currentTimeMillis();
                    ProtocolResponse protocolOutput = protocol.getProtocolOutput(this.url, this.md);
                    sb.append(protocolOutput.getMetadata()).append("\n");
                    sb.append("status code: ").append(protocolOutput.getStatusCode()).append("\n");
                    sb.append("content length: ").append(protocolOutput.getContent().length).append("\n");
                    sb.append("fetched in : ").append(System.currentTimeMillis() - start).append(" msec");
                    System.out.println(sb);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }

        List<Thread> workers = new LinkedList<>();
        for (String arg : parse.getArgs()) {
            Thread worker = new Thread(new Fetchable(arg));
            workers.add(worker);
            worker.start();
        }
        // Joining each thread replaces polling a set shared across threads
        // without synchronization, and returns however a worker terminates.
        for (Thread worker : workers) {
            worker.join();
        }
        abstractHttpProtocol.cleanup();
        System.exit(0);
    }
}
