package com.digitalpebble.storm.crawler.protocol.httpclient;

import backtype.storm.Config;
import com.digitalpebble.storm.crawler.Metadata;
import com.digitalpebble.storm.crawler.protocol.AbstractHttpProtocol;
import com.digitalpebble.storm.crawler.protocol.HttpRobotRulesParser;
import com.digitalpebble.storm.crawler.protocol.ProtocolResponse;
import com.digitalpebble.storm.crawler.protocol.RobotRulesParser;
import com.digitalpebble.storm.crawler.spout.FileSpout;
import com.digitalpebble.storm.crawler.util.ConfUtils;
import crawlercommons.robots.BaseRobotRules;
import java.io.IOException;
import java.util.Locale;
import org.apache.commons.lang.StringUtils;
import org.apache.http.Header;
import org.apache.http.HeaderIterator;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.DefaultProxyRoutePlanner;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/digitalpebble/storm/crawler/protocol/httpclient/HttpProtocol.class */
/**
 * HTTP protocol implementation built on Apache HttpClient.
 *
 * <p>All instances share one pooled connection manager; each call to
 * {@link #getProtocolOutput(String, Metadata)} builds a short-lived client on top of it.
 * The instance itself acts as the {@link ResponseHandler} that converts the raw
 * {@link HttpResponse} into a {@link ProtocolResponse}.
 */
public class HttpProtocol extends AbstractHttpProtocol implements ResponseHandler<ProtocolResponse> {
    private static final Logger LOG = LoggerFactory.getLogger(HttpProtocol.class);

    /** Connection pool shared by every client built by this class. */
    private static final PoolingHttpClientConnectionManager CONNECTION_MANAGER = new PoolingHttpClientConnectionManager();

    static {
        CONNECTION_MANAGER.setMaxTotal(200);
        CONNECTION_MANAGER.setDefaultMaxPerRoute(20);
    }

    private HttpRobotRulesParser robots;

    // NOTE(review): read from "http.content.limit" in configure() but never applied in this class.
    private int maxContent;

    private HttpClientBuilder builder;
    private RequestConfig requestConfig;

    // NOTE(review): read from "http.store.responsetime" in configure() but never used in this class.
    private boolean responseTime = true;

    private boolean skipRobots = false;

    /**
     * Reads the HTTP settings from the configuration and prepares the client builder and
     * the per-request configuration.
     *
     * <p>Keys read: {@code http.content.limit}, {@code http.agent.*},
     * {@code http.store.responsetime}, {@code http.skip.robots}, {@code http.proxy.host},
     * {@code http.proxy.port} and {@code http.timeout}.
     *
     * @param config configuration to read the settings from
     */
    @Override // com.digitalpebble.storm.crawler.protocol.Protocol
    public void configure(Config config) {
        this.maxContent = ConfUtils.getInt(config, "http.content.limit", 65536);
        String userAgent = getAgentString(
                ConfUtils.getString(config, "http.agent.name"),
                ConfUtils.getString(config, "http.agent.version"),
                ConfUtils.getString(config, "http.agent.description"),
                ConfUtils.getString(config, "http.agent.url"),
                ConfUtils.getString(config, "http.agent.email"));
        this.responseTime = ConfUtils.getBoolean(config, "http.store.responsetime", true);
        this.skipRobots = ConfUtils.getBoolean(config, "http.skip.robots", false);
        this.robots = new HttpRobotRulesParser(config);

        // The connection manager is marked as shared so that closing a client built from
        // this builder does not shut down the static pool.
        this.builder = HttpClients.custom()
                .setUserAgent(userAgent)
                .setConnectionManager(CONNECTION_MANAGER)
                .setConnectionManagerShared(true)
                .disableRedirectHandling();

        String proxyHost = ConfUtils.getString(config, "http.proxy.host", null);
        int proxyPort = ConfUtils.getInt(config, "http.proxy.port", 8080);
        if (proxyHost != null && proxyHost.length() > 0) {
            this.builder.setRoutePlanner(new DefaultProxyRoutePlanner(new HttpHost(proxyHost, proxyPort)));
        }

        // NOTE(review): the default comes from FileSpout.BATCH_SIZE, which looks like a
        // decompiler constant substitution rather than an intended dependency - confirm.
        int timeout = ConfUtils.getInt(config, "http.timeout", FileSpout.BATCH_SIZE);
        this.requestConfig = RequestConfig.custom()
                .setSocketTimeout(timeout)
                .setConnectTimeout(timeout)
                .build();
    }

    /**
     * Issues a GET request for the given URL and returns the converted response.
     *
     * <p>When {@code metadata} carries non-blank {@code cachedLastModified} or
     * {@code cachedEtag} values, they are sent as {@code If-Modified-Since} and
     * {@code If-None-Match} headers respectively.
     *
     * @param str      URL to fetch
     * @param metadata metadata associated with the URL; may be {@code null}
     * @return the status code, headers and body of the response
     * @throws Exception if the request cannot be built or executed
     */
    @Override // com.digitalpebble.storm.crawler.protocol.Protocol
    public ProtocolResponse getProtocolOutput(String str, Metadata metadata) throws Exception {
        LOG.debug("HTTP connection manager stats {}", CONNECTION_MANAGER.getTotalStats());

        HttpGet request = new HttpGet(str);
        request.setConfig(this.requestConfig);

        if (metadata != null) {
            String lastModified = metadata.getFirstValue("cachedLastModified");
            if (StringUtils.isNotBlank(lastModified)) {
                request.addHeader("If-Modified-Since", lastModified);
            }
            String etag = metadata.getFirstValue("cachedEtag");
            if (StringUtils.isNotBlank(etag)) {
                request.addHeader("If-None-Match", etag);
            }
        }

        // try-with-resources closes the client on every path and records a failure of
        // close() as a suppressed exception instead of masking the original one.
        try (CloseableHttpClient client = this.builder.build()) {
            return client.execute(request, this);
        }
    }

    /**
     * Converts an HTTP response into a {@link ProtocolResponse}.
     *
     * <p>Header names are lower-cased using {@link Locale#ROOT}; repeated headers are added
     * as multiple values under the same key.
     *
     * @param httpResponse response to convert
     * @return the body bytes (empty when the response has no body), status code and headers
     * @throws ClientProtocolException declared by {@link ResponseHandler}
     * @throws IOException             if the response body cannot be read
     */
    @Override // org.apache.http.client.ResponseHandler
    public ProtocolResponse handleResponse(HttpResponse httpResponse) throws ClientProtocolException, IOException {
        int statusCode = httpResponse.getStatusLine().getStatusCode();

        Metadata headers = new Metadata();
        HeaderIterator it = httpResponse.headerIterator();
        while (it.hasNext()) {
            Header header = it.nextHeader();
            headers.addValue(header.getName().toLowerCase(Locale.ROOT), header.getValue());
        }

        // Responses such as 304 Not Modified carry no entity, and EntityUtils.toByteArray
        // rejects a null entity; it may also return null when the entity has no content
        // stream. Both cases are normalised to an empty body.
        byte[] content = null;
        if (httpResponse.getEntity() != null) {
            content = EntityUtils.toByteArray(httpResponse.getEntity());
        }
        if (content == null) {
            content = new byte[0];
        }

        return new ProtocolResponse(content, statusCode, headers);
    }

    /**
     * Returns the robots rules applying to the given URL.
     *
     * @param str URL to look up the rules for
     * @return {@link RobotRulesParser#EMPTY_RULES} when {@code http.skip.robots} is enabled,
     *         otherwise the rules obtained from the robots parser
     */
    @Override // com.digitalpebble.storm.crawler.protocol.Protocol
    public BaseRobotRules getRobotRules(String str) {
        if (this.skipRobots) {
            return RobotRulesParser.EMPTY_RULES;
        }
        return this.robots.getRobotRulesSet(this, str);
    }

    /**
     * Command-line helper: fetches a single URL and prints the URL, response metadata,
     * status code and content length.
     *
     * @param strArr {@code strArr[0]} is the URL to fetch, {@code strArr[1]} is passed to
     *               {@code ConfUtils.loadConf} to populate the configuration
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     * @throws Exception                if loading the configuration or fetching fails
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr == null || strArr.length < 2) {
            throw new IllegalArgumentException("Usage: HttpProtocol <url> <config>");
        }

        HttpProtocol protocol = new HttpProtocol();
        Config config = new Config();
        String url = strArr[0];
        ConfUtils.loadConf(strArr[1], config);
        protocol.configure(config);

        if (!protocol.skipRobots) {
            System.out.println("is allowed : " + protocol.getRobotRules(url).isAllowed(url));
        }

        ProtocolResponse response = protocol.getProtocolOutput(url, new Metadata());
        System.out.println(url);
        System.out.println(response.getMetadata());
        System.out.println(response.getStatusCode());
        System.out.println(response.getContent().length);
    }
}
