package com.crispy.net;

import com.crispy.db.Column;
import com.crispy.db.Row;
import com.crispy.db.Table;
import com.crispy.log.Log;
import java.io.IOException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.sql.SQLException;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.http.HttpEntity;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.util.EntityUtils;
import org.json.JSONObject;

@WebServlet({"/crawler/*"})
/* loaded from: input_file:com/crispy/net/Crawler.class */
public class Crawler extends HttpServlet {
    // Crawl counters updated by CrawlJob; assigned in start(int, int, int), so null before that.
    private ConcurrentHashMap<String, CrawlStats> stats;
    // The single instance handed out by getInstance().
    private static final Crawler INSTANCE = new Crawler();
    // HTTP client used by post() and internalFetchAndCache(); assigned in start(int, int, int).
    private DefaultHttpClient httpClient;
    // Scheduler running the CrawlJob workers; assigned in start(int, int, int), so null before that.
    private ScheduledExecutorService background;
    // Logger obtained under the name "crawler".
    private Log LOG = Log.get("crawler");
    // Handlers registered through setHandler(), looked up with job.getTag().
    private ConcurrentHashMap<String, CrawlHandler> handlers = new ConcurrentHashMap<>();

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:com/crispy/net/Crawler$CrawlJob.class */
    public class CrawlJob implements Runnable {
        CrawlJob() {
        }

        @Override // java.lang.Runnable
        public void run() {
            Row removeNextRow;
            try {
                Crawler.this.LOG.trace("new-crawl-run");
                synchronized (Crawler.class) {
                    Row removeNextRow2 = Crawler.this.removeNextRow();
                    if (removeNextRow2 == null) {
                        return;
                    }
                    Job job = new Job((String) removeNextRow2.column("url"), false, null);
                    job.setMetadata(new JSONObject(removeNextRow2.columnAsString("metadata")));
                    while (job != null) {
                        Crawler.this.LOG.info("new-crawl-job url=" + job.getUrl());
                        CrawlStats crawlStats = (CrawlStats) Crawler.this.stats.get(job.getCategory());
                        if (crawlStats == null) {
                            crawlStats = new CrawlStats();
                            Crawler.this.stats.put(job.getCacheKey(), crawlStats);
                        }
                        crawlStats.total.incrementAndGet();
                        boolean z = false;
                        try {
                            Crawler.this.LOG.info("begin-crawl url=" + job.getUrl() + " tag=" + job.getTag() + " handler=" + Crawler.this.handlers.get(job.getTag()));
                            String lookupCache = Crawler.this.lookupCache(job);
                            if (lookupCache == null) {
                                Crawler.this.LOG.info("cache-miss url=" + job.getUrl());
                                lookupCache = Crawler.this.internalFetchAndCache(job);
                                crawlStats.crawled.incrementAndGet();
                            } else {
                                z = true;
                                crawlStats.cache.incrementAndGet();
                            }
                            if (Crawler.this.handlers.containsKey(job.getTag())) {
                                try {
                                    ((CrawlHandler) Crawler.this.handlers.get(job.getTag())).ready(job, lookupCache);
                                } catch (Throwable th) {
                                    th.printStackTrace();
                                    Crawler.this.LOG.error("Error in parsing", th);
                                }
                            } else {
                                Crawler.this.LOG.warn("Missing Handler for url=" + job.getUrl() + " tag=" + job.getTag());
                            }
                        } catch (Throwable th2) {
                            if (Crawler.this.handlers.containsKey(job.getTag())) {
                                ((CrawlHandler) Crawler.this.handlers.get(job.getTag())).ready(job, null);
                            } else {
                                Crawler.this.LOG.warn("Missing Handler for url=" + job.getUrl() + " tag=" + job.getTag());
                            }
                            Crawler.this.LOG.error("crawl-failed url=" + job.getUrl(), th2);
                            crawlStats.errors.incrementAndGet();
                        }
                        if (!z || (removeNextRow = Crawler.this.removeNextRow()) == null) {
                            break;
                        }
                        job = new Job((String) removeNextRow.column("url"), false, null);
                        job.setMetadata(new JSONObject(removeNextRow.columnAsString("metadata")));
                    }
                }
            } catch (Exception e) {
                Crawler.this.LOG.error(e.getMessage(), e);
                throw new IllegalStateException(e);
            }
        }
    }

    /**
     * Private so that the only instance created in this class is the static {@code INSTANCE}.
     *
     * <p>NOTE(review): the class is annotated with {@code @WebServlet}; servlet containers
     * normally instantiate servlets through a public no-arg constructor — confirm how this
     * servlet is actually registered.
     */
    private Crawler() {
    }

    /**
     * Registers the handler for a tag, replacing any handler already stored under that tag.
     *
     * @param str          the tag the handler is stored under
     * @param crawlHandler the handler to store
     */
    public void setHandler(String str, CrawlHandler crawlHandler) {
        ConcurrentHashMap<String, CrawlHandler> registry = this.handlers;
        registry.put(str, crawlHandler);
    }

    /**
     * Starts the crawler with the default settings: one worker, a timeout value of 30 and a
     * delay of 100 milliseconds between runs (see {@code start(int, int, int)}).
     */
    public void start() {
        final int workers = 1;
        final int timeoutSeconds = 30;
        final int delayMillis = 100;
        start(workers, timeoutSeconds, delayMillis);
    }

    /**
     * Creates the two queue tables, builds the HTTP client and schedules the crawl workers.
     *
     * @param i  number of workers; also used as the scheduler pool size and as the connection
     *           manager's total connection limit
     * @param i2 timeout in seconds, applied (times 1000, as milliseconds) to both the connection
     *           timeout and the socket timeout
     * @param i3 delay in milliseconds between the end of one worker run and the start of the next
     */
    public void start(int i, int i2, int i3) {
        // Both queues share one schema: id, url (512) and metadata.
        for (String queue : new String[] {"crawl_queue_normal", "crawl_queue_high"}) {
            Table.get(queue).columns(Column.bigInteger("id", true), Column.text("url", 512), Column.longtext("metadata")).create();
        }

        int timeoutMillis = i2 * 1000;
        BasicHttpParams params = new BasicHttpParams();
        HttpConnectionParams.setConnectionTimeout(params, timeoutMillis);
        HttpConnectionParams.setSoTimeout(params, timeoutMillis);

        ThreadSafeClientConnManager pool = new ThreadSafeClientConnManager();
        pool.setMaxTotal(i);
        pool.setDefaultMaxPerRoute(2);

        DefaultHttpClient plainClient = new DefaultHttpClient(pool, params);
        this.httpClient = (DefaultHttpClient) wrapClient(plainClient);
        this.background = Executors.newScheduledThreadPool(i);
        this.stats = new ConcurrentHashMap<>();

        // One periodic CrawlJob per worker, each starting immediately.
        for (int remaining = i; remaining > 0; remaining--) {
            this.background.scheduleWithFixedDelay(new CrawlJob(), 0L, i3, TimeUnit.MILLISECONDS);
        }
    }

    /**
     * Returns a client that shares the given client's connection manager and parameters but
     * accepts every HTTPS certificate and every host name.
     *
     * <p>NOTE(review): the trust manager and host name verifier below disable TLS server
     * authentication entirely. That is kept as-is because it is what the original code does,
     * but it leaves every HTTPS fetch open to interception — confirm this is intended.
     *
     * @param httpClient the client whose connection manager gets the permissive "https" scheme
     * @return a new client using the permissive scheme, or {@code httpClient} itself when the
     *         SSL context could not be set up
     */
    private HttpClient wrapClient(HttpClient httpClient) {
        try {
            TrustManager trustEverything = new X509TrustManager() {
                @Override // javax.net.ssl.X509TrustManager
                public X509Certificate[] getAcceptedIssuers() {
                    // The interface requires a non-null (possibly empty) array.
                    return new X509Certificate[0];
                }

                @Override // javax.net.ssl.X509TrustManager
                public void checkServerTrusted(X509Certificate[] x509CertificateArr, String str) throws CertificateException {
                    // Accepts any server certificate chain.
                }

                @Override // javax.net.ssl.X509TrustManager
                public void checkClientTrusted(X509Certificate[] x509CertificateArr, String str) throws CertificateException {
                    // Accepts any client certificate chain.
                }
            };
            SSLContext sslContext = SSLContext.getInstance("TLS");
            sslContext.init(null, new TrustManager[] {trustEverything}, null);
            SSLSocketFactory socketFactory = new SSLSocketFactory(sslContext);
            socketFactory.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
            ClientConnectionManager connectionManager = httpClient.getConnectionManager();
            connectionManager.getSchemeRegistry().register(new Scheme("https", socketFactory, 443));
            return new DefaultHttpClient(connectionManager, httpClient.getParams());
        } catch (Exception e) {
            // Returning null here used to surface much later as a NullPointerException on the
            // first request; keep the caller working with the client it passed in instead.
            this.LOG.error("Could not install the permissive https scheme; using the client as given", e);
            return httpClient;
        }
    }

    /**
     * Returns the shared crawler.
     *
     * @return the static {@code INSTANCE}; the same object on every call
     */
    public static Crawler getInstance() {
        return INSTANCE;
    }

    /**
     * Queues a job by inserting its URL and metadata into the high- or normal-priority table.
     *
     * @param job the job to queue; its metadata is stored via {@code toString()}
     * @throws IllegalStateException if the crawler has not been started or has been shut down
     * @throws Exception             if the insert fails
     */
    public void schedule(Job job) throws Exception {
        // background is only assigned in start(), so it is null on a crawler that was never
        // started; report that the same way as a crawler that was shut down.
        if (this.background == null || this.background.isShutdown()) {
            throw new IllegalStateException("Scheduling job when there is no crawler running");
        }
        this.LOG.info("Scheduling job url=" + job.getUrl());
        String queue = job.isHighPriority() ? "crawl_queue_high" : "crawl_queue_normal";
        Table.get(queue).columns("url", "metadata").values(job.getUrl(), job.getMetadata().toString()).add();
    }

    /**
     * Fetches a URL through a one-off job built from the URL alone.
     *
     * @param str the URL to fetch
     * @return whatever {@code internalFetchAndCache} returns for that job
     * @throws Exception if the fetch fails
     */
    public String get(String str) throws Exception {
        Job oneOff = new Job(str);
        return internalFetchAndCache(oneOff);
    }

    /**
     * Sends a body-less POST to the given URL, discards the response body and logs the status.
     *
     * @param str the URL to post to
     * @throws Exception if the request fails or the response cannot be consumed
     */
    public void post(String str) throws Exception {
        this.LOG.info("POST:" + str);
        // try-with-resources releases the response (and its pooled connection) even when
        // consuming the entity throws.
        try (CloseableHttpResponse response = this.httpClient.execute((HttpUriRequest) new HttpPost(str))) {
            int status = response.getStatusLine().getStatusCode();
            // The body is discarded whatever the status; consume() tolerates a null entity.
            EntityUtils.consume(response.getEntity());
            // Reported through the crawler log instead of the former System.out debug print.
            this.LOG.info("POST:" + str + " status=" + status);
        }
    }

    /**
     * Resolves a job's content on the calling thread, from the cache if available and over HTTP
     * otherwise, then passes it to the handler registered for the job's tag, if there is one.
     *
     * @param job the job to resolve
     * @throws Exception if the cache lookup, the fetch or the handler fails
     */
    public void fetch(Job job) throws Exception {
        String cached = lookupCache(job);
        String content = cached != null ? cached : internalFetchAndCache(job);

        CrawlHandler handler = this.handlers.get(job.getTag());
        if (handler == null) {
            // No handler registered for this tag: nothing to notify.
            return;
        }
        handler.ready(job, content);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Looks up a job's content in the cache.
     *
     * @param job the job whose cache key is used for the lookup
     * @return the value returned by the cache, or {@code null} when the job has no cache key or
     *         the cache is not running
     * @throws Exception if the cache lookup fails
     */
    public String lookupCache(Job job) throws Exception {
        // Guard clause: without a key or a running cache there is nothing to look up.
        if (job.getCacheKey() == null || !Cache.getInstance().isRunning()) {
            return null;
        }
        return Cache.getInstance().fetch(job.getCacheKey(), null);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Performs the HTTP GET for a job and stores a successful body in the cache.
     *
     * <p>The job's headers are copied onto the request and its user agent, when set, is applied
     * through the {@code http.useragent} parameter. Only a 200 response yields a body; any other
     * status is logged as an error and its body discarded.
     *
     * @param job the job describing the URL, headers, user agent, cache key and expiry
     * @return the response body, or {@code null} when the status was not 200 or the response had
     *         no entity
     * @throws Exception if the request, reading the body or the cache store fails
     */
    public String internalFetchAndCache(Job job) throws Exception {
        HttpGet request = new HttpGet(job.getUrl());
        for (Map.Entry<String, String> header : job.getHeaders().entrySet()) {
            request.setHeader(header.getKey(), header.getValue());
        }
        if (job.getUserAgent() != null) {
            request.getParams().setParameter("http.useragent", job.getUserAgent());
        }

        String body = null;
        // try-with-resources guarantees the response is released when reading the body throws;
        // otherwise the pooled connection stays leased and later requests can stall on the pool.
        try (CloseableHttpResponse response = this.httpClient.execute((HttpUriRequest) request)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity entity = response.getEntity();
                if (entity != null) {
                    // toString() reads the entity to the end, so no separate consume() is needed.
                    body = EntityUtils.toString(entity);
                }
            } else {
                this.LOG.error(response.getStatusLine().getStatusCode() + ":" + response.getStatusLine().getReasonPhrase());
                EntityUtils.consume(response.getEntity());
            }
        }

        if (body != null && job.getCacheKey() != null && Cache.getInstance().isRunning()) {
            Cache.getInstance().store(job.getCacheKey(), body, job.getExpiry());
        }
        return body;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Picks a random row from the high-priority queue, or from the normal queue when the
     * high-priority one yields nothing, and deletes it from the table it came from.
     *
     * @return the removed row, or {@code null} when neither queue yields a row
     * @throws SQLException if a query or the delete fails
     */
    public Row removeNextRow() throws SQLException {
        String queue = "crawl_queue_high";
        Row picked = Table.get(queue).random().row();
        if (picked == null) {
            // Nothing urgent is waiting: fall back to the normal queue.
            queue = "crawl_queue_normal";
            picked = Table.get(queue).random().row();
        }
        if (picked != null) {
            Table.get(queue).where("id", picked.column("id")).delete();
        }
        return picked;
    }

    /**
     * Stops the crawler: shuts the scheduler down immediately, then shuts down the HTTP client's
     * connection manager. Either step is skipped when its component was never created.
     */
    public void shutdown() {
        ScheduledExecutorService scheduler = this.background;
        if (scheduler != null) {
            scheduler.shutdownNow();
        }

        DefaultHttpClient client = this.httpClient;
        if (client != null) {
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Handles GET requests for this servlet. The body is empty, so nothing is written to the
     * response here.
     *
     * @param httpServletRequest  the incoming request (unused)
     * @param httpServletResponse the response (left untouched)
     */
    protected void doGet(HttpServletRequest httpServletRequest, HttpServletResponse httpServletResponse) throws ServletException, IOException {
    }

    /**
     * Unregisters a handler from every tag it is registered under.
     *
     * @param crawlHandler the handler to remove; matched by identity ({@code ==}), not by
     *                     {@code equals}
     */
    public void removeHandler(CrawlHandler crawlHandler) {
        // Removing through the values view's iterator drops the matching map entry.
        for (Iterator<CrawlHandler> cursor = this.handlers.values().iterator(); cursor.hasNext();) {
            CrawlHandler registered = cursor.next();
            if (registered == crawlHandler) {
                cursor.remove();
            }
        }
    }
}
