package cn.wanghaomiao.seimi.http.hc;

import cn.wanghaomiao.seimi.core.SeimiDownloader;
import cn.wanghaomiao.seimi.http.SeimiCookie;
import cn.wanghaomiao.seimi.http.SeimiHttpType;
import cn.wanghaomiao.seimi.struct.BodyType;
import cn.wanghaomiao.seimi.struct.CrawlerModel;
import cn.wanghaomiao.seimi.struct.Request;
import cn.wanghaomiao.seimi.struct.Response;
import cn.wanghaomiao.seimi.utils.StrFormatUtil;
import java.net.URI;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Locale;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.client.CookieStore;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.entity.ContentType;
import org.apache.http.impl.cookie.BasicClientCookie;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
import org.seimicrawler.xpath.JXDocument;
import org.seimicrawler.xpath.exception.NoSuchAxisException;
import org.seimicrawler.xpath.exception.NoSuchFunctionException;
import org.seimicrawler.xpath.exception.XpathSyntaxErrorException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:cn/wanghaomiao/seimi/http/hc/HcDownloader.class */
public class HcDownloader implements SeimiDownloader {
    private CrawlerModel crawlerModel;
    private HttpClient hc;
    private RequestBuilder currentReqBuilder;
    private Request currentRequest;
    private HttpResponse httpResponse;
    private HttpContext httpContext = new BasicHttpContext();
    private Logger logger = LoggerFactory.getLogger(getClass());

    public HcDownloader(CrawlerModel crawlerModel) {
        this.crawlerModel = crawlerModel;
        if (crawlerModel.isUseCookie()) {
            this.hc = HttpClientFactory.getHttpClient(crawlerModel.getHttpTimeOut(), crawlerModel.getCookieStore());
        } else {
            this.hc = HttpClientFactory.getHttpClient(crawlerModel.getHttpTimeOut());
        }
    }

    @Override // cn.wanghaomiao.seimi.core.SeimiDownloader
    public Response process(Request request) throws Exception {
        this.currentReqBuilder = HcRequestGenerator.getHttpRequestBuilder(request, this.crawlerModel);
        this.currentRequest = request;
        addCookies(request.getUrl(), request.getSeimiCookies());
        this.httpResponse = this.hc.execute(this.currentReqBuilder.build(), this.httpContext);
        return renderResponse(this.httpResponse, request, this.httpContext);
    }

    @Override // cn.wanghaomiao.seimi.core.SeimiDownloader
    public Response metaRefresh(String str) throws Exception {
        if (!str.startsWith("http")) {
            str = getRealUrl(this.httpContext) + str;
        }
        this.logger.info("Seimi refresh url to={} from={}", str, this.currentReqBuilder.getUri());
        this.currentReqBuilder.setUri(str);
        this.httpResponse = this.hc.execute(this.currentReqBuilder.build(), this.httpContext);
        return renderResponse(this.httpResponse, this.currentRequest, this.httpContext);
    }

    @Override // cn.wanghaomiao.seimi.core.SeimiDownloader
    public int statusCode() {
        return this.httpResponse.getStatusLine().getStatusCode();
    }

    @Override // cn.wanghaomiao.seimi.core.SeimiDownloader
    public void addCookies(String str, List<SeimiCookie> list) {
        if (list == null || list.size() <= 0) {
            return;
        }
        CookieStore cookieStore = this.crawlerModel.getCookieStore();
        for (SeimiCookie seimiCookie : list) {
            BasicClientCookie basicClientCookie = new BasicClientCookie(seimiCookie.getName(), seimiCookie.getValue());
            basicClientCookie.setPath(StringUtils.isNotBlank(seimiCookie.getPath()) ? seimiCookie.getPath() : "/");
            basicClientCookie.setDomain(StringUtils.isNotBlank(seimiCookie.getDomain()) ? seimiCookie.getDomain() : StrFormatUtil.getDodmain(str));
            cookieStore.addCookie(basicClientCookie);
        }
    }

    private Response renderResponse(HttpResponse httpResponse, Request request, HttpContext httpContext) {
        Response response = new Response();
        HttpEntity entity = httpResponse.getEntity();
        response.setSeimiHttpType(SeimiHttpType.APACHE_HC);
        response.setRealUrl(getRealUrl(httpContext));
        response.setUrl(request.getUrl());
        response.setRequest(request);
        response.setMeta(request.getMeta());
        if (entity != null) {
            Header firstHeader = httpResponse.getFirstHeader("Referer");
            if (firstHeader != null) {
                response.setReferer(firstHeader.getValue());
            }
            String lowerCase = entity.getContentType().getValue().toLowerCase();
            if (lowerCase.contains("text") || lowerCase.contains("json") || lowerCase.contains("ajax")) {
                response.setBodyType(BodyType.TEXT);
                try {
                    response.setData(EntityUtils.toByteArray(entity));
                    Charset charset = ContentType.get(entity).getCharset();
                    if (charset == null) {
                        response.setContent(new String(response.getData(), "ISO-8859-1"));
                        String renderRealCharset = renderRealCharset(response);
                        response.setContent(new String(response.getContent().getBytes("ISO-8859-1"), renderRealCharset));
                        response.setCharset(renderRealCharset);
                    } else {
                        response.setContent(new String(response.getData(), charset));
                        response.setCharset(charset.name());
                    }
                } catch (Exception e) {
                    this.logger.error("no content data");
                }
            } else {
                response.setBodyType(BodyType.BINARY);
                try {
                    response.setData(EntityUtils.toByteArray(entity));
                    response.setContent(StringUtils.substringAfterLast(request.getUrl(), "/"));
                } catch (Exception e2) {
                    this.logger.error("no data can be read from httpResponse");
                }
            }
        }
        return response;
    }

    private String renderRealCharset(Response response) throws NoSuchFunctionException, XpathSyntaxErrorException, NoSuchAxisException {
        JXDocument document = response.document();
        String trim = StrFormatUtil.getFirstEmStr(document.sel("//meta[@charset]/@charset"), "").trim();
        if (StringUtils.isBlank(trim)) {
            trim = StrFormatUtil.getFirstEmStr(document.sel("//meta[@http-equiv='charset']/@content"), "").trim();
        }
        if (StringUtils.isBlank(trim)) {
            trim = StrFormatUtil.parseCharset(StringUtils.join(document.sel("//meta[@http-equiv='Content-Type']/@content|//meta[@http-equiv='content-type']/@content"), ";").trim().toLowerCase());
        }
        return StringUtils.isNotBlank(trim) ? trim : "UTF-8";
    }

    private String getRealUrl(HttpContext httpContext) {
        Object attribute = httpContext.getAttribute("http.target_host");
        Object attribute2 = httpContext.getAttribute("http.request");
        if (attribute == null || attribute2 == null) {
            return null;
        }
        HttpHost httpHost = (HttpHost) attribute;
        HttpUriRequest httpUriRequest = (HttpUriRequest) attribute2;
        return httpUriRequest.getURI().isAbsolute() ? httpUriRequest.getURI().toString() : httpHost.toString() + httpUriRequest.getURI().toString();
    }
}
