package cn.wanghaomiao.seimi.core;

import cn.wanghaomiao.seimi.annotation.Interceptor;
import cn.wanghaomiao.seimi.def.BaseSeimiCrawler;
import cn.wanghaomiao.seimi.http.SeimiHttpType;
import cn.wanghaomiao.seimi.http.hc.HcDownloader;
import cn.wanghaomiao.seimi.http.okhttp.OkHttpDownloader;
import cn.wanghaomiao.seimi.struct.CrawlerModel;
import cn.wanghaomiao.seimi.struct.Request;
import cn.wanghaomiao.seimi.struct.Response;
import cn.wanghaomiao.seimi.utils.StructValidator;
import com.alibaba.fastjson.JSON;
import java.lang.reflect.Method;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:cn/wanghaomiao/seimi/core/SeimiProcessor.class */
/**
 * Worker loop of a crawler: repeatedly takes a {@link Request} from the crawler's queue,
 * filters it, downloads it, follows HTML meta-refresh redirects and finally invokes the
 * callback method named by the request, surrounded by the matching interceptors.
 *
 * <p>The loop ends when a request whose {@code isStop()} flag is set is received, or when
 * the worker thread is interrupted while pausing between requests.
 */
public class SeimiProcessor implements Runnable {
    /** Upper bound on the number of meta-refresh redirects followed for a single request. */
    private static final int MAX_META_REFRESH_HOPS = 3;

    /** Finds a {@code <meta http-equiv="refresh" ...>} tag; capture group 1 is the text after {@code url=}. */
    private static final Pattern META_REFRESH = Pattern.compile("<(?:META|meta|Meta)\\s+(?:HTTP-EQUIV|http-equiv)\\s*=\\s*\"refresh\".*(?:url|URL)=(\\S*)\".*/?>");

    private final Logger logger = LoggerFactory.getLogger(getClass());
    private final SeimiQueue queue;
    private final List<SeimiInterceptor> interceptors;
    private final CrawlerModel crawlerModel;
    private final BaseSeimiCrawler crawler;

    /**
     * @param list         interceptors to consult around every callback invocation
     * @param crawlerModel model supplying the queue, the crawler instance and its settings;
     *                     must not be {@code null} (it is dereferenced immediately)
     */
    public SeimiProcessor(List<SeimiInterceptor> list, CrawlerModel crawlerModel) {
        this.queue = crawlerModel.getQueueInstance();
        this.interceptors = list;
        this.crawlerModel = crawlerModel;
        this.crawler = crawlerModel.getInstance();
    }

    /**
     * Runs the take/filter/download/callback loop until a stop request arrives or the
     * thread is interrupted. Any other failure is logged and routed to the retry logic.
     */
    @Override
    public void run() {
        while (true) {
            Request request = null;
            try {
                request = this.queue.bPop(this.crawlerModel.getCrawlerName());
                if (request == null) {
                    continue;
                }
                if (request.isStop()) {
                    this.logger.info("SeimiProcessor[{}] will stop!", Thread.currentThread().getName());
                    return;
                }
                if (isAccepted(request)) {
                    process(request);
                }
            } catch (InterruptedException e) {
                // Keep the retry bookkeeping for the in-flight request, then honour the
                // interruption instead of clearing it and looping forever.
                this.logger.warn("SeimiProcessor[{}] was interrupted and will stop", Thread.currentThread().getName());
                handleFailure(request);
                Thread.currentThread().interrupt();
                return;
            } catch (Exception e) {
                this.logger.error(e.getMessage(), e);
                handleFailure(request);
            }
        }
    }

    /**
     * Applies the validation, allow/deny, retry-budget and duplicate checks, logging the
     * reason whenever a request is rejected by a rule.
     *
     * @return {@code true} when the request should be downloaded
     */
    private boolean isAccepted(Request request) {
        if (!StructValidator.validateAnno(request)) {
            this.logger.warn("Request={} is illegal", JSON.toJSONString(request));
            return false;
        }
        if (!StructValidator.validateAllowRules(this.crawler.allowRules(), request.getUrl())) {
            this.logger.warn("Request={} will be dropped by allowRules=[{}]", JSON.toJSONString(request), StringUtils.join(this.crawler.allowRules(), ","));
            return false;
        }
        if (StructValidator.validateDenyRules(this.crawler.denyRules(), request.getUrl())) {
            this.logger.warn("Request={} will be dropped by denyRules=[{}]", JSON.toJSONString(request), StringUtils.join(this.crawler.denyRules(), ","));
            return false;
        }
        if (request.getCurrentReqCount() >= request.getMaxReqCount()) {
            // Retry budget already used up: drop without further processing.
            return false;
        }
        // The duplicate filter only applies to first attempts (count == 0), so retries pass.
        boolean duplicate = !request.isSkipDuplicateFilter()
                && this.crawlerModel.isUseUnrepeated()
                && this.queue.isProcessed(request)
                && request.getCurrentReqCount() == 0;
        if (duplicate) {
            this.logger.info("This request has bean processed,so current request={} will be dropped!", JSON.toJSONString(request));
            return false;
        }
        return true;
    }

    /**
     * Marks the request as processed, downloads it and, when the request names a known
     * callback method, invokes that method between the interceptors' before/after hooks.
     */
    private void process(Request request) throws Exception {
        this.queue.addProcessed(request);
        SeimiDownloader downloader = newDownloader();
        Response response = followMetaRefresh(request, downloader, downloader.process(request));

        Method callback = this.crawlerModel.getMemberMethods().get(request.getCallBack());
        if (callback == null) {
            return;
        }
        for (SeimiInterceptor interceptor : this.interceptors) {
            if (appliesTo(interceptor, callback)) {
                interceptor.before(callback, response);
            }
        }
        if (this.crawlerModel.getDelay() > 0) {
            TimeUnit.SECONDS.sleep(this.crawlerModel.getDelay());
        }
        callback.invoke(this.crawlerModel.getInstance(), response);
        for (SeimiInterceptor interceptor : this.interceptors) {
            if (appliesTo(interceptor, callback)) {
                interceptor.after(callback, response);
            }
        }
        this.logger.debug("Crawler[{}] ,url={} ,responseStatus={}", new Object[]{this.crawlerModel.getCrawlerName(), request.getUrl(), downloader.statusCode()});
    }

    /** Creates the downloader implementation selected by the crawler model's HTTP type. */
    private SeimiDownloader newDownloader() {
        if (SeimiHttpType.APACHE_HC.val() == this.crawlerModel.getSeimiHttpType().val()) {
            return new HcDownloader(this.crawlerModel);
        }
        return new OkHttpDownloader(this.crawlerModel);
    }

    /**
     * Follows up to {@link #MAX_META_REFRESH_HOPS} meta-refresh redirects found in the
     * response content. Nothing is followed when the request uses the Seimi agent or when
     * the response carries no content.
     *
     * @return the last response obtained (the given one when no redirect was followed)
     */
    private Response followMetaRefresh(Request request, SeimiDownloader downloader, Response response) throws Exception {
        if (request.isUseSeimiAgent()) {
            return response;
        }
        Response current = response;
        for (int hop = 0; hop < MAX_META_REFRESH_HOPS; hop++) {
            CharSequence content = current.getContent();
            if (content == null) {
                // Pattern.matcher(null) would throw; there is nothing to scan anyway.
                break;
            }
            Matcher matcher = META_REFRESH.matcher(content);
            if (!matcher.find()) {
                break;
            }
            current = downloader.metaRefresh(matcher.group(1).replaceAll("'", ""));
        }
        return current;
    }

    /**
     * Decides whether an interceptor takes part in the given callback: either its
     * {@link Interceptor} annotation sets {@code everyMethod}, or its target annotation is
     * present on the callback method or on the crawler class. A missing {@link Interceptor}
     * annotation or a {@code null} target annotation simply means "no match".
     */
    private boolean appliesTo(SeimiInterceptor interceptor, Method callback) {
        Interceptor meta = interceptor.getClass().getAnnotation(Interceptor.class);
        if (meta != null && meta.everyMethod()) {
            return true;
        }
        Class<? extends java.lang.annotation.Annotation> target = interceptor.getTargetAnnotationClass();
        if (target == null) {
            return false;
        }
        return callback.isAnnotationPresent(target) || this.crawlerModel.getClazz().isAnnotationPresent(target);
    }

    /**
     * Retry bookkeeping after a failed attempt: the attempt is counted first, then the
     * request is either pushed back onto the queue (budget left) or handed to the crawler's
     * {@code handleErrorRequest} hook (budget exhausted and a positive maximum configured).
     * Counting before deciding is what lets the hook fire; re-queuing a request whose count
     * already equals the maximum would only get it dropped by the retry-budget filter.
     *
     * <p>Failures raised here are logged rather than propagated so that a faulty queue or
     * error hook cannot terminate the worker thread.
     *
     * @param request the request that failed, or {@code null} when none had been taken yet
     */
    private void handleFailure(Request request) {
        if (request == null) {
            return;
        }
        try {
            int max = request.getMaxReqCount();
            if (request.getCurrentReqCount() < max) {
                request.incrReqCount();
            }
            if (request.getCurrentReqCount() < max) {
                this.queue.push(request);
                this.logger.info("Request process error,req will go into queue again,url={},maxReqCount={},currentReqCount={}", new Object[]{request.getUrl(), max, request.getCurrentReqCount()});
            } else if (max > 0) {
                this.crawler.handleErrorRequest(request);
            }
        } catch (Exception e) {
            this.logger.error(e.getMessage(), e);
        }
    }
}
