package us.codecraft.webmagic;

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.collections.CollectionUtils;
import org.apache.log4j.Logger;
import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.pipeline.Pipeline;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.scheduler.QueueScheduler;
import us.codecraft.webmagic.scheduler.Scheduler;
import us.codecraft.webmagic.utils.ThreadUtils;

/* loaded from: input_file:us/codecraft/webmagic/Spider.class */
/**
 * Crawler entry point: takes requests from a {@link Scheduler}, downloads them with a
 * {@link Downloader}, hands the page to the {@link PageProcessor} and forwards the extracted
 * results to every configured {@link Pipeline}.
 *
 * <p>Configuration methods return {@code this} so calls can be chained. Most of them may only be
 * called before {@link #run()} has been entered; afterwards they throw
 * {@link IllegalStateException}.
 */
public class Spider implements Runnable, Task {
    /** Downloader used to fetch pages; defaulted in {@link #checkComponent()} when unset. */
    protected Downloader downloader;
    /** Processor that extracts results and follow-up requests from each downloaded page. */
    protected PageProcessor pageProcessor;
    /** URLs pushed to the scheduler when {@link #run()} starts; may be {@code null}. */
    protected List<String> startUrls;
    /** Site settings obtained from the page processor. */
    protected Site site;
    /** Explicit task identifier; when {@code null}, {@link #getUUID()} falls back to the site domain. */
    protected String uuid;
    /** Worker pool used by {@link #run()}; {@code null} means requests are processed on the calling thread. */
    protected ExecutorService executorService;
    /** State: created, {@link #run()} not yet entered. */
    protected static final int STAT_INIT = 0;
    /** State: {@link #run()} is in progress. */
    protected static final int STAT_RUNNING = 1;
    /** State: {@link #run()} has finished. */
    protected static final int STAT_STOPPED = 2;
    /** Pipelines that receive the result items of every processed page. */
    protected List<Pipeline> pipelines = new ArrayList<Pipeline>();
    /** Source and sink of pending requests. */
    protected Scheduler scheduler = new QueueScheduler();
    protected Logger logger = Logger.getLogger(getClass());
    /** Number of threads passed to the downloader and used to size the worker pool. */
    protected int threadNum = 1;
    /** Current state, one of the {@code STAT_*} constants. */
    protected AtomicInteger stat = new AtomicInteger(STAT_INIT);

    /**
     * Creates a spider for the given processor, taking the site and its start URLs from it.
     *
     * @param pageProcessor processor whose {@code getSite()} supplies the site configuration
     */
    public Spider(PageProcessor pageProcessor) {
        this.pageProcessor = pageProcessor;
        // Query the processor once so that site and startUrls always come from the same Site object.
        this.site = pageProcessor.getSite();
        this.startUrls = this.site.getStartUrls();
    }

    /**
     * Static factory equivalent to {@code new Spider(pageProcessor)}.
     *
     * @param pageProcessor processor to crawl with
     * @return a new spider
     */
    public static Spider create(PageProcessor pageProcessor) {
        return new Spider(pageProcessor);
    }

    /**
     * Replaces the start URLs.
     *
     * @param list URLs to push to the scheduler when the spider starts
     * @return this spider
     * @throws IllegalStateException if the spider has already been started
     */
    public Spider startUrls(List<String> list) {
        checkIfNotRunning();
        this.startUrls = list;
        return this;
    }

    /**
     * Sets the identifier returned by {@link #getUUID()}.
     *
     * @param str identifier, or {@code null} to fall back to the site domain
     * @return this spider
     */
    public Spider setUUID(String str) {
        this.uuid = str;
        return this;
    }

    /**
     * Alias of {@link #setScheduler(Scheduler)}.
     *
     * @param scheduler scheduler to use
     * @return this spider
     * @throws IllegalStateException if the spider has already been started
     */
    public Spider scheduler(Scheduler scheduler) {
        return setScheduler(scheduler);
    }

    /**
     * Replaces the scheduler.
     *
     * @param scheduler scheduler to use
     * @return this spider
     * @throws IllegalStateException if the spider has already been started
     */
    public Spider setScheduler(Scheduler scheduler) {
        checkIfNotRunning();
        this.scheduler = scheduler;
        return this;
    }

    /**
     * Alias of {@link #addPipeline(Pipeline)}.
     *
     * @param pipeline pipeline to append
     * @return this spider
     * @throws IllegalStateException if the spider has already been started
     */
    public Spider pipeline(Pipeline pipeline) {
        return addPipeline(pipeline);
    }

    /**
     * Appends a pipeline to the list of result consumers.
     *
     * @param pipeline pipeline to append
     * @return this spider
     * @throws IllegalStateException if the spider has already been started
     */
    public Spider addPipeline(Pipeline pipeline) {
        checkIfNotRunning();
        this.pipelines.add(pipeline);
        return this;
    }

    /**
     * Discards all configured pipelines by installing a fresh, empty list.
     *
     * @return this spider
     */
    public Spider clearPipeline() {
        this.pipelines = new ArrayList<Pipeline>();
        return this;
    }

    /**
     * Alias of {@link #setDownloader(Downloader)}.
     *
     * @param downloader downloader to use
     * @return this spider
     * @throws IllegalStateException if the spider has already been started
     */
    public Spider downloader(Downloader downloader) {
        return setDownloader(downloader);
    }

    /**
     * Replaces the downloader.
     *
     * @param downloader downloader to use
     * @return this spider
     * @throws IllegalStateException if the spider has already been started
     */
    public Spider setDownloader(Downloader downloader) {
        checkIfNotRunning();
        this.downloader = downloader;
        return this;
    }

    /**
     * Fills in defaults for components that were not configured: an {@link HttpClientDownloader}
     * when no downloader is set and a {@link ConsolePipeline} when no pipeline is set. Also passes
     * the configured thread count to the downloader.
     */
    protected void checkComponent() {
        if (this.downloader == null) {
            this.downloader = new HttpClientDownloader();
        }
        if (this.pipelines.isEmpty()) {
            this.pipelines.add(new ConsolePipeline());
        }
        this.downloader.setThread(this.threadNum);
    }

    /**
     * Runs the crawl until the scheduler has no more requests and no worker is still processing
     * one. Without a worker pool the requests are processed on the calling thread; otherwise each
     * request is handed to the pool.
     *
     * <p>When the crawl ends, normally or through an exception, the worker pool is shut down, the
     * state becomes {@link #STAT_STOPPED} and {@link #destroy()} is called.
     *
     * @throws IllegalStateException if the spider is not in its initial state
     */
    @Override
    public void run() {
        if (!this.stat.compareAndSet(STAT_INIT, STAT_RUNNING)) {
            throw new IllegalStateException("Spider is already running!");
        }
        try {
            checkComponent();
            if (this.startUrls != null) {
                for (String url : this.startUrls) {
                    this.scheduler.push(new Request(url), this);
                }
            }
            if (this.executorService == null) {
                crawlSequentially();
            } else {
                crawlWithWorkers();
            }
        } finally {
            // Release everything even when a component threw, so the pool's threads and the
            // closeable components are not left behind with the state stuck at RUNNING.
            if (this.executorService != null) {
                this.executorService.shutdown();
            }
            this.stat.compareAndSet(STAT_RUNNING, STAT_STOPPED);
            destroy();
        }
    }

    /** Processes requests one after another on the calling thread until the scheduler is empty. */
    private void crawlSequentially() {
        Request request = this.scheduler.poll(this);
        while (request != null) {
            processRequest(request);
            request = this.scheduler.poll(this);
        }
    }

    /**
     * Dispatches requests to the worker pool until the scheduler is empty and no worker is active.
     * An interrupt received while waiting does not stop the crawl; it is remembered and the
     * thread's interrupt status is restored once the loop is left.
     */
    private void crawlWithWorkers() {
        final AtomicInteger activeWorkers = new AtomicInteger(0);
        boolean interrupted = false;
        try {
            Request request = this.scheduler.poll(this);
            while (true) {
                if (request == null) {
                    // Nothing queued right now; give the workers time to produce new requests.
                    try {
                        Thread.sleep(100L);
                    } catch (InterruptedException e) {
                        interrupted = true;
                    }
                } else {
                    submit(request, activeWorkers);
                }
                request = this.scheduler.poll(this);
                if (request == null && activeWorkers.get() == 0) {
                    // A worker may have pushed requests and finished between the poll and the
                    // counter check, so look once more before concluding the crawl is done.
                    // This re-poll must only happen when the first poll was empty; otherwise the
                    // request just taken from the scheduler would be overwritten and lost.
                    request = this.scheduler.poll(this);
                    if (request == null) {
                        break;
                    }
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /** Hands one request to the worker pool and keeps the active-worker counter in step. */
    private void submit(final Request request, final AtomicInteger activeWorkers) {
        activeWorkers.incrementAndGet();
        this.executorService.execute(new Runnable() {
            @Override
            public void run() {
                try {
                    Spider.this.processRequest(request);
                } finally {
                    // Must also run when processing throws, or the dispatch loop would wait
                    // forever for the counter to reach zero.
                    activeWorkers.decrementAndGet();
                }
            }
        });
    }

    /**
     * Closes the downloader, the page processor and every pipeline that implements
     * {@link Closeable}.
     */
    protected void destroy() {
        destroyEach(this.downloader);
        destroyEach(this.pageProcessor);
        for (Pipeline pipeline : this.pipelines) {
            destroyEach(pipeline);
        }
    }

    /**
     * Closes {@code obj} if it is {@link Closeable}; a failure to close is logged and otherwise
     * ignored so that the remaining components are still closed.
     */
    private void destroyEach(Object obj) {
        if (obj instanceof Closeable) {
            try {
                ((Closeable) obj).close();
            } catch (IOException e) {
                this.logger.warn("Failed to close " + obj.getClass().getName(), e);
            }
        }
    }

    /**
     * Processes the given URLs directly on the calling thread, bypassing the scheduler's queue
     * for the initial requests and leaving the spider's state untouched.
     *
     * @param strArr URLs to download and process, in order
     */
    public void test(String... strArr) {
        checkComponent();
        for (String url : strArr) {
            processRequest(new Request(url));
        }
    }

    /**
     * Downloads one request, runs the page processor on the result, queues the page's target
     * requests and, unless the result items are marked as skipped, passes them to every pipeline.
     * Sleeps for the site's configured sleep time afterwards, also when the download returned
     * {@code null}.
     *
     * @param request request to process
     */
    protected void processRequest(Request request) {
        Page download = this.downloader.download(request, this);
        if (download == null) {
            sleep(this.site.getSleepTime());
            return;
        }
        this.pageProcessor.process(download);
        addRequest(download);
        if (!download.getResultItems().isSkip()) {
            for (Pipeline pipeline : this.pipelines) {
                pipeline.process(download.getResultItems(), this);
            }
        }
        sleep(this.site.getSleepTime());
    }

    /**
     * Pauses the current thread.
     *
     * @param i pause length in milliseconds
     */
    protected void sleep(int i) {
        try {
            Thread.sleep(i);
        } catch (InterruptedException e) {
            // The interrupt status is intentionally not restored: the crawl keeps going after an
            // interrupt, and a set flag would make every later pause return immediately.
            this.logger.warn("Interrupted while pausing between requests", e);
        }
    }

    /**
     * Pushes every target request collected on the page to the scheduler.
     *
     * @param page processed page whose target requests should be queued
     */
    protected void addRequest(Page page) {
        if (CollectionUtils.isNotEmpty(page.getTargetRequests())) {
            for (Request target : page.getTargetRequests()) {
                this.scheduler.push(target, this);
            }
        }
    }

    /**
     * Guards configuration methods.
     *
     * @throws IllegalStateException if the spider has left its initial state
     */
    protected void checkIfNotRunning() {
        if (this.stat.get() != STAT_INIT) {
            throw new IllegalStateException("Spider is already running!");
        }
    }

    /** Starts {@link #run()} on a new non-daemon thread and returns immediately. */
    public void runAsync() {
        Thread thread = new Thread(this);
        thread.setDaemon(false);
        thread.start();
    }

    /**
     * Sets the number of threads. With {@code 1} requests are processed on the thread that calls
     * {@link #run()}; with a larger value a fixed-size worker pool of that size is created.
     *
     * @param i number of threads, at least 1
     * @return this spider
     * @throws IllegalStateException    if the spider has already been started
     * @throws IllegalArgumentException if {@code i} is zero or negative
     */
    public Spider thread(int i) {
        checkIfNotRunning();
        // Validate before touching any state so a rejected value leaves the spider unchanged.
        if (i <= 0) {
            throw new IllegalArgumentException("threadNum should be at least one!");
        }
        synchronized (this) {
            ExecutorService previous = this.executorService;
            this.threadNum = i;
            // Keep the pool in step with threadNum: going back to a single thread drops the pool.
            this.executorService = (i == 1) ? null : ThreadUtils.newFixedThreadPool(i);
            if (previous != null) {
                // The spider has not started, so the replaced pool has no work of ours queued.
                previous.shutdown();
            }
        }
        return this;
    }

    /**
     * Returns the task identifier.
     *
     * @return the explicitly set UUID, otherwise the site's domain, or {@code null} when neither
     *         is available
     */
    @Override
    public String getUUID() {
        if (this.uuid != null) {
            return this.uuid;
        }
        if (this.site != null) {
            return this.site.getDomain();
        }
        return null;
    }

    /**
     * Returns the site configuration this spider crawls with.
     *
     * @return the site taken from the page processor
     */
    @Override
    public Site getSite() {
        return this.site;
    }
}
