package edu.uci.ics.crawler4j.crawler;

import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.frontier.DocIDServer;
import edu.uci.ics.crawler4j.frontier.Frontier;
import edu.uci.ics.crawler4j.frontier.FrontierConfiguration;
import edu.uci.ics.crawler4j.parser.Parser;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
import edu.uci.ics.crawler4j.url.TLDList;
import edu.uci.ics.crawler4j.url.URLCanonicalizer;
import edu.uci.ics.crawler4j.url.WebURL;
import edu.uci.ics.crawler4j.url.WebURLFactory;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:edu/uci/ics/crawler4j/crawler/CrawlController.class */
public class CrawlController {
    static final Logger logger = LoggerFactory.getLogger(CrawlController.class);

    /** Configuration this controller was constructed with; validated in the constructor. */
    private final CrawlConfig config;

    /** Opaque user object exposed through the deprecated custom-data accessors. */
    protected Object customData;

    /** Collects the value of {@code getMyLocalData()} of every crawler once the crawl ends. */
    protected List<Object> crawlersLocalData;

    /**
     * Set to {@code true} by the monitor thread after final clean-up.
     * Volatile because it is written by the monitor thread and read, without holding a lock,
     * by {@code isFinished()} and by the loop condition of {@code waitUntilFinish()}.
     */
    protected volatile boolean finished;

    /** Error recorded by the monitor thread; accessed only through the synchronized get/set methods. */
    private Throwable error;

    /**
     * Set to {@code true} by {@code shutdown()}.
     * Volatile because {@code shutdown()} may be called from any thread while the monitor thread
     * and {@code isShuttingDown()} callers read the flag without a common lock.
     */
    protected volatile boolean shuttingDown;

    protected PageFetcher pageFetcher;
    protected RobotstxtServer robotstxtServer;
    protected Frontier frontier;
    protected DocIDServer docIdServer;
    protected TLDList tldList;
    protected WebURLFactory webURLFactory;

    /** Monitor held by the supervising thread while it runs and waited on by {@code waitUntilFinish()}. */
    protected final Object waitingLock;

    /** Source of the frontier, doc-id server and URL factory; closed when the crawl ends. */
    protected final FrontierConfiguration frontierConfiguration;

    protected Parser parser;

    /* loaded from: input_file:edu/uci/ics/crawler4j/crawler/CrawlController$DefaultWebCrawlerFactory.class */
    /**
     * Factory that creates a fresh crawler for every call by invoking the no-argument
     * constructor of the configured class.
     *
     * @param <T> concrete crawler type produced by this factory
     */
    private static class DefaultWebCrawlerFactory<T extends WebCrawler> implements WebCrawlerFactory<T> {
        final Class<T> clazz;

        DefaultWebCrawlerFactory(Class<T> cls) {
            this.clazz = cls;
        }

        /**
         * Instantiates the crawler class through its no-argument constructor.
         *
         * @return a new crawler instance
         * @throws Exception if the class has no accessible no-argument constructor, cannot be
         *                   instantiated, or the constructor itself throws
         */
        @Override // edu.uci.ics.crawler4j.crawler.CrawlController.WebCrawlerFactory
        public T newInstance() throws Exception {
            try {
                // Class.newInstance() is deprecated (it bypasses checked-exception checking);
                // go through the Constructor object instead.
                return this.clazz.getDeclaredConstructor().newInstance();
            } catch (java.lang.reflect.InvocationTargetException e) {
                // Surface the constructor's own failure, as Class.newInstance() used to do,
                // so callers keep seeing the original exception type.
                Throwable cause = e.getCause();
                if (cause instanceof Exception) {
                    throw (Exception) cause;
                }
                if (cause instanceof Error) {
                    throw (Error) cause;
                }
                throw e;
            }
        }
    }

    /* loaded from: input_file:edu/uci/ics/crawler4j/crawler/CrawlController$SingleInstanceFactory.class */
    /**
     * Factory that hands out one pre-built crawler; every call returns that same object.
     *
     * @param <T> concrete crawler type held by this factory
     */
    private static class SingleInstanceFactory<T extends WebCrawler> implements WebCrawlerFactory<T> {
        final T instance;

        SingleInstanceFactory(T crawler) {
            instance = crawler;
        }

        /**
         * @return the crawler passed to the constructor (never a copy)
         */
        @Override // edu.uci.ics.crawler4j.crawler.CrawlController.WebCrawlerFactory
        public T newInstance() throws Exception {
            return instance;
        }
    }

    /* loaded from: input_file:edu/uci/ics/crawler4j/crawler/CrawlController$WebCrawlerFactory.class */
    /**
     * Supplies the crawler objects that the controller runs on its worker threads.
     *
     * @param <T> concrete crawler type produced by the factory
     */
    public interface WebCrawlerFactory<T extends WebCrawler> {
        /**
         * Provides a crawler to run on a worker thread.
         *
         * @return the crawler to run
         * @throws Exception if the crawler cannot be provided
         */
        T newInstance() throws Exception;
    }

    /**
     * Creates a controller without an explicit parser or TLD list; delegates to the
     * six-argument constructor with {@code null} for both.
     *
     * @param crawlConfig           crawl configuration
     * @param pageFetcher           fetcher used by the crawl
     * @param robotstxtServer       robots.txt handler
     * @param frontierConfiguration provider of the frontier, doc-id server and URL factory
     * @throws Exception if the delegated constructor fails
     */
    public CrawlController(CrawlConfig crawlConfig, PageFetcher pageFetcher, RobotstxtServer robotstxtServer, FrontierConfiguration frontierConfiguration) throws Exception {
        this(crawlConfig, pageFetcher, null, robotstxtServer, null, frontierConfiguration);
    }

    /**
     * Creates a controller with a caller-supplied TLD list but no explicit parser; delegates
     * to the six-argument constructor with {@code null} for the parser.
     *
     * @param crawlConfig           crawl configuration
     * @param pageFetcher           fetcher used by the crawl
     * @param robotstxtServer       robots.txt handler
     * @param tLDList               TLD list to use, may be {@code null}
     * @param frontierConfiguration provider of the frontier, doc-id server and URL factory
     * @throws Exception if the delegated constructor fails
     */
    public CrawlController(CrawlConfig crawlConfig, PageFetcher pageFetcher, RobotstxtServer robotstxtServer, TLDList tLDList, FrontierConfiguration frontierConfiguration) throws Exception {
        this(crawlConfig, pageFetcher, null, robotstxtServer, tLDList, frontierConfiguration);
    }

    /**
     * Creates a controller, validating the configuration and making sure the crawl storage
     * folder exists.
     *
     * @param crawlConfig           crawl configuration; {@code validate()} is called on it
     * @param pageFetcher           fetcher used by the crawl
     * @param parser                parser to use, or {@code null} to build a default one
     * @param robotstxtServer       robots.txt handler; receives {@code crawlConfig}
     * @param tLDList               TLD list to use, or {@code null} to build one from the config
     * @param frontierConfiguration provider of the frontier, doc-id server and URL factory
     * @throws Exception if validation fails or the storage folder cannot be created
     */
    public CrawlController(CrawlConfig crawlConfig, PageFetcher pageFetcher, Parser parser, RobotstxtServer robotstxtServer, TLDList tLDList, FrontierConfiguration frontierConfiguration) throws Exception {
        this.crawlersLocalData = new ArrayList<Object>();
        this.waitingLock = new Object();
        crawlConfig.validate();
        this.config = crawlConfig;

        File storageFolder = new File(crawlConfig.getCrawlStorageFolder());
        if (!storageFolder.exists()) {
            if (!storageFolder.mkdirs()) {
                throw new Exception("couldn't create the storage folder: " + storageFolder.getAbsolutePath() + " does it already exist ?");
            }
            logger.debug("Created folder: {}", storageFolder.getAbsolutePath());
        }

        this.tldList = tLDList == null ? new TLDList(crawlConfig) : tLDList;
        URLCanonicalizer.setHaltOnError(crawlConfig.isHaltOnError());
        this.frontierConfiguration = frontierConfiguration;
        this.frontier = frontierConfiguration.getFrontier();
        this.docIdServer = frontierConfiguration.getDocIDServer();
        this.webURLFactory = frontierConfiguration.getWebURLFactory();
        this.pageFetcher = pageFetcher;
        // Hand the default parser the resolved TLD list (this.tldList), not the raw argument,
        // which is null whenever the caller relied on the default list.
        this.parser = parser == null ? new Parser(crawlConfig, this.tldList, this.webURLFactory) : parser;
        this.robotstxtServer = robotstxtServer;
        this.finished = false;
        this.shuttingDown = false;
        robotstxtServer.setCrawlConfig(crawlConfig);
    }

    /**
     * @return the parser held by this controller
     */
    public Parser getParser() {
        return parser;
    }

    /**
     * Starts {@code i} crawlers created from {@code cls} and blocks until the crawl is finished.
     *
     * @param cls crawler class; one instance is created per crawler thread
     * @param i   number of crawler threads
     * @param <T> crawler type
     */
    public <T extends WebCrawler> void start(Class<T> cls, int i) {
        WebCrawlerFactory<T> factory = new DefaultWebCrawlerFactory<T>(cls);
        start(factory, i, true);
    }

    /**
     * Runs the given crawler instance on a single thread and blocks until the crawl is finished.
     *
     * @param t   crawler instance to run
     * @param <T> crawler type
     */
    public <T extends WebCrawler> void start(T t) {
        WebCrawlerFactory<T> factory = new SingleInstanceFactory<T>(t);
        start(factory, 1, true);
    }

    /**
     * Starts {@code i} crawlers obtained from the factory and blocks until the crawl is finished.
     *
     * @param webCrawlerFactory source of crawler instances
     * @param i                 number of crawler threads
     * @param <T>               crawler type
     */
    public <T extends WebCrawler> void start(WebCrawlerFactory<T> webCrawlerFactory, int i) {
        start(webCrawlerFactory, i, true);
    }

    /**
     * Starts {@code i} crawlers obtained from the factory and returns without waiting for the
     * crawl to finish.
     *
     * @param webCrawlerFactory source of crawler instances
     * @param i                 number of crawler threads
     * @param <T>               crawler type
     */
    public <T extends WebCrawler> void startNonBlocking(WebCrawlerFactory<T> webCrawlerFactory, int i) {
        start(webCrawlerFactory, i, false);
    }

    /**
     * Starts {@code i} crawlers created from {@code cls} and returns without waiting for the
     * crawl to finish.
     *
     * @param cls crawler class; one instance is created per crawler thread
     * @param i   number of crawler threads
     * @param <T> crawler type
     */
    public <T extends WebCrawler> void startNonBlocking(Class<T> cls, int i) {
        WebCrawlerFactory<T> factory = new DefaultWebCrawlerFactory<T>(cls);
        start(factory, i, false);
    }

    /**
     * Creates and starts {@code i} crawler threads, then launches a monitor thread that
     * supervises them, detects the end of the crawl and performs the final clean-up.
     *
     * <p>The monitor thread holds {@code waitingLock} for its whole run. On each pass it sleeps
     * for the configured monitoring delay, recreates dead crawler threads (unless shutting down
     * or halt-on-error is enabled) and, when halt-on-error is enabled, fails on the first crawler
     * that reports an error. When no crawler is working and shutdown-on-empty-queue is enabled
     * it re-checks after the shutdown delay and finishes once shutdown was requested or the
     * frontier queue stays empty.
     *
     * @param webCrawlerFactory source of crawler instances (also used to replace dead crawlers)
     * @param i                 number of crawler threads to start
     * @param z                 {@code true} to block in {@code waitUntilFinish()} before returning
     * @param <T>               crawler type
     * @throws RuntimeException if start-up fails and halt-on-error is enabled
     */
    protected <T extends WebCrawler> void start(final WebCrawlerFactory<T> webCrawlerFactory, int i, boolean z) {
        try {
            this.finished = false;
            setError(null);
            this.crawlersLocalData.clear();
            final CrawlController controller = this;
            final List<Thread> threads = new ArrayList<Thread>();
            final List<T> crawlers = new ArrayList<T>();
            for (int id = 1; id <= i; id++) {
                T crawler = webCrawlerFactory.newInstance();
                Thread thread = new Thread(crawler, "Crawler " + id);
                crawler.setThread(thread);
                crawler.init(id, controller);
                thread.start();
                crawlers.add(crawler);
                threads.add(thread);
                logger.info("Crawler {} started", Integer.valueOf(id));
            }
            new Thread(new Runnable() {
                @Override // java.lang.Runnable
                public void run() {
                    try {
                        synchronized (controller.waitingLock) {
                            while (true) {
                                CrawlController.sleep(controller.config.getThreadMonitoringDelaySeconds());
                                boolean someoneIsWorking = false;
                                for (int idx = 0; idx < threads.size(); idx++) {
                                    if (threads.get(idx).isAlive()) {
                                        if (crawlers.get(idx).isNotWaitingForNewURLs()) {
                                            someoneIsWorking = true;
                                        }
                                    } else if (!controller.shuttingDown && !controller.config.isHaltOnError()) {
                                        logger.info("Thread {} was dead, I'll recreate it", Integer.valueOf(idx));
                                        T replacement = webCrawlerFactory.newInstance();
                                        Thread replacementThread = new Thread(replacement, "Crawler " + (idx + 1));
                                        threads.set(idx, replacementThread);
                                        replacement.setThread(replacementThread);
                                        // Must be the controller: inside this anonymous class a bare
                                        // `this` would denote the Runnable, not the CrawlController.
                                        replacement.init(idx + 1, controller);
                                        replacementThread.start();
                                        crawlers.set(idx, replacement);
                                    }
                                    Throwable crawlerError = crawlers.get(idx).getError();
                                    if (crawlerError != null && controller.config.isHaltOnError()) {
                                        throw new RuntimeException("error on thread [" + threads.get(idx).getName() + "]", crawlerError);
                                    }
                                }
                                boolean shutdownOnEmptyQueue = controller.config.isShutdownOnEmptyQueue();
                                if (!someoneIsWorking && shutdownOnEmptyQueue) {
                                    logger.info("It looks like no thread is working, waiting for " + controller.config.getThreadShutdownDelaySeconds() + " seconds to make sure...");
                                    CrawlController.sleep(controller.config.getThreadShutdownDelaySeconds());
                                    // Second look after the grace period, without recreating threads.
                                    boolean stillWorking = false;
                                    for (int idx = 0; idx < threads.size(); idx++) {
                                        if (threads.get(idx).isAlive() && crawlers.get(idx).isNotWaitingForNewURLs()) {
                                            stillWorking = true;
                                        }
                                    }
                                    if (!stillWorking) {
                                        if (controller.shuttingDown) {
                                            break;
                                        }
                                        if (controller.frontier.getQueueLength() <= 0) {
                                            logger.info("No thread is working and no more URLs are in queue waiting for another " + controller.config.getThreadShutdownDelaySeconds() + " seconds to make sure...");
                                            CrawlController.sleep(controller.config.getThreadShutdownDelaySeconds());
                                            if (controller.frontier.getQueueLength() <= 0) {
                                                break;
                                            }
                                        }
                                    }
                                }
                            }
                            logger.info("All of the crawlers are stopped. Finishing the process...");
                            controller.frontier.finish();
                            for (T crawler : crawlers) {
                                crawler.onBeforeExit();
                                controller.crawlersLocalData.add(crawler.getMyLocalData());
                            }
                            logger.info("Waiting for " + controller.config.getCleanupDelaySeconds() + " seconds before final clean up...");
                            CrawlController.sleep(controller.config.getCleanupDelaySeconds());
                            controller.frontier.close();
                            controller.docIdServer.close();
                            controller.pageFetcher.shutDown();
                            controller.finished = true;
                            controller.waitingLock.notifyAll();
                            controller.frontierConfiguration.close();
                        }
                    } catch (Throwable th) {
                        if (!controller.config.isHaltOnError()) {
                            // NOTE(review): this path leaves `finished` false and does not notify
                            // waitingLock, so a caller blocked in waitUntilFinish() is not woken.
                            logger.error("Unexpected Error", th);
                            return;
                        }
                        controller.setError(th);
                        synchronized (controller.waitingLock) {
                            controller.frontier.finish();
                            controller.frontier.close();
                            controller.docIdServer.close();
                            controller.pageFetcher.shutDown();
                            controller.waitingLock.notifyAll();
                            controller.frontierConfiguration.close();
                        }
                    }
                }
            }).start();
            if (z) {
                waitUntilFinish();
            }
        } catch (Exception e) {
            if (this.config.isHaltOnError()) {
                if (e instanceof RuntimeException) {
                    throw (RuntimeException) e;
                }
                throw new RuntimeException("error running the monitor thread", e);
            }
            logger.error("Error happened", e);
        }
    }

    /**
     * Blocks the calling thread until the crawl is marked finished.
     *
     * <p>When halt-on-error is enabled and the monitor thread recorded an error, that error is
     * rethrown here: runtime exceptions and {@link Error}s as-is, anything else wrapped in a
     * {@link RuntimeException}. An interrupt does not abort the wait; it is logged and the
     * thread's interrupt status is restored before this method returns or throws.
     *
     * @throws RuntimeException if halt-on-error is enabled and the monitor thread failed
     */
    public void waitUntilFinish() {
        boolean interrupted = false;
        try {
            while (!this.finished) {
                synchronized (this.waitingLock) {
                    if (this.config.isHaltOnError()) {
                        Throwable error = getError();
                        if (error != null) {
                            if (error instanceof RuntimeException) {
                                throw (RuntimeException) error;
                            }
                            if (error instanceof Error) {
                                throw (Error) error;
                            }
                            throw new RuntimeException("error on monitor thread", error);
                        }
                    }
                    if (this.finished) {
                        return;
                    }
                    try {
                        this.waitingLock.wait();
                    } catch (InterruptedException e) {
                        logger.error("Error occurred", e);
                        // Keep waiting, but remember the interrupt so it is not lost.
                        interrupted = true;
                    }
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * @return the live list into which each crawler's local data is collected when the crawl ends
     */
    public List<Object> getCrawlersLocalData() {
        return crawlersLocalData;
    }

    /**
     * Pauses the current thread for the given number of seconds.
     *
     * <p>An interrupt ends the pause early and is otherwise ignored.
     *
     * @param i number of seconds to sleep
     */
    protected static void sleep(int i) {
        try {
            // Multiply as long: i * 1000 in int arithmetic overflows for large values and
            // would hand Thread.sleep a negative timeout.
            Thread.sleep(i * 1000L);
        } catch (InterruptedException ignored) {
            // Deliberately swallowed: callers invoke this from polling loops, and a
            // re-asserted interrupt would turn every later sleep into a busy spin.
        }
    }

    /**
     * Adds a seed URL, letting the doc-id server assign its document id (delegates with
     * {@code -1}).
     *
     * @param str seed URL
     * @throws IOException          propagated from the two-argument overload
     * @throws InterruptedException propagated from the two-argument overload
     */
    public void addSeed(String str) throws IOException, InterruptedException {
        addSeed(str, -1);
    }

    /**
     * Adds a seed URL to the frontier at depth 0.
     *
     * <p>The URL is canonicalized first; an invalid URL is logged and ignored. With a negative
     * {@code i} the URL is skipped if the doc-id server already knows it, otherwise a new id is
     * requested. With a non-negative {@code i} that id is registered for the URL. The seed is
     * scheduled only if the robots.txt server allows it.
     *
     * @param str seed URL
     * @param i   document id to use, or a negative value to have one assigned
     * @throws IOException          declared for callers; see the called collaborators
     * @throws InterruptedException declared for callers; see the called collaborators
     * @throws RuntimeException     if registering the id fails and halt-on-error is enabled
     */
    public void addSeed(String str, int i) throws IOException, InterruptedException {
        String canonicalURL = URLCanonicalizer.getCanonicalURL(str);
        if (canonicalURL == null) {
            logger.error("Invalid seed URL: {}", str);
            return;
        }

        int docId = i;
        if (docId < 0) {
            if (this.docIdServer.getDocId(canonicalURL) > 0) {
                logger.trace("This URL is already seen.");
                return;
            }
            docId = this.docIdServer.getNewDocID(canonicalURL);
        } else {
            try {
                this.docIdServer.addUrlAndDocId(canonicalURL, docId);
            } catch (RuntimeException e) {
                if (this.config.isHaltOnError()) {
                    throw e;
                }
                // Pass the throwable too so the stack trace is not lost.
                logger.error("Could not add seed: {}", e.getMessage(), e);
            }
        }

        WebURL seed = this.webURLFactory.newWebUrl();
        seed.setTldList(this.tldList);
        seed.setURL(canonicalURL);
        seed.setDocid(docId);
        seed.setDepth((short) 0);
        if (this.robotstxtServer.allows(seed)) {
            this.frontier.schedule(seed);
        } else {
            logger.warn("Robots.txt does not allow this seed: {}", str);
        }
    }

    /**
     * Registers a URL with the given document id in the doc-id server without scheduling it.
     *
     * <p>The URL is canonicalized first; an invalid URL is logged and ignored.
     *
     * @param str URL to register
     * @param i   document id to associate with the URL
     * @throws UnsupportedEncodingException declared for callers; see the called collaborators
     * @throws RuntimeException             if registration fails and halt-on-error is enabled
     */
    public void addSeenUrl(String str, int i) throws UnsupportedEncodingException {
        String canonicalURL = URLCanonicalizer.getCanonicalURL(str);
        if (canonicalURL == null) {
            logger.error("Invalid Url: {} (can't cannonicalize it!)", str);
            return;
        }
        try {
            this.docIdServer.addUrlAndDocId(canonicalURL, i);
        } catch (RuntimeException e) {
            if (this.config.isHaltOnError()) {
                throw e;
            }
            // Pass the throwable too so the stack trace is not lost.
            logger.error("Could not add seen url: {}", e.getMessage(), e);
        }
    }

    /**
     * @return the page fetcher currently held by this controller
     */
    public PageFetcher getPageFetcher() {
        return pageFetcher;
    }

    /**
     * Replaces the page fetcher held by this controller.
     *
     * @param pageFetcher new page fetcher
     */
    public void setPageFetcher(PageFetcher pageFetcher) {
        this.pageFetcher = pageFetcher;
    }

    /**
     * @return the robots.txt server currently held by this controller
     */
    public RobotstxtServer getRobotstxtServer() {
        return robotstxtServer;
    }

    /**
     * Replaces the robots.txt server held by this controller.
     *
     * @param robotstxtServer new robots.txt server
     */
    public void setRobotstxtServer(RobotstxtServer robotstxtServer) {
        this.robotstxtServer = robotstxtServer;
    }

    /**
     * @return the frontier currently held by this controller
     */
    public Frontier getFrontier() {
        return frontier;
    }

    /**
     * Replaces the frontier held by this controller.
     *
     * @param frontier new frontier
     */
    public void setFrontier(Frontier frontier) {
        this.frontier = frontier;
    }

    /**
     * @return the doc-id server currently held by this controller
     */
    public DocIDServer getDocIdServer() {
        return docIdServer;
    }

    /**
     * Replaces the doc-id server held by this controller.
     *
     * @param docIDServer new doc-id server
     */
    public void setDocIdServer(DocIDServer docIDServer) {
        this.docIdServer = docIDServer;
    }

    /**
     * @return the user object previously stored with {@code setCustomData}, or {@code null}
     *         if none was stored
     */
    @Deprecated
    public Object getCustomData() {
        return customData;
    }

    /**
     * Stores an arbitrary user object on this controller.
     *
     * @param obj object to store
     */
    @Deprecated
    public void setCustomData(Object obj) {
        this.customData = obj;
    }

    /**
     * @return {@code true} once the monitor thread has completed its final clean-up
     */
    public boolean isFinished() {
        return finished;
    }

    /**
     * @return {@code true} after {@code shutdown()} has been called
     */
    public boolean isShuttingDown() {
        return shuttingDown;
    }

    /**
     * Requests the crawl to stop: raises the shutting-down flag, shuts the page fetcher down
     * and tells the frontier to finish.
     */
    public void shutdown() {
        logger.info("Shutting down...");
        shuttingDown = true;
        pageFetcher.shutDown();
        frontier.finish();
    }

    /**
     * @return the configuration this controller was constructed with
     */
    public CrawlConfig getConfig() {
        return config;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * @return the error last recorded via {@code setError}, or {@code null} if none
     */
    public synchronized Throwable getError() {
        return error;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Records (or, with {@code null}, clears) the error later returned by {@code getError()}.
     *
     * @param th error to record, or {@code null} to clear
     */
    public synchronized void setError(Throwable th) {
        this.error = th;
    }

    /**
     * @return the TLD list resolved in the constructor
     */
    public TLDList getTldList() {
        return tldList;
    }

    /**
     * @return the URL factory currently held by this controller
     */
    public WebURLFactory getWebURLFactory() {
        return webURLFactory;
    }

    /**
     * Replaces the URL factory held by this controller.
     *
     * @param webURLFactory new URL factory
     */
    public void setWebURLFactory(WebURLFactory webURLFactory) {
        this.webURLFactory = webURLFactory;
    }
}
