package com.credibledoc.log.labelizer.crawler;

import com.credibledoc.log.labelizer.config.Config;
import com.credibledoc.log.labelizer.exception.LabelizerRuntimeException;
import com.credibledoc.log.labelizer.github.GithubRepo;
import com.credibledoc.log.labelizer.github.GithubRepoRepository;
import com.credibledoc.log.labelizer.github.VisitedUrl;
import com.credibledoc.log.labelizer.github.VisitedUrlRepository;
import com.credibledoc.log.labelizer.pagepattern.PagePattern;
import com.credibledoc.log.labelizer.pagepattern.PagePatternRepository;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import dev.morphia.query.internal.MorphiaCursor;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.jetbrains.annotations.NotNull;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/credibledoc/log/labelizer/crawler/Crawler.class */
/**
 * Command-line crawler that collects pages from GitHub for later processing.
 *
 * <p>The work is done in three steps, see {@link #startJobs()}:
 * <ol>
 *     <li>{@link #findRepositories()} queries the repository search API with the configured
 *         keywords and stores Java repositories as {@link GithubRepo} items.</li>
 *     <li>Not yet visited repositories are packed into batched code-search queries; the
 *         {@code html_url} of every hit is stored as a {@link PagePattern}.</li>
 *     <li>{@link #execute()} submits a {@link RunnableCrawler} for every {@link PagePattern}
 *         returned by {@link PagePatternRepository#getCursorOfEmptyPatterns()}.</li>
 * </ol>
 */
public class Crawler {
    private static final Logger logger = LoggerFactory.getLogger(Crawler.class);
    private static final String HTTPS_API_GITHUB_COM = "https://api.github.com";
    private static final String SEARCH_REPOSITORIES_Q = "/search/repositories?q=";
    private static final String SEARCH_CODE_Q = "/search/code?q=mm+in%3Afile";
    private static final String LANGUAGE_JAVA = "+language%3Ajava?";
    private static final String PER_PAGE = "&per_page=";
    private static final String PAGE = "&page=";
    private static final int NUM_ITEMS_PER_PAGE_100 = 100;
    private static final int MAX_PAGES_10 = 10;
    private static final int DELAY_BETWEEN_REQUESTS_SECONDS_2 = 2;
    /** Pause before every request: the configured seconds plus a 50 ms margin (2050 ms). */
    private static final long DELAY_BETWEEN_REQUESTS_MILLIS = DELAY_BETWEEN_REQUESTS_SECONDS_2 * 1000L + 50L;
    private static final int MAX_THREADS_25 = 25;
    private static final String FIELD_FULL_NAME = "full_name";
    private static final String FIELD_LANGUAGE = "language";
    private static final String JAVA = "Java";
    private static final int MAX_QUERY_LENGTH_256 = 256;
    private static final String PLACEHOLDER = "##placeholder##";
    private static final int DIGITS_IN_PAGE_NUM = 2;

    /**
     * Entry point. Initializes the {@link PagePatternRepository} and starts the crawler.
     *
     * @param strArr command-line arguments, not used.
     */
    public static void main(String[] strArr) {
        PagePatternRepository pagePatternRepository = PagePatternRepository.getInstance();
        logger.info("Context path: '{}'", new File("").getAbsolutePath());
        logger.info("Repository created. HashCode: {}", pagePatternRepository.hashCode());
        new Crawler().startJobs();
    }

    /**
     * Runs the whole pipeline: repository discovery, batched code search and finally
     * submission of the crawling tasks.
     *
     * @throws LabelizerRuntimeException wrapping any failure of the steps above.
     */
    private void startJobs() {
        try {
            findRepositories();
            List<GithubRepo> notVisitedRepos = GithubRepoRepository.getInstance().selectNotVisited();
            StringBuilder repoQualifiers = new StringBuilder(MAX_QUERY_LENGTH_256);
            String urlTemplate = HTTPS_API_GITHUB_COM + SEARCH_CODE_Q + PLACEHOLDER
                + PER_PAGE + NUM_ITEMS_PER_PAGE_100 + PAGE;
            List<GithubRepo> pendingRepos = new ArrayList<>();
            for (GithubRepo githubRepo : notVisitedRepos) {
                searchLinksInRepository(repoQualifiers, urlTemplate, pendingRepos, githubRepo);
            }
            // The last batch never overflows the query, so it has to be flushed explicitly;
            // otherwise its repositories would be neither searched nor marked as visited.
            if (!pendingRepos.isEmpty()) {
                flushBatch(repoQualifiers, urlTemplate, pendingRepos);
            }
            execute();
        } catch (Exception e) {
            throw new LabelizerRuntimeException(e);
        }
    }

    /**
     * Adds the repository to the current batch of {@code +repo:} qualifiers. When the
     * qualifier would make the URL longer than {@link #MAX_QUERY_LENGTH_256}, the current
     * batch is searched and saved first and a new batch is started with this repository.
     *
     * @param sb         accumulated {@code +repo:} qualifiers of the current batch.
     * @param str        URL template containing {@link #PLACEHOLDER} and ending with the page parameter.
     * @param list       repositories belonging to the current batch.
     * @param githubRepo the repository to add; it is marked as visited.
     */
    private void searchLinksInRepository(StringBuilder sb, String str, List<GithubRepo> list, GithubRepo githubRepo) {
        String qualifier = "+repo:" + githubRepo.getFullName();
        int projectedLength = str.length() - PLACEHOLDER.length()
            + sb.length() + qualifier.length() + DIGITS_IN_PAGE_NUM;
        // An empty batch is never flushed: a query without any repo qualifier is pointless.
        if (projectedLength > MAX_QUERY_LENGTH_256 && sb.length() > 0) {
            flushBatch(sb, str, list);
        }
        sb.append(qualifier);
        githubRepo.setVisited(true);
        list.add(githubRepo);
    }

    /**
     * Executes the code search for the accumulated batch, stores all result pages and
     * persists the repositories of the batch. Both the qualifier buffer and the batch list
     * are emptied.
     *
     * @param repoQualifiers accumulated {@code +repo:} qualifiers; reset to empty.
     * @param urlTemplate    URL template containing {@link #PLACEHOLDER}.
     * @param batch          repositories covered by the query; saved and then cleared.
     */
    private void flushBatch(StringBuilder repoQualifiers, String urlTemplate, List<GithubRepo> batch) {
        VisitedUrlRepository visitedUrlRepository = VisitedUrlRepository.getInstance();
        String searchUrl = urlTemplate.replace(PLACEHOLDER, repoQualifiers);
        repoQualifiers.setLength(0);
        int totalCount = savePages(1, searchUrl).getAsJsonPrimitive("total_count").getAsInt();
        // Ceiling division, so a trailing partially filled page is fetched as well.
        int pagesAvailable = (totalCount + NUM_ITEMS_PER_PAGE_100 - 1) / NUM_ITEMS_PER_PAGE_100;
        int lastPage = Math.min(pagesAvailable, MAX_PAGES_10);
        // NOTE(review): search URLs are checked against VisitedUrlRepository here but are
        // never stored in it by this class - confirm whether they should be saved.
        for (int page = 2; page <= lastPage; page++) {
            if (!visitedUrlRepository.contains(searchUrl + page)) {
                savePages(page, searchUrl);
            }
        }
        GithubRepoRepository.getInstance().save(batch);
        batch.clear();
    }

    /**
     * Queries the repository search API for each configured keyword, pages 1 to
     * {@link #MAX_PAGES_10}, and processes every returned item with
     * {@link #getAndSaveJavaRepos(JsonElement)}. Each processed URL is stored as
     * a {@link VisitedUrl}; URLs already stored are skipped.
     *
     * @throws IOException if fetching the repositories of an owner fails.
     */
    private void findRepositories() throws IOException {
        VisitedUrlRepository visitedUrlRepository = VisitedUrlRepository.getInstance();
        for (String keyword : Config.getGithubSearchKeywords()) {
            for (int page = 1; page <= MAX_PAGES_10; page++) {
                String url = HTTPS_API_GITHUB_COM + SEARCH_REPOSITORIES_Q + keyword + LANGUAGE_JAVA
                    + PER_PAGE + NUM_ITEMS_PER_PAGE_100 + PAGE + page;
                if (visitedUrlRepository.contains(url)) {
                    continue;
                }
                for (JsonElement item : getNextRepoList(url).getAsJsonArray("items")) {
                    getAndSaveJavaRepos(item);
                }
                visitedUrlRepository.save(Collections.singletonList(new VisitedUrl(url)));
            }
        }
    }

    /**
     * Fetches one page of search results and stores the {@code html_url} of every item
     * not yet known to the {@link PagePatternRepository} as a new {@link PagePattern}.
     *
     * @param i   the page number appended to the URL.
     * @param str the search URL ending with the page parameter name.
     * @return the complete parsed response of the page.
     */
    @NotNull
    private JsonObject savePages(int i, String str) {
        JsonObject searchResult = getNextSearchResult(str + i);
        PagePatternRepository pagePatternRepository = PagePatternRepository.getInstance();
        List<PagePattern> newPages = new ArrayList<>();
        for (JsonElement item : searchResult.getAsJsonArray("items")) {
            String pageUrl = item.getAsJsonObject().get("html_url").getAsString();
            if (!pagePatternRepository.containsPage(pageUrl)) {
                PagePattern pagePattern = new PagePattern();
                pagePattern.setPageUrl(pageUrl);
                newPages.add(pagePattern);
            }
        }
        pagePatternRepository.save(newPages);
        return searchResult;
    }

    /**
     * Requests the URL and parses the response body as a JSON object.
     *
     * @param str the URL to request.
     * @return the parsed response.
     * @throws LabelizerRuntimeException wrapping any request or parsing failure.
     */
    private JsonObject getNextSearchResult(String str) {
        try {
            return new JsonParser().parse(createConnection(str).execute().body()).getAsJsonObject();
        } catch (Exception e) {
            throw new LabelizerRuntimeException(e);
        }
    }

    /**
     * For a Java repository not yet stored in the {@link GithubRepoRepository}, loads all
     * repositories of its owner (the {@code repos_url} of the {@code owner} object) and
     * saves the Java ones.
     *
     * @param jsonElement a single item of the repository search response.
     * @throws IOException if the owner's repositories cannot be fetched.
     */
    private void getAndSaveJavaRepos(JsonElement jsonElement) throws IOException {
        JsonObject repo = jsonElement.getAsJsonObject();
        if (!isJavaRepo(repo)) {
            return;
        }
        JsonObject owner = repo.get("owner").getAsJsonObject();
        String fullName = repo.getAsJsonPrimitive(FIELD_FULL_NAME).getAsString();
        String reposUrl = owner.get("repos_url").getAsString();
        if (!GithubRepoRepository.getInstance().contains(fullName)) {
            getAndSaveRepos(reposUrl);
        }
    }

    /**
     * Requests the URL, expects a JSON array of repositories and saves every repository
     * whose language is Java as a {@link GithubRepo}.
     *
     * @param str the URL returning a JSON array of repositories.
     * @throws IOException if the request fails.
     */
    private void getAndSaveRepos(String str) throws IOException {
        JsonArray repos = new JsonParser().parse(createConnection(str).execute().body()).getAsJsonArray();
        List<GithubRepo> javaRepos = new ArrayList<>();
        for (JsonElement element : repos) {
            JsonObject repo = element.getAsJsonObject();
            if (isJavaRepo(repo)) {
                javaRepos.add(new GithubRepo(repo.get(FIELD_FULL_NAME).getAsString()));
            }
        }
        GithubRepoRepository.getInstance().save(javaRepos);
    }

    /**
     * Tells whether the {@code language} member of the repository equals {@link #JAVA}.
     * A missing member and a JSON {@code null} are both treated as "not Java".
     */
    private boolean isJavaRepo(JsonObject repo) {
        JsonElement language = repo.get(FIELD_LANGUAGE);
        return language != null && !language.isJsonNull() && JAVA.equals(language.getAsString());
    }

    /**
     * Submits a {@link RunnableCrawler} for every pattern of the cursor to a fixed pool of
     * {@link #MAX_THREADS_25} threads. The method does not wait for the tasks to finish.
     *
     * @throws LabelizerRuntimeException wrapping any failure during iteration or submission.
     */
    private void execute() {
        ExecutorService executorService = Executors.newFixedThreadPool(MAX_THREADS_25);
        try (MorphiaCursor<PagePattern> emptyPatterns =
                 PagePatternRepository.getInstance().getCursorOfEmptyPatterns()) {
            while (emptyPatterns.hasNext()) {
                createRunnable(emptyPatterns.next(), executorService);
            }
        } catch (Exception e) {
            throw new LabelizerRuntimeException(e);
        } finally {
            // Always initiate shutdown, otherwise idle pool threads keep the JVM alive
            // after a failure. Already submitted tasks still run to completion.
            executorService.shutdown();
        }
    }

    /**
     * Wraps the pattern in a {@link RunnableCrawler} and submits it to the executor.
     */
    private void createRunnable(PagePattern pagePattern, ExecutorService executorService) {
        executorService.submit(new RunnableCrawler(pagePattern));
    }

    /**
     * Requests the URL and parses the response body as a JSON object. On failure a
     * {@link PagePattern} carrying the URL, the error message and the stack trace is saved
     * before the exception is rethrown.
     *
     * @param str the URL to request.
     * @return the parsed response.
     * @throws LabelizerRuntimeException wrapping any request or parsing failure.
     */
    private JsonObject getNextRepoList(String str) {
        try {
            return new JsonParser().parse(createConnection(str).execute().body()).getAsJsonObject();
        } catch (Exception e) {
            // The stack trace is captured into a string to be persisted, not printed.
            StringWriter stackTrace = new StringWriter();
            e.printStackTrace(new PrintWriter(stackTrace));
            PagePattern failedPage = new PagePattern();
            failedPage.setPageUrl(str);
            failedPage.setErrorMessage("Error in the getNextSearchPage method. Message: " + e.getMessage() + ". StackTrace: " + stackTrace.toString());
            PagePatternRepository.getInstance().save(Collections.singletonList(failedPage));
            throw new LabelizerRuntimeException(e);
        }
    }

    /**
     * Waits {@link #DELAY_BETWEEN_REQUESTS_MILLIS} and then prepares an authorized connection
     * to the URL. The request itself is not executed here.
     *
     * @param str the URL to connect to.
     * @return the prepared connection.
     * @throws LabelizerRuntimeException if the thread is interrupted while waiting (the
     *                                   interrupt flag is restored) or the connection cannot be created.
     */
    private Connection createConnection(String str) {
        try {
            // Presumably keeps the crawler below the GitHub request rate limit - confirm.
            Thread.sleep(DELAY_BETWEEN_REQUESTS_MILLIS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new LabelizerRuntimeException(e);
        }
        try {
            return Jsoup.connect(str)
                .ignoreContentType(true)
                .header("Accept", "application/vnd.github.v3+json")
                .header("Authorization", "token " + Config.getGithubOauthToken());
        } catch (Exception e) {
            throw new LabelizerRuntimeException(e);
        }
    }
}
