package gobblin.ingestion.google.webmaster;

import avro.shaded.com.google.common.base.Joiner;
import com.google.api.client.googleapis.batch.json.JsonBatchCallback;
import com.google.api.client.googleapis.json.GoogleJsonError;
import com.google.api.client.http.HttpHeaders;
import com.google.api.client.repackaged.com.google.common.base.Preconditions;
import com.google.api.services.webmasters.model.ApiDimensionFilter;
import com.google.api.services.webmasters.model.SearchAnalyticsQueryResponse;
import com.google.common.base.Optional;
import gobblin.configuration.WorkUnitState;
import gobblin.ingestion.google.webmaster.GoogleWebmasterDataFetcher;
import gobblin.ingestion.google.webmaster.GoogleWebmasterFilter;
import gobblin.util.ExecutorsUtils;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Deque;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Random;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:gobblin/ingestion/google/webmaster/GoogleWebmasterExtractorIterator.class */
public class GoogleWebmasterExtractorIterator {
    /** Class-wide SLF4J logger; also passed to the thread factory of the download pool created in ResponseProducer.run(). */
    private static final Logger LOG = LoggerFactory.getLogger(GoogleWebmasterExtractorIterator.class);
    // ---- Tuning values, each read once from the WorkUnitState and validated in the constructor. ----
    /** Minutes that each round waits for its download pool to terminate before forcing it down. */
    private final int TIME_OUT;
    /** Number of jobs collected into one submission to the download pool. */
    private final int BATCH_SIZE;
    /** Group size handed to UrlTriePrefixGrouper when ADVANCED_MODE is on. */
    private final int GROUP_SIZE;
    /** When true, ResponseProducer regroups the pages through a UrlTrie instead of using the jobs as given. */
    private final boolean ADVANCED_MODE;
    /** Highest retry round index; round 0 is the initial pass. */
    private final int MAX_RETRY_ROUNDS;
    /** Base sleep, in milliseconds, before a retry round starts. */
    private final int INITIAL_COOL_DOWN;
    /** Milliseconds multiplied by a random integer below the round number and added to INITIAL_COOL_DOWN. */
    private final int COOL_DOWN_STEP;
    /** Submission rate; the pause before each submission is ceil(1000 / REQUESTS_PER_SECOND) milliseconds. */
    private final double REQUESTS_PER_SECOND;
    /** Limit passed to the fetcher's getAllPages call. */
    private final int PAGE_LIMIT;
    /** Limit passed to the fetcher's search-analytics query calls. */
    private final int QUERY_LIMIT;
    /** Fetcher used for listing pages and for running the search-analytics queries. */
    private final GoogleWebmasterDataFetcher _webmaster;
    /** Start date forwarded to getAllPages and to the trie-based jobs. */
    private final String _startDate;
    /** End date forwarded to getAllPages and to the trie-based jobs. */
    private final String _endDate;
    /** String form of the COUNTRY filter, produced by GoogleWebmasterFilter.countryFilterToString; used in log lines and getAllPages. */
    private final String _country;
    /** Thread running the ResponseProducer; created and started on first use by initialize(). */
    private Thread _producerThread;
    /** Bounded hand-off queue (capacity 2000): download tasks put result rows in, hasNext()/next() take them out. */
    private LinkedBlockingDeque<String[]> _cachedQueries = new LinkedBlockingDeque<>(2000);
    /** Filters applied to every query; a per-job page filter is added to a copy of these values. Must not contain PAGE. */
    private final Map<GoogleWebmasterFilter.Dimension, ApiDimensionFilter> _filterMap;
    /** Dimensions passed to every search-analytics query. */
    private final List<GoogleWebmasterFilter.Dimension> _requestedDimensions;
    /** Metrics passed to single queries and used to convert batch responses into rows. */
    private final List<GoogleWebmasterDataFetcher.Metric> _requestedMetrics;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:gobblin/ingestion/google/webmaster/GoogleWebmasterExtractorIterator$ResponseProducer.class */
    public class ResponseProducer implements Runnable {
        /** Jobs still to be run; drained during a round in run() and then replaced by the jobs queued for retry. */
        private Deque<ProducerJob> _jobsToProcess;
        /** Divisor applied to the total page count in run() to decide how many pages pass between two progress log lines. */
        private static final double REPORT_PARTITIONS = 20.0d;

        /**
         * Builds the initial work queue from the given jobs.
         *
         * <ul>
         *   <li>Empty input: the queue starts empty.</li>
         *   <li>ADVANCED_MODE off: the jobs are used as they are. A collection whose class is exactly
         *       {@link ArrayDeque} is adopted without copying (so it is drained by {@link #run()});
         *       anything else is copied.</li>
         *   <li>ADVANCED_MODE on: the pages of all jobs are fed into a UrlTrie and one
         *       TrieBasedProducerJob is queued per group produced by UrlTriePrefixGrouper.</li>
         * </ul>
         *
         * @param collection the jobs to start from
         */
        ResponseProducer(Collection<ProducerJob> collection) {
            final int jobCount = collection.size();
            if (jobCount == 0) {
                this._jobsToProcess = new ArrayDeque<>();
                return;
            }
            if (!GoogleWebmasterExtractorIterator.this.ADVANCED_MODE) {
                if (collection.getClass().equals(ArrayDeque.class)) {
                    // Exact-class match only, so the downcast cannot fail.
                    this._jobsToProcess = (ArrayDeque<ProducerJob>) collection;
                } else {
                    this._jobsToProcess = new ArrayDeque<>(collection);
                }
                return;
            }
            final List<String> pages = new ArrayList<>(jobCount);
            for (ProducerJob job : collection) {
                pages.add(job.getPage());
            }
            final UrlTrie trie = new UrlTrie(GoogleWebmasterExtractorIterator.this._webmaster.getSiteProperty(), pages);
            final UrlTriePrefixGrouper grouper = new UrlTriePrefixGrouper(trie, GoogleWebmasterExtractorIterator.this.GROUP_SIZE);
            this._jobsToProcess = new ArrayDeque<>(jobCount);
            while (grouper.hasNext()) {
                // next() must be evaluated before getGroupSize(); argument order keeps that.
                this._jobsToProcess.add(new TrieBasedProducerJob(GoogleWebmasterExtractorIterator.this._startDate, GoogleWebmasterExtractorIterator.this._endDate, grouper.next(), grouper.getGroupSize()));
            }
        }

        /**
         * Runs every queued job through a download pool, retrying failed jobs in rounds.
         *
         * <p>Each round drains {@code _jobsToProcess}, groups the jobs into batches of BATCH_SIZE and
         * submits them (throttled by {@code submitJob}) to a fresh 10-thread daemon pool. The round then
         * waits up to TIME_OUT minutes for the pool. Jobs that the callbacks queued for retry become the
         * next round's input after a randomized cool-down. The loop ends when a round leaves nothing to
         * retry or when round MAX_RETRY_ROUNDS has been run; in the latter case the leftover jobs are
         * logged in serialized form.
         *
         * <p>NOTE(review): when the pool is force-stopped after a timeout, tasks that never started or
         * were cut off are not re-queued here, so their jobs are not retried.
         *
         * @throws RuntimeException wrapping the InterruptedException if the wait for the pool is interrupted
         */
        @Override // java.lang.Runnable
        public void run() {
            final Random random = new Random();
            final int batchSize = GoogleWebmasterExtractorIterator.this.BATCH_SIZE;
            final int maxRetryRounds = GoogleWebmasterExtractorIterator.this.MAX_RETRY_ROUNDS;
            final String country = GoogleWebmasterExtractorIterator.this._country;
            // Loop-invariant pause before each submission so the request rate stays at REQUESTS_PER_SECOND.
            final long requestSleepMillis = (long) Math.ceil(1000.0d / GoogleWebmasterExtractorIterator.this.REQUESTS_PER_SECOND);

            int round = 0;
            while (round <= maxRetryRounds) {
                int totalPages = 0;
                for (ProducerJob queued : this._jobsToProcess) {
                    totalPages += queued.getPagesSize();
                }
                if (round > 0) {
                    LOG.info(String.format("Starting #%d round retries of size %d for %s", round, totalPages, country));
                }

                // Log progress roughly REPORT_PARTITIONS times per round, but at least every page.
                final int reportEvery = Math.max(1, (int) Math.round(Math.ceil(totalPages / REPORT_PARTITIONS)));
                final ConcurrentLinkedDeque<ProducerJob> retries = new ConcurrentLinkedDeque<>();
                final ExecutorService pool = Executors.newFixedThreadPool(10, ExecutorsUtils.newDaemonThreadFactory(Optional.of(LOG), Optional.of(getClass().getSimpleName())));

                int pagesSinceReport = 0;
                int pagesReported = 0;
                List<ProducerJob> batch = new ArrayList<>(batchSize);
                while (!this._jobsToProcess.isEmpty()) {
                    final ProducerJob job = this._jobsToProcess.poll();
                    pagesSinceReport += job.getPagesSize();
                    if (pagesSinceReport >= reportEvery) {
                        pagesReported += pagesSinceReport;
                        pagesSinceReport = 0;
                        LOG.info(String.format("ResponseProducer progress: %d of %d processed for %s", pagesReported, totalPages, country));
                    }
                    // The batch is always below batchSize here: it is replaced as soon as it fills up.
                    batch.add(job);
                    if (batch.size() == batchSize) {
                        submitJob(requestSleepMillis, retries, pool, batch);
                        batch = new ArrayList<>(batchSize);
                    }
                }
                if (!batch.isEmpty()) {
                    submitJob(requestSleepMillis, retries, pool, batch);
                }
                LOG.info(String.format("Submitted all jobs at round %d.", round));

                pool.shutdown();
                LOG.info(String.format("Wait for download-query-data jobs to finish at round %d... Next round now has size %d.", round, retries.size()));
                try {
                    if (!pool.awaitTermination(GoogleWebmasterExtractorIterator.this.TIME_OUT, TimeUnit.MINUTES)) {
                        pool.shutdownNow();
                        LOG.warn(String.format("Timed out while downloading query data for country-%s at round %d. Next round now has size %d.", country, round, retries.size()));
                    }
                } catch (InterruptedException e) {
                    // Do not leave download tasks running behind an aborted producer, and keep the
                    // interrupt visible to whoever handles the exception.
                    pool.shutdownNow();
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(e);
                }

                if (retries.isEmpty()) {
                    break;
                }
                round++;
                this._jobsToProcess = retries;
                try {
                    // long arithmetic: a large COOL_DOWN_STEP must not overflow into a negative sleep.
                    Thread.sleep(GoogleWebmasterExtractorIterator.this.INITIAL_COOL_DOWN + ((long) GoogleWebmasterExtractorIterator.this.COOL_DOWN_STEP * random.nextInt(round)));
                } catch (InterruptedException e) {
                    // Best effort, as before: an interrupt only cuts the cool-down short.
                    LOG.warn(String.format("Interrupted while cooling down before retry round %d; starting the round now.", round), e);
                }
            }

            if (round == maxRetryRounds + 1) {
                LOG.error(String.format("Exceeded maximum retries. There are %d unprocessed jobs.", this._jobsToProcess.size()));
                final StringBuilder hint = new StringBuilder();
                hint.append("You can add as hot start jobs to continue: ").append(System.lineSeparator()).append(System.lineSeparator());
                hint.append(ProducerJob.serialize(this._jobsToProcess));
                hint.append(System.lineSeparator());
                LOG.error(hint.toString());
            }
            LOG.info(String.format("ResponseProducer finishes for %s from %s to %s at retry round %d", country, GoogleWebmasterExtractorIterator.this._startDate, GoogleWebmasterExtractorIterator.this._endDate, round));
        }

        /**
         * Pauses for the throttling delay and then hands one batch of jobs to the pool.
         *
         * @param requestSleepMillis milliseconds to sleep before submitting
         * @param retries            deque that failed jobs are added to
         * @param pool               executor that runs the download task
         * @param batch              jobs to download together
         */
        private void submitJob(long requestSleepMillis, ConcurrentLinkedDeque<ProducerJob> retries, ExecutorService pool, List<ProducerJob> batch) {
            try {
                Thread.sleep(requestSleepMillis);
            } catch (InterruptedException ignored) {
                // Swallowed on purpose: the interrupt only ends the pause early and the batch is
                // submitted right away. The interrupt status is not restored.
            }
            final Runnable downloadTask = getResponses(batch, retries, GoogleWebmasterExtractorIterator.this._cachedQueries);
            pool.submit(downloadTask);
        }

        /**
         * Creates the task that downloads the query data of a single job.
         *
         * <p>The task queries the fetcher with the shared filters plus a page filter for the job and
         * passes the rows to {@code onSuccess}. An IOException sends the job to {@code onFailure}
         * with the exception's message.
         *
         * @param producerJob           the job to download
         * @param concurrentLinkedDeque deque that collects jobs to retry
         * @param linkedBlockingDeque   queue that receives the downloaded rows
         * @return the download task; nothing runs until it is executed
         */
        private Runnable getResponse(final ProducerJob producerJob, final ConcurrentLinkedDeque<ProducerJob> concurrentLinkedDeque, final LinkedBlockingDeque<String[]> linkedBlockingDeque) {
            return new Runnable() { // from class: gobblin.ingestion.google.webmaster.GoogleWebmasterExtractorIterator.ResponseProducer.1
                @Override // java.lang.Runnable
                public void run() {
                    final GoogleWebmasterExtractorIterator outer = GoogleWebmasterExtractorIterator.this;
                    final List<String[]> rows;
                    try {
                        // Copy the shared filters so the per-job page filter never leaks into _filterMap.
                        final List<ApiDimensionFilter> filters = new ArrayList<>(outer._filterMap.values());
                        filters.add(GoogleWebmasterFilter.pageFilter(producerJob.getOperator(), producerJob.getPage()));
                        rows = outer._webmaster.performSearchAnalyticsQuery(producerJob.getStartDate(), producerJob.getEndDate(), outer.QUERY_LIMIT, outer._requestedDimensions, outer._requestedMetrics, filters);
                    } catch (IOException e) {
                        ResponseProducer.this.onFailure(e.getMessage(), producerJob, concurrentLinkedDeque);
                        return;
                    }
                    ResponseProducer.this.onSuccess(producerJob, rows, linkedBlockingDeque, concurrentLinkedDeque);
                }
            };
        }

        /**
         * Creates the task that downloads the query data for a batch of jobs.
         *
         * <p>A batch of exactly one job is delegated to {@code getResponse}. Otherwise the task builds,
         * for every job, a filter list (shared filters plus the job's page filter) and a callback, and
         * sends them in one call to the fetcher's batch query. Each callback forwards its result to this
         * producer's {@code onSuccess} / {@code onFailure}. If the batch call itself throws an
         * IOException, every job of the batch is queued for retry.
         *
         * @param list                  the jobs of the batch
         * @param concurrentLinkedDeque deque that collects jobs to retry
         * @param linkedBlockingDeque   queue that receives the downloaded rows
         * @return the download task; nothing runs until it is executed
         */
        private Runnable getResponses(final List<ProducerJob> list, final ConcurrentLinkedDeque<ProducerJob> concurrentLinkedDeque, final LinkedBlockingDeque<String[]> linkedBlockingDeque) {
            final int size = list.size();
            if (size == 1) {
                return getResponse(list.get(0), concurrentLinkedDeque, linkedBlockingDeque);
            }
            return new Runnable() { // from class: gobblin.ingestion.google.webmaster.GoogleWebmasterExtractorIterator.ResponseProducer.2
                @Override // java.lang.Runnable
                public void run() {
                    try {
                        // Element type stays ArrayList; presumably what the fetcher's batch method declares - confirm.
                        final List<ArrayList<ApiDimensionFilter>> filterList = new ArrayList<>(size);
                        final List<JsonBatchCallback<SearchAnalyticsQueryResponse>> callbackList = new ArrayList<>(size);
                        for (final ProducerJob producerJob : list) {
                            final ArrayList<ApiDimensionFilter> filters = new ArrayList<>(GoogleWebmasterExtractorIterator.this._filterMap.values());
                            filters.add(GoogleWebmasterFilter.pageFilter(producerJob.getOperator(), producerJob.getPage()));
                            filterList.add(filters);
                            callbackList.add(new JsonBatchCallback<SearchAnalyticsQueryResponse>() { // from class: gobblin.ingestion.google.webmaster.GoogleWebmasterExtractorIterator.ResponseProducer.2.1
                                @Override
                                public void onFailure(GoogleJsonError googleJsonError, HttpHeaders httpHeaders) throws IOException {
                                    // Must target the producer: the callback itself has no (String, job, deque) overload.
                                    ResponseProducer.this.onFailure(googleJsonError.getMessage(), producerJob, concurrentLinkedDeque);
                                }

                                @Override
                                public void onSuccess(SearchAnalyticsQueryResponse searchAnalyticsQueryResponse, HttpHeaders httpHeaders) throws IOException {
                                    ResponseProducer.this.onSuccess(producerJob, GoogleWebmasterDataFetcher.convertResponse(GoogleWebmasterExtractorIterator.this._requestedMetrics, searchAnalyticsQueryResponse), linkedBlockingDeque, concurrentLinkedDeque);
                                }
                            });
                            GoogleWebmasterExtractorIterator.LOG.debug("Ready to submit " + producerJob);
                        }
                        GoogleWebmasterExtractorIterator.this._webmaster.performSearchAnalyticsQueryInBatch(list, filterList, callbackList, GoogleWebmasterExtractorIterator.this._requestedDimensions, GoogleWebmasterExtractorIterator.this.QUERY_LIMIT);
                    } catch (IOException e) {
                        GoogleWebmasterExtractorIterator.LOG.warn("Batch request failed. Jobs: " + Joiner.on(",").join(list));
                        // The whole batch failed, so every job goes to the next round.
                        for (ProducerJob failed : list) {
                            concurrentLinkedDeque.add(failed);
                        }
                    }
                }
            };
        }

        /* JADX INFO: Access modifiers changed from: private */
        /**
         * Records a failed job for the next retry round.
         *
         * @param reason  failure message, written to the debug log
         * @param job     the job that failed
         * @param retries deque the job is appended to
         */
        public void onFailure(String reason, ProducerJob job, ConcurrentLinkedDeque<ProducerJob> retries) {
            final String logLine = String.format("OnFailure: will retry job %s.%sReason:%s", job, System.lineSeparator(), reason);
            GoogleWebmasterExtractorIterator.LOG.debug(logLine);
            retries.add(job);
        }

        /* JADX INFO: Access modifiers changed from: private */
        /**
         * Handles the rows returned for a job.
         *
         * <p>When exactly 5000 rows came back the job is split through {@code partitionJobs()}; if that
         * yields sub-jobs they are queued for the next round and the current rows are dropped. If the
         * job cannot be split, a warning is logged and the rows are kept. In every other case the rows
         * are put on the result queue, blocking while the queue is full.
         *
         * @param job           the job that produced the rows
         * @param rows          the downloaded rows
         * @param responseQueue queue that receives the rows
         * @param retries       deque that receives the sub-jobs of a partitioned job
         * @throws RuntimeException wrapping the InterruptedException if the thread is interrupted while
         *         waiting for queue space; the interrupt status is restored first
         */
        public void onSuccess(ProducerJob job, List<String[]> rows, LinkedBlockingDeque<String[]> responseQueue, ConcurrentLinkedDeque<ProducerJob> retries) {
            final int rowCount = rows.size();
            // NOTE(review): 5000 presumably equals GoogleWebmasterClient.API_ROW_LIMIT (named in the warning below) - confirm.
            if (rowCount == 5000) {
                final List<? extends ProducerJob> subJobs = job.partitionJobs();
                if (!subJobs.isEmpty()) {
                    GoogleWebmasterExtractorIterator.LOG.info(String.format("Partition current job %s", job));
                    retries.addAll(subJobs);
                    return;
                }
                GoogleWebmasterExtractorIterator.LOG.warn(String.format("There might be more query data for your job %s. Currently, downloading more than the Google API limit '%d' is not supported.", job, Integer.valueOf(GoogleWebmasterClient.API_ROW_LIMIT)));
            }
            GoogleWebmasterExtractorIterator.LOG.debug(String.format("Finished %s. Records %d.", job, Integer.valueOf(rowCount)));
            try {
                for (String[] row : rows) {
                    responseQueue.put(row);
                }
            } catch (InterruptedException e) {
                // Keep the interrupt visible to the executor and log the cause; the bare message of an
                // InterruptedException is usually null.
                Thread.currentThread().interrupt();
                GoogleWebmasterExtractorIterator.LOG.error(String.format("Interrupted while queueing query data for job %s.", job), e);
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Creates an iterator over the query data of one date range and one filter set.
     *
     * <p>All tuning values are read from the work unit state and validated here; nothing is fetched
     * until the first call to {@link #hasNext()}.
     *
     * @param webmaster           fetcher used to list pages and to query the data
     * @param startDate           start date forwarded to the fetcher
     * @param endDate             end date forwarded to the fetcher
     * @param requestedDimensions dimensions to request
     * @param requestedMetrics    metrics to request
     * @param filterMap           filters applied to every query; must not contain a PAGE filter
     * @param wuState             source of the tuning properties
     * @throws IllegalArgumentException if a PAGE filter is present or a tuning value is out of range
     */
    public GoogleWebmasterExtractorIterator(GoogleWebmasterDataFetcher webmaster, String startDate, String endDate, List<GoogleWebmasterFilter.Dimension> requestedDimensions, List<GoogleWebmasterDataFetcher.Metric> requestedMetrics, Map<GoogleWebmasterFilter.Dimension, ApiDimensionFilter> filterMap, WorkUnitState wuState) {
        Preconditions.checkArgument(!filterMap.containsKey(GoogleWebmasterFilter.Dimension.PAGE), "Doesn't support filters for page for the time being. Will implement support later. If page filter is provided, the code won't take the responsibility of get all pages, so it will just return all queries for that page.");

        _webmaster = webmaster;
        _startDate = startDate;
        _endDate = endDate;
        _requestedDimensions = requestedDimensions;
        _requestedMetrics = requestedMetrics;
        _filterMap = filterMap;
        _country = GoogleWebmasterFilter.countryFilterToString(filterMap.get(GoogleWebmasterFilter.Dimension.COUNTRY));

        // Row limits; both default to the API row limit.
        PAGE_LIMIT = wuState.getPropAsInt(GoogleWebMasterSource.KEY_REQUEST_PAGE_LIMIT, GoogleWebmasterClient.API_ROW_LIMIT);
        Preconditions.checkArgument(PAGE_LIMIT > 0, "Page limit must be at least 1.");
        QUERY_LIMIT = wuState.getPropAsInt(GoogleWebMasterSource.KEY_REQUEST_QUERY_LIMIT, GoogleWebmasterClient.API_ROW_LIMIT);
        Preconditions.checkArgument(QUERY_LIMIT > 0, "Query limit must be at least 1.");

        // Waiting and retry behaviour.
        TIME_OUT = wuState.getPropAsInt(GoogleWebMasterSource.KEY_REQUEST_TIME_OUT, 5);
        Preconditions.checkArgument(TIME_OUT >= 1, "Time out must be positive.");
        MAX_RETRY_ROUNDS = wuState.getPropAsInt(GoogleWebMasterSource.KEY_REQUEST_TUNING_RETRIES, 20);
        Preconditions.checkArgument(MAX_RETRY_ROUNDS >= 0, "Retry rounds cannot be negative.");
        INITIAL_COOL_DOWN = wuState.getPropAsInt(GoogleWebMasterSource.KEY_REQUEST_TUNING_INITIAL_COOL_DOWN, 300);
        Preconditions.checkArgument(INITIAL_COOL_DOWN >= 0, "Initial cool down time cannot be negative.");
        COOL_DOWN_STEP = wuState.getPropAsInt(GoogleWebMasterSource.KEY_REQUEST_TUNING_COOL_DOWN_STEP, 50);
        Preconditions.checkArgument(COOL_DOWN_STEP >= 0, "Cool down step time cannot be negative.");

        // Throughput.
        REQUESTS_PER_SECOND = wuState.getPropAsDouble(GoogleWebMasterSource.KEY_REQUEST_TUNING_REQUESTS_PER_SECOND, 2.25d);
        Preconditions.checkArgument(REQUESTS_PER_SECOND > 0.0d, "Requests per second must be positive.");
        BATCH_SIZE = wuState.getPropAsInt(GoogleWebMasterSource.KEY_REQUEST_TUNING_BATCH_SIZE, 2);
        Preconditions.checkArgument(BATCH_SIZE > 0, "Batch size must be at least 1.");
        GROUP_SIZE = wuState.getPropAsInt(GoogleWebMasterSource.KEY_REQUEST_TUNING_GROUP_SIZE, 500);
        Preconditions.checkArgument(GROUP_SIZE > 0, "Group size must be at least 1.");

        // The advanced algorithm is only accepted together with a page limit of exactly 5000.
        ADVANCED_MODE = wuState.getPropAsBoolean(GoogleWebMasterSource.KEY_REQUEST_TUNING_ALGORITHM, false);
        Preconditions.checkArgument(!ADVANCED_MODE || PAGE_LIMIT == 5000, "Page limit must be set at 5000 if you want to use the advanced algorithm. This indicates that you understand what you are doing.");
    }

    /**
     * Row already taken off the queue by {@link #hasNext()} and not yet returned by {@link #next()};
     * {@code null} when none is pending. Only touched by the consuming thread.
     */
    private String[] _pendingRow;

    /**
     * Reports whether another row is available, waiting for the producer if necessary.
     *
     * <p>Starts the producer on first use. The row that proves availability is kept in a look-ahead
     * slot instead of being pushed back onto the bounded queue: pushing back could block forever if
     * the download tasks refilled the queue in the meantime, because this thread is the only consumer.
     *
     * @return {@code true} if {@link #next()} will return a row; {@code false} once the producer thread
     *         has ended and the queue is empty
     * @throws IOException if listing the pages fails while starting the producer
     * @throws RuntimeException wrapping the InterruptedException if the wait is interrupted; the
     *         interrupt status is restored first
     */
    public boolean hasNext() throws IOException {
        initialize();
        if (this._pendingRow != null) {
            return true;
        }
        try {
            while (true) {
                this._pendingRow = this._cachedQueries.poll(1L, TimeUnit.SECONDS);
                if (this._pendingRow != null) {
                    return true;
                }
                if (!this._producerThread.isAlive()) {
                    // Rows may have arrived between the timed-out poll and the liveness check;
                    // look once more before declaring the end.
                    this._pendingRow = this._cachedQueries.poll();
                    if (this._pendingRow != null) {
                        return true;
                    }
                    LOG.info("Producer job has finished. No more query data in the queue.");
                    return false;
                }
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }

    /**
     * Creates and starts the producer thread on the first call; later calls do nothing.
     *
     * @throws IOException if the fetcher fails to list the pages
     */
    private void initialize() throws IOException {
        if (this._producerThread == null) {
            this._producerThread = new Thread(new ResponseProducer(this._webmaster.getAllPages(this._startDate, this._endDate, this._country, this.PAGE_LIMIT)));
            this._producerThread.start();
        }
    }

    /**
     * Returns the next row, waiting for the producer if necessary.
     *
     * @return the next row of query data
     * @throws IOException if listing the pages fails while starting the producer
     * @throws NoSuchElementException if no rows are left
     */
    public String[] next() throws IOException {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        final String[] row = this._pendingRow;
        this._pendingRow = null;
        return row;
    }

    /**
     * Returns the country string that the constructor derived from the COUNTRY filter.
     *
     * @return the value of {@code _country}
     */
    String getCountry() {
        return this._country;
    }
}
