package com.facebook.presto.hive;

import com.facebook.presto.hadoop.HadoopFileStatus;
import com.facebook.presto.hive.HiveBucketing;
import com.facebook.presto.hive.metastore.Column;
import com.facebook.presto.hive.metastore.MetastoreUtil;
import com.facebook.presto.hive.metastore.Partition;
import com.facebook.presto.hive.metastore.Table;
import com.facebook.presto.hive.util.HiveFileIterator;
import com.facebook.presto.hive.util.ResumableTask;
import com.facebook.presto.hive.util.ResumableTasks;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.HostAddress;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.StandardErrorCode;
import com.facebook.presto.spi.predicate.TupleDomain;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.io.CharStreams;
import io.airlift.units.DataSize;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Deque;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.Properties;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.Executor;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Stream;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

/* loaded from: input_file:com/facebook/presto/hive/BackgroundHiveSplitLoader.class */
public class BackgroundHiveSplitLoader implements HiveSplitLoader {
    /** Message prefix used when the files on disk do not match the table's bucketing declaration. */
    private static final String CORRUPT_BUCKETING = "Hive table is corrupt. It is declared as being bucketed, but the files do not match the bucketing declaration.";
    /** Shared, already-completed future returned by {@code loadSplits()} when there is nothing to wait on. */
    public static final CompletableFuture<?> COMPLETED_FUTURE = CompletableFuture.completedFuture(null);
    /** Connector id passed to every {@link HiveSplit} created by this loader. */
    private final String connectorId;
    private final Table table;
    /** Present when the table is read through a bucket handle; one split set is then created per bucket. */
    private final Optional<HiveBucketHandle> bucketHandle;
    /** Explicit buckets to read; when non-empty only these buckets' files are turned into splits. */
    private final List<HiveBucketing.HiveBucket> buckets;
    private final HdfsEnvironment hdfsEnvironment;
    private final NamenodeStats namenodeStats;
    private final DirectoryLister directoryLister;
    /** Split size limit applied once the initial-split budget is used up. */
    private final DataSize maxSplitSize;
    /** Number of loader tasks submitted by {@code start}. */
    private final int maxPartitionBatchSize;
    /** Split size limit applied while {@link #remainingInitialSplits} is still positive. */
    private final DataSize maxInitialSplitSize;
    /** When true, sub-directories found while listing a partition are queued for listing as well. */
    private final boolean recursiveDirWalkerEnabled;
    private final Executor executor;
    private final ConnectorSession session;
    /** Partitions that have not been picked up by a loader task yet. */
    private final ConcurrentLazyQueue<HivePartitionMetadata> partitions;
    /** Budget of splits that may still be created with the initial split size. */
    private final AtomicInteger remainingInitialSplits;
    /** Destination for created splits; assigned in {@code start}. */
    private HiveSplitSource hiveSplitSource;
    /** Set by {@code stop()}; loader tasks poll it and exit their loops once it is true. */
    private volatile boolean stopped;
    /** Partially consumed directory listings shared between loader tasks. */
    private final Deque<HiveFileIterator> fileIterators = new ConcurrentLinkedDeque<>();
    /**
     * Loader tasks hold the read lock while loading splits; the write lock is taken
     * before reporting completion so that no task is mid-load at that moment.
     */
    private final ReentrantReadWriteLock taskExecutionLock = new ReentrantReadWriteLock();

    /* loaded from: input_file:com/facebook/presto/hive/BackgroundHiveSplitLoader$HiveSplitLoaderTask.class */
    /**
     * Resumable unit of work that repeatedly loads splits until the loader is
     * stopped or the split source asks the task to wait.
     */
    private class HiveSplitLoaderTask implements ResumableTask {
        private HiveSplitLoaderTask() {
        }

        /**
         * Runs load iterations until the loader is stopped.
         *
         * @return {@code continueOn(future)} when the last load returned a future that is
         *         not done yet, otherwise {@code finished()} once the loader has been stopped
         */
        @Override // com.facebook.presto.hive.util.ResumableTask
        public ResumableTask.TaskStatus process() {
            while (!BackgroundHiveSplitLoader.this.stopped) {
                try {
                    CompletableFuture<?> future;
                    BackgroundHiveSplitLoader.this.taskExecutionLock.readLock().lock();
                    try {
                        future = BackgroundHiveSplitLoader.this.loadSplits();
                    } finally {
                        // Release exactly once, and before the completion check below,
                        // which needs to acquire the write lock.
                        BackgroundHiveSplitLoader.this.taskExecutionLock.readLock().unlock();
                    }
                    BackgroundHiveSplitLoader.this.invokeFinishedIfNecessary();
                    if (!future.isDone()) {
                        return ResumableTask.TaskStatus.continueOn(future);
                    }
                } catch (Exception e) {
                    // Report the failure to the split source; the loop then re-checks the stop flag.
                    BackgroundHiveSplitLoader.this.hiveSplitSource.fail(e);
                }
            }
            return ResumableTask.TaskStatus.finished();
        }
    }

    /**
     * Creates a loader for the given table and partitions. No work is started
     * until {@code start} is called.
     *
     * @param str connector id recorded on every created split
     * @param table table whose partitions are loaded
     * @param iterable partitions to load; wrapped in a {@link ConcurrentLazyQueue}
     * @param optional bucket handle, if the table is read through one
     * @param list explicit buckets to read (may be empty)
     * @param connectorSession session used for the user name and the split-size session properties
     * @param hdfsEnvironment source of file systems and Hadoop configurations
     * @param namenodeStats stats object handed to the file iterators
     * @param directoryLister lister handed to the file iterators
     * @param executor executor the loader tasks are submitted to
     * @param i number of loader tasks to submit (stored as the max partition batch size)
     * @param i2 number of splits that may be created with the initial split size
     * @param z whether sub-directories of a partition are walked recursively
     */
    public BackgroundHiveSplitLoader(String str, Table table, Iterable<HivePartitionMetadata> iterable, Optional<HiveBucketHandle> optional, List<HiveBucketing.HiveBucket> list, ConnectorSession connectorSession, HdfsEnvironment hdfsEnvironment, NamenodeStats namenodeStats, DirectoryLister directoryLister, Executor executor, int i, int i2, boolean z) {
        // What to load.
        this.connectorId = str;
        this.table = table;
        this.partitions = new ConcurrentLazyQueue<>(iterable);
        this.bucketHandle = optional;
        this.buckets = list;

        // Collaborators.
        this.session = connectorSession;
        this.hdfsEnvironment = hdfsEnvironment;
        this.namenodeStats = namenodeStats;
        this.directoryLister = directoryLister;
        this.executor = executor;

        // Tuning knobs.
        this.maxPartitionBatchSize = i;
        this.recursiveDirWalkerEnabled = z;
        this.remainingInitialSplits = new AtomicInteger(i2);
        this.maxSplitSize = HiveSessionProperties.getMaxSplitSize(connectorSession);
        this.maxInitialSplitSize = HiveSessionProperties.getMaxInitialSplitSize(connectorSession);
    }

    /**
     * Remembers the split source and submits {@code maxPartitionBatchSize}
     * loader tasks to the executor.
     *
     * @param hiveSplitSource destination for the splits produced by this loader
     */
    @Override // com.facebook.presto.hive.HiveSplitLoader
    public void start(HiveSplitSource hiveSplitSource) {
        this.hiveSplitSource = hiveSplitSource;
        for (int tasksLeft = maxPartitionBatchSize; tasksLeft > 0; tasksLeft--) {
            HiveSplitLoaderTask task = new HiveSplitLoaderTask();
            ResumableTasks.submit(executor, task);
        }
    }

    /**
     * Raises the stop flag. Loader tasks observe it in their loops and finish
     * instead of loading further splits.
     */
    @Override // com.facebook.presto.hive.HiveSplitLoader
    public void stop() {
        stopped = true;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Tells the split source that loading is finished once there are neither
     * pending partitions nor pending file iterators. The emptiness check is
     * repeated under the write lock before {@code finished()} is called.
     */
    public void invokeFinishedIfNecessary() {
        // Cheap check first, without taking the write lock.
        if (!partitions.isEmpty() || !fileIterators.isEmpty()) {
            return;
        }
        ReentrantReadWriteLock.WriteLock exclusive = taskExecutionLock.writeLock();
        exclusive.lock();
        try {
            boolean nothingLeft = partitions.isEmpty() && fileIterators.isEmpty();
            if (nothingLeft) {
                hiveSplitSource.finished();
            }
        } finally {
            exclusive.unlock();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Performs one unit of loading work. If a file iterator is pending, its
     * files are turned into splits; otherwise the next pending partition (if
     * any) is loaded.
     *
     * @return the future returned by the split source when it is not done yet
     *         (the iterator is put back at the head of the queue in that case),
     *         otherwise {@link #COMPLETED_FUTURE}
     * @throws IOException if a file system lookup or split creation fails
     */
    public CompletableFuture<?> loadSplits() throws IOException {
        HiveFileIterator files = this.fileIterators.poll();
        if (files == null) {
            // No listing in progress: start on the next partition, if one is left.
            HivePartitionMetadata nextPartition = this.partitions.poll();
            if (nextPartition != null) {
                loadPartition(nextPartition);
            }
            return COMPLETED_FUTURE;
        }

        while (files.hasNext() && !this.stopped) {
            LocatedFileStatus file = (LocatedFileStatus) files.next();

            if (HadoopFileStatus.isDirectory(file)) {
                // Directories are only followed when recursive walking is enabled.
                if (this.recursiveDirWalkerEnabled) {
                    HiveFileIterator nested = new HiveFileIterator(
                            file.getPath(),
                            files.getFileSystem(),
                            files.getDirectoryLister(),
                            files.getNamenodeStats(),
                            files.getPartitionName(),
                            files.getInputFormat(),
                            files.getSchema(),
                            files.getPartitionKeys(),
                            files.getEffectivePredicate(),
                            files.getColumnCoercions());
                    this.fileIterators.add(nested);
                }
                continue;
            }

            CompletableFuture<?> queued = this.hiveSplitSource.addToQueue(createHiveSplits(
                    files.getPartitionName(),
                    file.getPath().toString(),
                    file.getBlockLocations(),
                    0L,
                    file.getLen(),
                    files.getSchema(),
                    files.getPartitionKeys(),
                    HiveUtil.isSplittable(
                            files.getInputFormat(),
                            this.hdfsEnvironment.getFileSystem(this.session.getUser(), file.getPath()),
                            file.getPath()),
                    this.session,
                    OptionalInt.empty(),
                    files.getEffectivePredicate(),
                    files.getColumnCoercions()));
            if (!queued.isDone()) {
                // Put the iterator back at the head so it is resumed first.
                this.fileIterators.addFirst(files);
                return queued;
            }
        }
        return COMPLETED_FUTURE;
    }

    /**
     * Starts loading a single partition.
     * <ul>
     *   <li>{@link SymlinkTextInputFormat}: every target path listed in the symlink files is
     *       split with {@link TextInputFormat} and the splits are queued immediately
     *       (bucketed tables are rejected).</li>
     *   <li>Explicit buckets: only the files of the requested buckets are queued.</li>
     *   <li>Bucket handle present: one file per bucket is queued, in bucket order.</li>
     *   <li>Otherwise: a file iterator is appended to {@code fileIterators} for later,
     *       incremental processing by {@code loadSplits}.</li>
     * </ul>
     *
     * @param hivePartitionMetadata partition to load
     * @throws IOException if a file system operation fails
     * @throws PrestoException if a bucketed table uses SymlinkTextInputFormat or the bucket
     *         files do not match the bucketing declaration
     */
    private void loadPartition(HivePartitionMetadata hivePartitionMetadata) throws IOException {
        String partitionId = hivePartitionMetadata.getHivePartition().getPartitionId();
        Properties partitionSchema = getPartitionSchema(this.table, hivePartitionMetadata.getPartition());
        List<HivePartitionKey> partitionKeys = getPartitionKeys(this.table, hivePartitionMetadata.getPartition());
        TupleDomain<HiveColumnHandle> effectivePredicate = hivePartitionMetadata.getHivePartition().getEffectivePredicate();
        Path path = new Path(getPartitionLocation(this.table, hivePartitionMetadata.getPartition()));
        InputFormat<?, ?> inputFormat = HiveUtil.getInputFormat(this.hdfsEnvironment.getConfiguration(path), partitionSchema, false);
        FileSystem fileSystem = this.hdfsEnvironment.getFileSystem(this.session.getUser(), path);

        if (inputFormat instanceof SymlinkTextInputFormat) {
            if (this.bucketHandle.isPresent()) {
                throw new PrestoException(StandardErrorCode.NOT_SUPPORTED, "Bucketed table in SymlinkTextInputFormat is not yet supported");
            }
            for (Path targetPath : getTargetPathsFromSymlink(fileSystem, path)) {
                // The target may live on a different file system, so its own configuration is used.
                TextInputFormat targetInputFormat = new TextInputFormat();
                JobConf targetJob = new JobConf(this.hdfsEnvironment.getConfiguration(targetPath));
                targetJob.setInputFormat(TextInputFormat.class);
                targetInputFormat.configure(targetJob);
                FileInputFormat.setInputPaths(targetJob, new Path[]{targetPath});
                // getSplits is declared to return InputSplit[]; each element is cast to FileSplit explicitly.
                for (org.apache.hadoop.mapred.InputSplit inputSplit : targetInputFormat.getSplits(targetJob, 0)) {
                    FileSplit fileSplit = (FileSplit) inputSplit;
                    FileSystem targetFileSystem = this.hdfsEnvironment.getFileSystem(this.session.getUser(), fileSplit.getPath());
                    FileStatus fileStatus = targetFileSystem.getFileStatus(fileSplit.getPath());
                    this.hiveSplitSource.addToQueue(createHiveSplits(
                            partitionId,
                            fileStatus.getPath().toString(),
                            targetFileSystem.getFileBlockLocations(fileStatus, fileSplit.getStart(), fileSplit.getLength()),
                            fileSplit.getStart(),
                            fileSplit.getLength(),
                            partitionSchema,
                            partitionKeys,
                            false,
                            this.session,
                            OptionalInt.empty(),
                            effectivePredicate,
                            hivePartitionMetadata.getColumnCoercions()));
                    if (this.stopped) {
                        return;
                    }
                }
            }
            return;
        }

        HiveFileIterator hiveFileIterator = new HiveFileIterator(path, fileSystem, this.directoryLister, this.namenodeStats, partitionId, inputFormat, partitionSchema, partitionKeys, effectivePredicate, hivePartitionMetadata.getColumnCoercions());

        if (!this.buckets.isEmpty()) {
            // Only the explicitly requested buckets are read.
            List<LocatedFileStatus> bucketFiles = listAndSortBucketFiles(hiveFileIterator, this.buckets.get(0).getBucketCount());
            for (HiveBucketing.HiveBucket bucket : this.buckets) {
                int bucketNumber = bucket.getBucketNumber();
                queueBucketFile(hiveFileIterator, bucketFiles.get(bucketNumber), bucketNumber, effectivePredicate, hivePartitionMetadata.getColumnCoercions());
            }
            return;
        }

        if (!this.bucketHandle.isPresent()) {
            // Unbucketed read: defer listing to loadSplits.
            this.fileIterators.addLast(hiveFileIterator);
            return;
        }

        // Bucketed read of every bucket: the i-th sorted file is bucket i.
        int bucketCount = this.bucketHandle.get().getBucketCount();
        List<LocatedFileStatus> bucketFiles = listAndSortBucketFiles(hiveFileIterator, bucketCount);
        for (int bucketNumber = 0; bucketNumber < bucketCount; bucketNumber++) {
            queueBucketFile(hiveFileIterator, bucketFiles.get(bucketNumber), bucketNumber, hiveFileIterator.getEffectivePredicate(), hivePartitionMetadata.getColumnCoercions());
        }
    }

    /** Creates the splits for one bucket file and hands them to the split source. */
    private void queueBucketFile(HiveFileIterator hiveFileIterator, LocatedFileStatus bucketFile, int bucketNumber, TupleDomain<HiveColumnHandle> predicate, Map<Integer, HiveType> columnCoercions) throws IOException {
        this.hiveSplitSource.addToQueue(createHiveSplits(
                hiveFileIterator.getPartitionName(),
                bucketFile.getPath().toString(),
                bucketFile.getBlockLocations(),
                0L,
                bucketFile.getLen(),
                hiveFileIterator.getSchema(),
                hiveFileIterator.getPartitionKeys(),
                HiveUtil.isSplittable(
                        hiveFileIterator.getInputFormat(),
                        this.hdfsEnvironment.getFileSystem(this.session.getUser(), bucketFile.getPath()),
                        bucketFile.getPath()),
                this.session,
                OptionalInt.of(bucketNumber),
                predicate,
                columnCoercions));
    }

    /**
     * Drains the iterator into a list, verifies that it contains exactly
     * {@code i} plain files, and sorts the list by the files' natural ordering.
     *
     * @param hiveFileIterator listing of the bucket directory
     * @param i declared bucket count
     * @return the bucket files in sorted order
     * @throws PrestoException with {@code HIVE_INVALID_BUCKET_FILES} if a sub-directory is
     *         found or the number of files differs from {@code i}
     */
    private static List<LocatedFileStatus> listAndSortBucketFiles(HiveFileIterator hiveFileIterator, int i) {
        List<LocatedFileStatus> bucketFiles = new ArrayList<>(i);
        while (hiveFileIterator.hasNext()) {
            LocatedFileStatus entry = (LocatedFileStatus) hiveFileIterator.next();
            if (HadoopFileStatus.isDirectory(entry)) {
                String message = String.format("%s Found sub-directory in bucket directory for partition: %s", CORRUPT_BUCKETING, hiveFileIterator.getPartitionName());
                throw new PrestoException(HiveErrorCode.HIVE_INVALID_BUCKET_FILES, message);
            }
            bucketFiles.add(entry);
        }

        int found = bucketFiles.size();
        if (found != i) {
            String message = String.format("%s The number of files in the directory (%s) does not match the declared bucket count (%s) for partition: %s", CORRUPT_BUCKETING, found, i, hiveFileIterator.getPartitionName());
            throw new PrestoException(HiveErrorCode.HIVE_INVALID_BUCKET_FILES, message);
        }

        // A null comparator selects the elements' natural ordering.
        bucketFiles.sort(null);
        return bucketFiles;
    }

    /**
     * Reads every non-hidden file in the given directory and interprets each
     * line as a target path.
     *
     * @param fileSystem file system containing the symlink directory
     * @param path directory holding the symlink files
     * @return all target paths, in listing order and then line order
     * @throws PrestoException with {@code HIVE_BAD_DATA} if listing or reading fails
     */
    private static List<Path> getTargetPathsFromSymlink(FileSystem fileSystem, Path path) {
        try {
            FileStatus[] symlinks = fileSystem.listStatus(path, FileUtils.HIDDEN_FILES_PATH_FILTER);
            List<Path> targets = new ArrayList<>();
            for (FileStatus symlink : symlinks) {
                // try-with-resources closes the reader even when reading fails.
                try (BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(symlink.getPath()), StandardCharsets.UTF_8))) {
                    for (String line : CharStreams.readLines(reader)) {
                        targets.add(new Path(line));
                    }
                }
            }
            return targets;
        } catch (IOException e) {
            throw new PrestoException(HiveErrorCode.HIVE_BAD_DATA, "Error parsing symlinks from: " + path, e);
        }
    }

    /**
     * Builds the splits for one file (or file range).
     * <p>
     * For a non-splittable file a single split covering {@code [j, j + j2)} is created, using the
     * hosts of the first block location (if any). For a splittable file each block is cut into
     * chunks of roughly equal size. While the initial-split budget is positive the chunk size is
     * derived from the initial split size; once the budget is exhausted mid-block, the chunk size
     * is recomputed from the regular maximum split size for the rest of that block.
     *
     * @param str partition name
     * @param str2 file path
     * @param blockLocationArr block locations of the file
     * @param j start offset, used only when the file is not splittable
     * @param j2 length, used only when the file is not splittable
     * @param properties partition schema
     * @param list partition keys
     * @param z whether the file may be split at block/chunk boundaries
     * @param connectorSession session consulted for the force-local-scheduling property
     * @param optionalInt bucket number, if any
     * @param tupleDomain effective predicate recorded on each split
     * @param map column coercions recorded on each split
     * @return the created splits
     * @throws IOException declared for callers; kept for interface compatibility
     */
    private List<HiveSplit> createHiveSplits(String str, String str2, BlockLocation[] blockLocationArr, long j, long j2, Properties properties, List<HivePartitionKey> list, boolean z, ConnectorSession connectorSession, OptionalInt optionalInt, TupleDomain<HiveColumnHandle> tupleDomain, Map<Integer, HiveType> map) throws IOException {
        ImmutableList.Builder<HiveSplit> splits = ImmutableList.builder();
        boolean forceLocalScheduling = HiveSessionProperties.isForceLocalScheduling(connectorSession);

        if (!z) {
            // Not splittable: one split for the whole requested range.
            List<HostAddress> addresses = ImmutableList.of();
            if (blockLocationArr.length > 0) {
                addresses = toHostAddress(blockLocationArr[0].getHosts());
            }
            splits.add(new HiveSplit(this.connectorId, this.table.getDatabaseName(), this.table.getTableName(), str, str2, j, j2, properties, list, addresses, optionalInt, forceLocalScheduling && hasRealAddress(addresses), tupleDomain, map));
            return splits.build();
        }

        for (BlockLocation blockLocation : blockLocationArr) {
            List<HostAddress> addresses = toHostAddress(blockLocation.getHosts());
            boolean localOnly = forceLocalScheduling && hasRealAddress(addresses);
            long blockLength = blockLocation.getLength();

            long maxBytes = this.maxSplitSize.toBytes();
            boolean creatingInitialSplits = false;
            if (this.remainingInitialSplits.get() > 0) {
                maxBytes = this.maxInitialSplitSize.toBytes();
                creatingInitialSplits = true;
            }

            long targetSplitSize = targetChunkSize(blockLength, maxBytes);
            long chunkOffset = 0;
            while (chunkOffset < blockLength) {
                if (this.remainingInitialSplits.decrementAndGet() < 0 && creatingInitialSplits) {
                    // Budget exhausted mid-block: re-evaluate the chunk size for what remains.
                    creatingInitialSplits = false;
                    long remainingLength = blockLength - chunkOffset;
                    targetSplitSize = targetChunkSize(remainingLength, this.maxSplitSize.toBytes());
                }
                // The last chunk is clipped so that the chunks exactly cover the block.
                long chunkLength = Math.min(targetSplitSize, blockLength - chunkOffset);
                splits.add(new HiveSplit(this.connectorId, this.table.getDatabaseName(), this.table.getTableName(), str, str2, blockLocation.getOffset() + chunkOffset, chunkLength, properties, list, addresses, optionalInt, localOnly, tupleDomain, map));
                chunkOffset += chunkLength;
            }
            Preconditions.checkState(chunkOffset == blockLength, "Error splitting blocks");
        }
        return splits.build();
    }

    /** Divides {@code length} into {@code max(1, length / maxBytes)} equal chunks, rounding the chunk size up. */
    private static long targetChunkSize(long length, long maxBytes) {
        return (long) Math.ceil((length * 1.0d) / Math.max(1, (int) (length / maxBytes)));
    }

    /**
     * Tells whether at least one of the addresses has a host text other than
     * {@code "localhost"}.
     *
     * @param list addresses to inspect
     * @return {@code true} if any address is not "localhost"; {@code false} for an empty list
     */
    private static boolean hasRealAddress(List<HostAddress> list) {
        for (HostAddress address : list) {
            boolean isLocalhost = address.getHostText().equals("localhost");
            if (!isLocalhost) {
                return true;
            }
        }
        return false;
    }

    /**
     * Parses every host string with {@link HostAddress#fromString} and returns
     * the results as an immutable list in the same order.
     *
     * @param strArr host strings
     * @return the parsed addresses
     */
    private static List<HostAddress> toHostAddress(String[] strArr) {
        ImmutableList.Builder<HostAddress> addresses = ImmutableList.builder();
        for (int index = 0; index < strArr.length; index++) {
            HostAddress parsed = HostAddress.fromString(strArr[index]);
            addresses.add(parsed);
        }
        return addresses.build();
    }

    /**
     * Pairs the table's partition columns with the partition's values.
     *
     * @param table table supplying the partition columns
     * @param optional partition supplying the values; when absent an empty list is returned
     * @return one key per partition column, in column order
     * @throws PrestoException if the number of values differs from the number of partition
     *         columns, a column has an unsupported Hive type, or a value is null
     */
    private static List<HivePartitionKey> getPartitionKeys(Table table, Optional<Partition> optional) {
        if (!optional.isPresent()) {
            return ImmutableList.of();
        }

        ImmutableList.Builder<HivePartitionKey> keys = ImmutableList.builder();
        List<Column> columns = table.getPartitionColumns();
        List<String> partitionValues = optional.get().getValues();
        int columnCount = columns.size();
        int valueCount = partitionValues.size();
        HiveUtil.checkCondition(columnCount == valueCount, HiveErrorCode.HIVE_INVALID_METADATA, "Expected %s partition key values, but got %s", columnCount, valueCount);

        int position = 0;
        while (position < columns.size()) {
            Column column = columns.get(position);
            String columnName = column.getName();
            HiveType columnType = column.getType();
            if (!columnType.isSupportedType()) {
                String message = String.format("Unsupported Hive type %s found in partition keys of table %s.%s", columnType, table.getDatabaseName(), table.getTableName());
                throw new PrestoException(StandardErrorCode.NOT_SUPPORTED, message);
            }
            String value = partitionValues.get(position);
            HiveUtil.checkCondition(value != null, HiveErrorCode.HIVE_INVALID_PARTITION_VALUE, "partition key value cannot be null for field: %s", columnName);
            keys.add(new HivePartitionKey(columnName, columnType, value));
            position++;
        }
        return keys.build();
    }

    /**
     * Returns the Hive schema of the partition when one is given, otherwise the
     * schema of the table itself.
     *
     * @param table the table
     * @param optional the partition, if any
     * @return schema properties obtained from {@link MetastoreUtil#getHiveSchema}
     */
    private static Properties getPartitionSchema(Table table, Optional<Partition> optional) {
        if (optional.isPresent()) {
            return MetastoreUtil.getHiveSchema(optional.get(), table);
        }
        return MetastoreUtil.getHiveSchema(table);
    }

    /**
     * Returns the storage location of the partition when one is given,
     * otherwise the storage location of the table.
     *
     * @param table the table
     * @param optional the partition, if any
     * @return the storage location string
     */
    private static String getPartitionLocation(Table table, Optional<Partition> optional) {
        if (optional.isPresent()) {
            return optional.get().getStorage().getLocation();
        }
        return table.getStorage().getLocation();
    }
}
