package org.gorpipe.gor.driver.providers.stream.datatypes.parquet;

import java.io.IOException;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Optional;
import java.util.PriorityQueue;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.filter2.predicate.Operators;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.example.GroupReadSupport;
import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
import org.gorpipe.exceptions.GorSystemException;
import org.gorpipe.gor.driver.meta.SourceReference;
import org.gorpipe.gor.driver.providers.stream.StreamSourceFile;
import org.gorpipe.gor.driver.providers.stream.sources.StreamSource;
import org.gorpipe.gor.driver.providers.stream.sources.file.FileSource;
import org.gorpipe.gor.driver.providers.stream.sources.wrappers.RetryWrapper;
import org.gorpipe.gor.model.GenomicIterator;
import org.gorpipe.gor.model.Line;
import org.gorpipe.gor.model.Row;
import org.gorpipe.gor.session.GorSession;
import org.gorpipe.model.gor.RowObj;
import org.gorpipe.util.standalone.GorStandalone;

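/**
 * {@link GenomicIterator} over a single Parquet file or a directory tree of {@code .parquet} files
 * (decompiled from org/gorpipe/gor/driver/providers/stream/datatypes/parquet/ParquetFileIterator.class).
 * Rows of the individual files are drawn through a priority queue of per-file readers.
 */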
public class ParquetFileIterator extends GenomicIterator {
    /** Chromosome lookup taken from the source reference; handed to every non-NOR row reader. */
    private final GenomicIterator.ChromoLookup lookup;
    /** File or directory still to be scanned; reset to {@code null} once the lazy init has run. */
    private Path resultPath;
    /** Sort column indices parsed from the session's comma separated sort columns, or {@code null}. */
    private int[] sortCols;
    /** Schema read from the footer of the first parquet file. */
    private MessageType schema;
    /** Conjunction of all predicates accepted by {@code pushdownFilter}, or {@code null}. */
    private FilterPredicate filterPredicate;
    /** Predicate installed by the last {@code seek}, or {@code null}. */
    private FilterPredicate seekfilterPredicate;
    /** Combination of the two predicates above as applied to newly opened readers, or {@code null}. */
    private FilterCompat.Filter filter;
    /** Name part of a {@code name=value} parent directory of the first data file, or {@code null}. */
    private String partitioningCol;
    /** Whether the file header already contains the partitioning column. */
    private boolean partColPresent;
    /** Row produced by the most recent successful {@code hasNext()}. */
    private Row row;
    /** Open readers that still have rows; no comparator is given, so the readers' natural ordering applies. */
    private final PriorityQueue<ParquetRowReader> mergeParquet = new PriorityQueue<>();
    /** Data files that have not been opened yet. */
    private List<org.apache.hadoop.fs.Path> parquetPaths = new ArrayList<>();
    /** Complete list of data files; {@code seek} copies it back into {@link #parquetPaths}. */
    private List<org.apache.hadoop.fs.Path> parquetPathsForSeek = new ArrayList<>();
    /** NOR mode: taken from the session, or forced on when the partition column name starts with "chrom". */
    private boolean nor = false;
    private final Configuration configuration = new Configuration(true);
    private final GroupReadSupport readSupport = new GroupReadSupport();

    /**
     * Function adapter around {@code initParquetReader} for use in stream pipelines, where a
     * checked {@link IOException} cannot be thrown. A failure is remembered instead and the
     * mapping yields {@code null} for that path.
     */
    public class Path2ParquetReader implements Function<org.apache.hadoop.fs.Path, ParquetRowReader> {
        /** Last failure recorded by {@link #apply}, or {@code null} when none has happened. */
        IOException ioe;

        Path2ParquetReader() {
        }

        /**
         * Opens a row reader for the given file.
         *
         * @param path parquet file to open
         * @return the reader, or {@code null} when opening failed (see {@link #getError()})
         */
        @Override
        public ParquetRowReader apply(org.apache.hadoop.fs.Path path) {
            ParquetRowReader reader = null;
            try {
                reader = ParquetFileIterator.this.initParquetReader(path);
            } catch (IOException e) {
                this.ioe = e;
            }
            return reader;
        }

        /**
         * @return the recorded failure, empty when every call succeeded so far
         */
        public Optional<IOException> getError() {
            if (this.ioe == null) {
                return Optional.empty();
            }
            return Optional.of(this.ioe);
        }
    }

    /**
     * Creates the iterator; the parquet files themselves are only touched later by the lazy init.
     *
     * @param streamSourceFile source file whose reference supplies the chromosome lookup and the path
     */
    public ParquetFileIterator(StreamSourceFile streamSourceFile) {
        StreamSource source = streamSourceFile.getFileSource();
        this.lookup = source.getSourceReference().getLookup();
        this.resultPath = resolvePath(streamSourceFile);
    }

    /**
     * Turns the source reference url into a local path. A relative url is resolved against the
     * reference's common root when running standalone.
     *
     * @param streamSourceFile source file to resolve
     * @return the path to scan for parquet data
     */
    private Path resolvePath(StreamSourceFile streamSourceFile) {
        SourceReference reference = resolveFileSource(streamSourceFile).getSourceReference();
        Path urlPath = Paths.get(reference.url);
        if (urlPath.isAbsolute() || !GorStandalone.isStandalone()) {
            return urlPath;
        }
        return Paths.get(reference.commonRoot).resolve(urlPath);
    }

    /**
     * Returns the underlying {@link FileSource}, unwrapping a {@link RetryWrapper} if present.
     *
     * @param streamSourceFile source file holding the stream source
     * @return the file source (a {@link ClassCastException} results if it is of another type)
     */
    private FileSource resolveFileSource(StreamSourceFile streamSourceFile) {
        StreamSource source = streamSourceFile.getFileSource();
        if (source instanceof RetryWrapper) {
            RetryWrapper wrapper = (RetryWrapper) source;
            return (FileSource) wrapper.getWrapped();
        }
        return (FileSource) streamSourceFile.getFileSource();
    }

    /**
     * Picks up the NOR flag and the sort columns from the session (when one is given) and then
     * runs the lazy file initialisation.
     *
     * @param gorSession current session, may be {@code null}
     */
    @Override
    public void init(GorSession gorSession) {
        if (gorSession == null) {
            init();
            return;
        }
        this.nor = gorSession.getNorContext();
        gorSession.getGorContext().getSortCols().ifPresent(columns -> {
            // Comma separated list of integer column indices.
            String[] tokens = columns.split(",");
            int[] parsed = new int[tokens.length];
            for (int k = 0; k < tokens.length; k++) {
                parsed[k] = Integer.parseInt(tokens[k]);
            }
            this.sortCols = parsed;
        });
        init();
    }

    /**
     * Lazy, one-shot initialisation: lists the parquet files below {@code resultPath} and reads
     * the header. {@code resultPath} is cleared whether or not this succeeds, so the scan is
     * attempted at most once.
     *
     * @throws GorSystemException wrapping any {@link IOException} raised while scanning
     */
    private void init() {
        if (this.resultPath == null) {
            return;
        }
        try {
            this.parquetPaths = init(this.resultPath);
            this.parquetPathsForSeek = new ArrayList<>(this.parquetPaths);
        } catch (IOException e) {
            throw new GorSystemException("Init parquet", e);
        } finally {
            this.resultPath = null;
        }
    }

    /**
     * Extracts a partition column from the directory that directly contains the given file.
     * A parent directory named {@code name=value} yields {@code {name, value}}.
     *
     * @param str path of a data file
     * @return a two element array {@code {name, value}}, or {@code null} when the file has no
     *         parent, the parent is a filesystem root, or the parent's name does not split into
     *         exactly two parts on {@code '='}
     */
    private String[] extractPartCol(String str) {
        Path parent = Paths.get(str).getParent();
        if (parent == null) {
            return null;
        }
        // A root directory ("/") has no name element: getFileName() returns null for it.
        Path directoryName = parent.getFileName();
        if (directoryName == null) {
            return null;
        }
        String[] parts = directoryName.toString().split("=");
        return parts.length == 2 ? parts : null;
    }

    /**
     * Collects the parquet data files for the given path and initialises the header from the
     * first of them.
     * <p>
     * For a directory, every {@code .parquet} file below it is used. If the first file sits in a
     * {@code name=value} directory, {@code name} becomes the partitioning column; when that name
     * starts with "chrom" (case-insensitively) NOR mode is switched on and the files are ordered
     * by their parent directory. Any other path is used as a single data file.
     *
     * @param path file or directory to read
     * @return mutable list of the data files as Hadoop paths
     * @throws IOException if the directory cannot be walked, contains no {@code .parquet} file,
     *                     or the footer of the first file cannot be read
     */
    private List<org.apache.hadoop.fs.Path> init(Path path) throws IOException {
        List<Path> dataFiles;
        if (Files.isDirectory(path)) {
            try (Stream<Path> walk = Files.walk(path)) {
                dataFiles = walk.filter(ParquetFileIterator::isParquetDataFile).collect(Collectors.toList());
            }
            if (dataFiles.isEmpty()) {
                // Previously this surfaced as a message-less NoSuchElementException further down.
                throw new IOException("No .parquet files found under directory " + path);
            }
            String[] partition = extractPartCol(dataFiles.get(0).toString());
            if (partition != null) {
                this.partitioningCol = partition[0];
                if (this.partitioningCol.toLowerCase().startsWith("chrom")) {
                    this.nor = true;
                    dataFiles = dataFiles.stream()
                            .sorted(Comparator.comparing(Path::getParent))
                            .collect(Collectors.toList());
                }
            }
        } else {
            dataFiles = Collections.singletonList(path);
        }
        headerInit(dataFiles.get(0).toString(), this.partitioningCol);
        // Explicit ArrayList: callers remove entries from the returned list.
        List<org.apache.hadoop.fs.Path> hadoopPaths = new ArrayList<>(dataFiles.size());
        for (Path dataFile : dataFiles) {
            hadoopPaths.add(new org.apache.hadoop.fs.Path(dataFile.toString()));
        }
        return hadoopPaths;
    }

    /**
     * Tells whether the path names a parquet data file: its file name ends with
     * {@code .parquet} and it is not a directory.
     *
     * @param path candidate path
     * @return {@code true} for a non-directory whose name ends with {@code .parquet}
     */
    private static boolean isParquetDataFile(Path path) {
        String fileName = path.getFileName().toString();
        if (!fileName.endsWith(".parquet")) {
            return false;
        }
        return !Files.isDirectory(path);
    }

    /**
     * Rebuilds {@code filter} from the pushdown predicate and the seek predicate: both are
     * AND-ed when present, a single one is used alone, and the filter is {@code null} when
     * neither is set.
     */
    private void updateFilter() {
        FilterPredicate pushed = this.filterPredicate;
        FilterPredicate seek = this.seekfilterPredicate;
        FilterPredicate combined;
        if (pushed == null) {
            combined = seek;
        } else if (seek == null) {
            combined = pushed;
        } else {
            combined = FilterApi.and(pushed, seek);
        }
        this.filter = combined == null ? null : FilterCompat.get(combined);
    }

    /**
     * Opens one parquet file with the current configuration and filter and wraps it in a row
     * reader. The value of a {@code name=value} parent directory, if any, is passed along as
     * the reader's partition value.
     *
     * @param path parquet file to open
     * @return a NOR row reader in NOR mode, otherwise a regular row reader
     * @throws IOException if the underlying parquet reader cannot be built
     */
    private ParquetRowReader initParquetReader(org.apache.hadoop.fs.Path path) throws IOException {
        ParquetReader.Builder<Group> builder = ParquetReader.builder(this.readSupport, path).withConf(this.configuration);
        if (this.filter != null) {
            builder.withFilter(this.filter);
        }
        ParquetReader<Group> parquetReader = builder.build();
        String[] partition = extractPartCol(path.toString());
        String partitionValue = null;
        if (partition != null) {
            partitionValue = partition[1];
        }
        if (this.nor) {
            return new NorParquetRowReader(parquetReader, this.sortCols, partitionValue);
        }
        return new ParquetRowReader(parquetReader, this.lookup, partitionValue);
    }

    /**
     * Opens readers for pending data files and queues those that have a first row.
     * <p>
     * In NOR mode with no sort columns (or exactly one) only the next pending file is opened.
     * Otherwise all pending files are opened in parallel and the pending list is emptied.
     *
     * @throws IOException if a file cannot be opened; in the parallel case every reader that
     *                     was opened before the failure is closed first, so no file handles
     *                     leak, and the pending list is left untouched
     */
    private void subInit() throws IOException {
        boolean oneFileAtATime = this.nor && (this.sortCols == null || this.sortCols.length == 1);
        if (oneFileAtATime) {
            if (!this.parquetPaths.isEmpty()) {
                ParquetRowReader reader = initParquetReader(this.parquetPaths.remove(0));
                if (reader.row != null) {
                    this.mergeParquet.add(reader);
                }
            }
            return;
        }
        Path2ParquetReader opener = new Path2ParquetReader();
        // Keep every reader that did open (failed opens map to null) so they can be cleaned up.
        List<ParquetRowReader> opened = this.parquetPaths.parallelStream()
                .map(opener)
                .filter(Objects::nonNull)
                .collect(Collectors.toList());
        Optional<IOException> failure = opener.getError();
        if (failure.isPresent()) {
            IOException cause = failure.get();
            for (ParquetRowReader reader : opened) {
                try {
                    reader.close();
                } catch (RuntimeException closeFailure) {
                    cause.addSuppressed(closeFailure);
                }
            }
            throw cause;
        }
        for (ParquetRowReader reader : opened) {
            if (reader.row != null) {
                this.mergeParquet.add(reader);
            }
        }
        this.parquetPaths.clear();
    }

    /**
     * Reads the schema from the footer of the given file and derives the header from it.
     * <p>
     * The header is the tab separated list of schema field names. A partitioning column that
     * is not already in the header (compared case-insensitively) is added: in front when it is
     * named "chrom", otherwise at the end. In NOR mode {@code ChromNOR} and {@code PosNOR} are
     * prepended.
     *
     * @param str  path of the parquet file whose footer is read
     * @param str2 name of the partitioning column, or {@code null} when there is none
     * @throws IOException if the file cannot be opened or its footer cannot be read
     */
    private void headerInit(String str, String str2) throws IOException {
        org.apache.hadoop.fs.Path firstFile = new org.apache.hadoop.fs.Path(str);
        // try-with-resources closes the footer reader exactly once, also on failure.
        try (ParquetFileReader footerReader = ParquetFileReader.open(HadoopInputFile.fromPath(firstFile, this.configuration))) {
            this.schema = footerReader.getFooter().getFileMetaData().getSchema();
            setHeader(this.schema.getFields().stream().map(this::getTypeName).collect(Collectors.joining("\t")));
            this.readSupport.init(this.configuration, (Map<String, String>) null, this.schema);
        }
        if (str2 != null) {
            String lowerCase = str2.toLowerCase();
            this.partColPresent = Arrays.asList(super.getHeader().toLowerCase().split("\t")).contains(lowerCase);
            if (!this.partColPresent) {
                setHeader(lowerCase.equals("chrom") ? str2 + "\t" + super.getHeader() : super.getHeader() + "\t" + str2);
            }
        }
        if (this.nor) {
            setHeader("ChromNOR\tPosNOR\t" + super.getHeader());
        }
    }

    /**
     * Returns the name of a schema field; used to build the header from the schema fields.
     *
     * @param type schema field
     * @return the field's name
     */
    private String getTypeName(Type type) {
        return type.getName();
    }

    /**
     * Closes every reader that is currently queued and empties the queue, so that a repeated
     * {@code close()} or a later {@code seek()} does not close the same readers again and
     * {@code hasNext()} cannot poll a reader that has already been closed.
     */
    @Override
    public void close() {
        for (ParquetRowReader reader : this.mergeParquet) {
            reader.close();
        }
        this.mergeParquet.clear();
    }

    /**
     * Returns the header, running the lazy file initialisation first if it has not run yet.
     *
     * @return the header held by the superclass
     */
    @Override // org.gorpipe.gor.model.GenomicIterator
    public String getHeader() {
        // No-op once resultPath has been cleared by an earlier init.
        init();
        return super.getHeader();
    }

    /**
     * Positions the iterator by installing a seek predicate and restarting from the full list
     * of data files. The predicate requires the first header column to equal {@code str}; for
     * {@code i > 1} it additionally requires the second header column to be {@code >= i}.
     * All currently open readers are closed and dropped.
     * <p>
     * NOTE(review): the first header column is addressed as a binary column and the second as
     * an int column — confirm this matches the schemas (and header prefixes) in use.
     *
     * @param str chromosome value to seek to
     * @param i   position to seek to
     * @return always {@code true}
     */
    @Override
    public boolean seek(String str, int i) {
        Binary chromosome = Binary.fromString(str);
        String[] columns = getHeader().split("\t");
        FilterPredicate chromosomeMatches = FilterApi.eq(FilterApi.binaryColumn(columns[0]), chromosome);
        if (i > 1) {
            FilterPredicate fromPosition = FilterApi.gtEq(FilterApi.intColumn(columns[1]), Integer.valueOf(i));
            this.seekfilterPredicate = FilterApi.and(chromosomeMatches, fromPosition);
        } else {
            this.seekfilterPredicate = chromosomeMatches;
        }
        updateFilter();
        for (ParquetRowReader reader : this.mergeParquet) {
            reader.close();
        }
        this.mergeParquet.clear();
        this.parquetPaths = new ArrayList<>(this.parquetPathsForSeek);
        return true;
    }

    /**
     * Line based iteration is not provided by this iterator.
     *
     * @param line ignored
     * @return always {@code false}
     */
    @Override // org.gorpipe.gor.model.GenomicIterator
    public boolean next(Line line) {
        return false;
    }

    /**
     * Advances to the next row. Note that this call itself consumes a row from the head
     * reader of the queue; {@link #next()} only hands out the row fetched here.
     * <p>
     * When the queue is empty, pending files are opened until a reader with rows shows up or
     * no files are left. If a partitioning column exists that the header did not already
     * contain, the reader's partition value is added to the row: in front for "chrom",
     * otherwise at the end.
     *
     * @return {@code true} if a row was fetched, {@code false} when all files are exhausted
     * @throws GorSystemException wrapping an {@link IOException} raised while opening files
     */
    @Override
    public boolean hasNext() {
        ParquetRowReader reader = this.mergeParquet.poll();
        while (reader == null) {
            if (this.parquetPaths.isEmpty()) {
                return false;
            }
            try {
                subInit();
            } catch (IOException e) {
                throw new GorSystemException("Error while reading parquet file", e);
            }
            reader = this.mergeParquet.poll();
        }
        this.row = reader.next();
        boolean partitionMissingFromRow = this.partitioningCol != null && !this.partColPresent;
        if (partitionMissingFromRow) {
            boolean chromPartition = this.partitioningCol.toLowerCase().equals("chrom");
            if (chromPartition) {
                this.row = RowObj.apply(reader.getPart() + "\t" + this.row.toString());
            } else {
                this.row = RowObj.apply(this.row.toString() + "\t" + reader.getPart());
            }
        }
        // Only readers with more rows go back into the merge queue.
        if (reader.hasNext()) {
            this.mergeParquet.add(reader);
        }
        return true;
    }

    /**
     * Returns the row fetched by the most recent {@code hasNext()} call.
     *
     * @return the current row
     * @throws NoSuchElementException if no row has been fetched yet
     */
    @Override
    public Row next() {
        Row current = this.row;
        if (current != null) {
            return current;
        }
        throw new NoSuchElementException();
    }

    /**
     * Builds a comparison predicate on an INT32 column.
     *
     * @param str  literal to compare with, parsed as {@code int}
     * @param str2 column name
     * @param c    operator character: {@code '='} equality, {@code '<'} less-than, anything else greater-than
     * @param z    whether the less/greater comparison also accepts equality
     * @return the predicate
     * @throws NumberFormatException if {@code str} is not an int
     */
    private FilterPredicate intFilter(String str, String str2, char c, boolean z) {
        Integer value = Integer.parseInt(str);
        Operators.IntColumn column = FilterApi.intColumn(str2);
        switch (c) {
            case '=':
                return FilterApi.eq(column, value);
            case '<':
                return z ? FilterApi.ltEq(column, value) : FilterApi.lt(column, value);
            default:
                return z ? FilterApi.gtEq(column, value) : FilterApi.gt(column, value);
        }
    }

    /**
     * Builds a comparison predicate on an INT64 column.
     *
     * @param str  literal to compare with, parsed as {@code long}
     * @param str2 column name
     * @param c    operator character: {@code '='} equality, {@code '<'} less-than, anything else greater-than
     * @param z    whether the less/greater comparison also accepts equality
     * @return the predicate
     * @throws NumberFormatException if {@code str} is not a long
     */
    private FilterPredicate longFilter(String str, String str2, char c, boolean z) {
        Long value = Long.parseLong(str);
        Operators.LongColumn column = FilterApi.longColumn(str2);
        switch (c) {
            case '=':
                return FilterApi.eq(column, value);
            case '<':
                return z ? FilterApi.ltEq(column, value) : FilterApi.lt(column, value);
            default:
                return z ? FilterApi.gtEq(column, value) : FilterApi.gt(column, value);
        }
    }

    /**
     * Builds a comparison predicate on a DOUBLE column.
     *
     * @param str  literal to compare with, parsed as {@code double}
     * @param str2 column name
     * @param c    operator character: {@code '='} equality, {@code '<'} less-than, anything else greater-than
     * @param z    whether the less/greater comparison also accepts equality
     * @return the predicate
     * @throws NumberFormatException if {@code str} is not a double
     */
    private FilterPredicate doubleFilter(String str, String str2, char c, boolean z) {
        Double value = Double.parseDouble(str);
        Operators.DoubleColumn column = FilterApi.doubleColumn(str2);
        switch (c) {
            case '=':
                return FilterApi.eq(column, value);
            case '<':
                return z ? FilterApi.ltEq(column, value) : FilterApi.lt(column, value);
            default:
                return z ? FilterApi.gtEq(column, value) : FilterApi.gt(column, value);
        }
    }

    /**
     * Builds a comparison predicate on a binary (string) column.
     *
     * @param str  literal to compare with
     * @param str2 column name
     * @param c    operator character: {@code '='} equality, {@code '<'} less-than, anything else greater-than
     * @param z    whether the less/greater comparison also accepts equality
     * @return the predicate
     */
    private FilterPredicate stringFilter(String str, String str2, char c, boolean z) {
        Binary value = Binary.fromString(str);
        Operators.BinaryColumn column = FilterApi.binaryColumn(str2);
        switch (c) {
            case '=':
                return FilterApi.eq(column, value);
            case '<':
                return z ? FilterApi.ltEq(column, value) : FilterApi.lt(column, value);
            default:
                return z ? FilterApi.gtEq(column, value) : FilterApi.gt(column, value);
        }
    }

    /**
     * Builds an OR of equality predicates, one per entry of a comma separated value list.
     * Each entry is trimmed and stripped of single quotes before it is compared.
     *
     * @param str  comma separated list of values
     * @param str2 name of the binary column to test
     * @return the combined predicate, or {@code null} when the list splits into no entries
     */
    private FilterPredicate inFilter(String str, String str2) {
        String[] entries = str.split(",");
        Operators.BinaryColumn column = FilterApi.binaryColumn(str2);
        return Arrays.stream(entries)
                .map(entry -> Binary.fromString(entry.trim().replace("'", "")))
                .<FilterPredicate>map(value -> FilterApi.eq(column, value))
                .reduce(FilterApi::or)
                .orElse(null);
    }

    /**
     * Chooses the predicate builder that fits the column type and the shape of the expression.
     * INT32, INT64 and DOUBLE columns use the numeric builders with the pre-extracted literal.
     * Any other type is handled as a string comparison when the expression ends with a single
     * quote, or as an in-list when it has the form {@code <column>in(...)}.
     *
     * @param primitiveTypeName primitive type of the column
     * @param str  whole filter expression
     * @param str2 literal extracted by the caller (used for numeric columns only)
     * @param str3 column name
     * @param c    first character after the column name
     * @param c2   second character after the column name
     * @param z    whether the operator is {@code <=} or {@code >=}
     * @return the predicate, or {@code null} when the expression is not supported
     */
    private FilterPredicate getFilterPredicate(PrimitiveType.PrimitiveTypeName primitiveTypeName, String str, String str2, String str3, char c, char c2, boolean z) {
        switch (primitiveTypeName) {
            case INT32:
                return intFilter(str2, str3, c, z);
            case INT64:
                return longFilter(str2, str3, c, z);
            case DOUBLE:
                return doubleFilter(str2, str3, c, z);
            default:
                break;
        }
        int columnLength = str3.length();
        if (str.endsWith("'")) {
            // Skip the operator and the opening quote; drop the closing quote.
            int literalStart = columnLength + (z ? 3 : 2);
            String literal = str.substring(literalStart, str.length() - 1);
            return stringFilter(literal, str3, c, z);
        }
        if (str.endsWith(")") && c == 'i' && c2 == 'n' && str.charAt(columnLength + 2) == '(') {
            String valueList = str.substring(columnLength + 3, str.length() - 1);
            return inFilter(valueList, str3);
        }
        return null;
    }

    /**
     * Adds a predicate to the pushdown filter (AND-ed with whatever is already there) and
     * refreshes the effective filter.
     *
     * @param filterPredicate predicate to add
     */
    private void mergeWithPreviousFilter(FilterPredicate filterPredicate) {
        FilterPredicate previous = this.filterPredicate;
        this.filterPredicate = previous != null ? FilterApi.and(previous, filterPredicate) : filterPredicate;
        updateFilter();
    }

    /**
     * Tries to turn a filter expression into a parquet predicate that is applied while reading.
     * <p>
     * Supported shapes are {@code IN(...)} (applied to the last header column) and
     * {@code <column><op><value>} with op one of {@code = < > <= >=} or {@code in(...)}.
     * Spaces outside single quotes are ignored; spaces inside a quoted literal are kept so the
     * compared value is not altered.
     * <p>
     * The column type is looked up in the schema by name. The header may carry columns that
     * are not in the schema ({@code ChromNOR}/{@code PosNOR}, an added partitioning column),
     * so a header index must not be used to index the schema columns.
     *
     * @param str filter expression
     * @return {@code true} if the filter was accepted and will be applied by the readers,
     *         {@code false} if it cannot be pushed down (unknown or non-schema column,
     *         unsupported operator, too short expression, unparsable numeric literal)
     */
    @Override
    public boolean pushdownFilter(String str) {
        String compact = stripSpacesOutsideQuotes(str);
        String upper = compact.toUpperCase();
        String[] header = getHeader().split("\t");
        if (upper.startsWith("IN(")) {
            FilterPredicate inPredicate = inFilter(compact.substring(3, compact.length() - 1), header[header.length - 1]);
            if (inPredicate == null) {
                return false;
            }
            mergeWithPreviousFilter(inPredicate);
            return true;
        }
        int idx = 0;
        while (idx < header.length && !upper.startsWith(header[idx].toUpperCase())) {
            idx++;
        }
        if (idx >= header.length) {
            return false;
        }
        String column = header[idx];
        // Need at least the two characters following the column name.
        if (compact.length() < column.length() + 2) {
            return false;
        }
        char op = compact.charAt(column.length());
        char op2 = compact.charAt(column.length() + 1);
        if (op != '<' && op != '>' && op != '=' && op != 'i') {
            return false;
        }
        // Header-only columns cannot be filtered inside the parquet reader.
        if (!this.schema.containsField(column)) {
            return false;
        }
        Type fieldType = this.schema.getType(column);
        if (!fieldType.isPrimitive()) {
            return false;
        }
        PrimitiveType.PrimitiveTypeName typeName = fieldType.asPrimitiveType().getPrimitiveTypeName();
        boolean orEqual = (op == '<' || op == '>') && op2 == '=';
        int valueStart = column.length() + (orEqual ? 2 : 1);
        int valueEnd = compact.length() - 1;
        if (valueStart > valueEnd) {
            return false;
        }
        // NOTE(review): the last character of the expression is dropped here, as in the
        // original code; confirm the expected format of numeric filters with the caller.
        String rawValue = compact.substring(valueStart, valueEnd);
        FilterPredicate predicate;
        try {
            predicate = getFilterPredicate(typeName, compact, rawValue, column, op, op2, orEqual);
        } catch (NumberFormatException e) {
            // Literal does not fit the column type: leave the filter to the caller.
            return false;
        }
        if (predicate == null) {
            return false;
        }
        mergeWithPreviousFilter(predicate);
        return true;
    }

    /** Removes every space that is not enclosed in single quotes. */
    private static String stripSpacesOutsideQuotes(String text) {
        StringBuilder sb = new StringBuilder(text.length());
        boolean quoted = false;
        for (int k = 0; k < text.length(); k++) {
            char ch = text.charAt(k);
            if (ch == '\'') {
                quoted = !quoted;
            }
            if (ch != ' ' || quoted) {
                sb.append(ch);
            }
        }
        return sb.toString();
    }

    /**
     * Restricts reading to the given schema fields by setting {@code parquet.read.schema} to a
     * {@code gortable} message made of those fields, and narrows the header accordingly.
     *
     * @param iArr indices into the schema's field list
     * @return this iterator
     */
    @Override
    public GenomicIterator select(int[] iArr) {
        List<Type> fields = this.schema.getFields();
        String readSchema = Arrays.stream(iArr)
                .mapToObj(idx -> fields.get(idx) + ";\n")
                .collect(Collectors.joining("", "message gortable {\n", "}"));
        this.configuration.set("parquet.read.schema", readSchema);
        selectHeader(iArr);
        return this;
    }
}
