package com.linkedin.feathr.offline.job;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.types.ByteType$;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.IntegerType$;
import org.apache.spark.sql.types.LongType$;
import org.apache.spark.sql.types.NumericType;
import org.apache.spark.sql.types.ShortType$;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.util.sketch.BloomFilter;
import scala.Function2;
import scala.MatchError;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.IterableLike;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;

/* compiled from: DataFrameStatFunctions.scala */
@ScalaSignature(bytes = "\u0006\u0001\u001d4A!\u0002\u0004\u0001#!A\u0001\u0004\u0001B\u0001B\u0003%\u0011\u0004\u0003\u00044\u0001\u0011\u0005\u0001\u0002\u000e\u0005\u0006q\u0001!\t!\u000f\u0005\u0006?\u0002!I\u0001\u0019\u0002\u0017\t\u0006$\u0018M\u0012:b[\u0016\u001cF/\u0019;Gk:\u001cG/[8og*\u0011q\u0001C\u0001\u0004U>\u0014'BA\u0005\u000b\u0003\u001dygM\u001a7j]\u0016T!a\u0003\u0007\u0002\r\u0019,\u0017\r\u001e5s\u0015\tia\"\u0001\u0005mS:\\W\rZ5o\u0015\u0005y\u0011aA2p[\u000e\u00011C\u0001\u0001\u0013!\t\u0019b#D\u0001\u0015\u0015\u0005)\u0012!B:dC2\f\u0017BA\f\u0015\u0005\u0019\te.\u001f*fM\u0006\u0011AM\u001a\t\u00035Ar!aG\u0017\u000f\u0005qQcBA\u000f(\u001d\tqBE\u0004\u0002 E5\t\u0001E\u0003\u0002\"!\u00051AH]8pizJ\u0011aI\u0001\u0004_J<\u0017BA\u0013'\u0003\u0019\t\u0007/Y2iK*\t1%\u0003\u0002)S\u0005)1\u000f]1sW*\u0011QEJ\u0005\u0003W1\n1a]9m\u0015\tA\u0013&\u0003\u0002/_\u00059\u0001/Y2lC\u001e,'BA\u0016-\u0013\t\t$GA\u0005ECR\fgI]1nK*\u0011afL\u0001\u0007y%t\u0017\u000e\u001e 
\u0015\u0005U:\u0004C\u0001\u001c\u0001\u001b\u00051\u0001\"\u0002\r\u0003\u0001\u0004I\u0012A\u00062bi\u000eD7I]3bi\u0016\u0014En\\8n\r&dG/\u001a:\u0015\tiRUK\u0017\t\u0004w}\u0012eB\u0001\u001f?\u001d\tyR(C\u0001\u0016\u0013\tqC#\u0003\u0002A\u0003\n\u00191+Z9\u000b\u00059\"\u0002CA\"I\u001b\u0005!%BA#G\u0003\u0019\u00198.\u001a;dQ*\u0011q\tL\u0001\u0005kRLG.\u0003\u0002J\t\nY!\t\\8p[\u001aKG\u000e^3s\u0011\u0015Y5\u00011\u0001M\u0003-\u0019w\u000e\\;n]:\u000bW.Z:\u0011\u0007mzT\n\u0005\u0002O%:\u0011q\n\u0015\t\u0003?QI!!\u0015\u000b\u0002\rA\u0013X\rZ3g\u0013\t\u0019FK\u0001\u0004TiJLgn\u001a\u0006\u0003#RAQAV\u0002A\u0002]\u000b\u0001#\u001a=qK\u000e$X\r\u001a(v[&#X-\\:\u0011\u0005MA\u0016BA-\u0015\u0005\u0011auN\\4\t\u000bm\u001b\u0001\u0019\u0001/\u0002\u0007\u0019\u0004\b\u000f\u0005\u0002\u0014;&\u0011a\f\u0006\u0002\u0007\t>,(\r\\3\u0002+\t\fGo\u00195Ck&dGM\u00117p_64\u0015\u000e\u001c;feR\u0011!(\u0019\u0005\u0006E\u0012\u0001\raY\u0001\"S:LG/[1mSj,GM\u00117p_64\u0015\u000e\u001c;fe^KG\u000f[\"pYVlgn\u001d\t\u0004w}\"\u0007\u0003B\nf\u001b\nK!A\u001a\u000b\u0003\rQ+\b\u000f\\33\u0001")
/* loaded from: input_file:com/linkedin/feathr/offline/job/DataFrameStatFunctions.class */
/*
 * Builds one Bloom filter per requested column of a DataFrame in a single
 * treeAggregate pass over the selected columns.
 *
 * This source is the Java view of a compiled Scala class, so Scala collection
 * calls take an explicit CanBuildFrom argument and the synthetic
 * "$anonfun$..." helpers are kept with their original names and signatures.
 */
public class DataFrameStatFunctions {
    /** DataFrame whose columns are read to populate the Bloom filters. */
    private final Dataset<Row> df;

    /**
     * Wraps the DataFrame the filters will be built from.
     *
     * @param dataset DataFrame to read column values from
     */
    public DataFrameStatFunctions(Dataset<Row> dataset) {
        this.df = dataset;
    }

    /**
     * Creates a fresh Bloom filter for every named column and fills all of them
     * from this DataFrame.
     *
     * @param seq column names, one filter is built per name
     * @param j   expected number of items, passed to {@code BloomFilter.create}
     * @param d   false-positive probability, passed to {@code BloomFilter.create}
     * @return the filters, in the same order as the column names
     */
    public Seq<BloomFilter> batchCreateBloomFilter(Seq<String> seq, long j, double d) {
        Seq<Tuple2<String, BloomFilter>> namedFilters = (Seq) seq.map(
                columnName -> new Tuple2<String, BloomFilter>(columnName, BloomFilter.create(j, d)),
                Seq$.MODULE$.canBuildFrom());
        return batchBuildBloomFilter(namedFilters);
    }

    /**
     * Fills the supplied filters from the matching columns of the DataFrame.
     *
     * @param seq pairs of (column name, initialized filter); the head of this
     *            sequence is read unconditionally, so it must not be empty
     * @return the filters after aggregation, in input order
     * @throws IllegalArgumentException if a selected column is neither a string
     *         nor a {@code NumericType} (raised by {@code Predef.require}), or,
     *         during aggregation, if it is numeric but not byte/short/int/long
     */
    private Seq<BloomFilter> batchBuildBloomFilter(Seq<Tuple2<String, BloomFilter>> seq) {
        Seq columnNames = (Seq) seq.map(named -> (String) named._1(), Seq$.MODULE$.canBuildFrom());
        Dataset selected = this.df.select((String) columnNames.head(), (Seq) columnNames.tail());

        // Validate every selected column's type up front and keep the types in column order.
        Seq columnTypes = (Seq) selected.schema().map(field -> {
            DataType columnType = field.dataType();
            boolean supported = (columnType != null && columnType.equals(StringType$.MODULE$))
                    || columnType instanceof NumericType;
            Predef$.MODULE$.require(supported, () ->
                    "Bloom filter only supports string type and integral types, but got " + columnType + ".");
            return columnType;
        }, Seq$.MODULE$.canBuildFrom());

        // Pair each column type with its (name, filter) entry; both sequences are in column order.
        Seq contexts = (Seq) ((TraversableLike) columnTypes.zip(seq, Seq$.MODULE$.canBuildFrom())).map(zipped -> {
            Tuple2 typeAndEntry = (Tuple2) zipped;
            if (typeAndEntry != null) {
                Tuple2 nameAndFilter = (Tuple2) typeAndEntry._2();
                if (nameAndFilter != null) {
                    return new BloomFilterContext(
                            (DataType) typeAndEntry._1(),
                            (String) nameAndFilter._1(),
                            (BloomFilter) nameAndFilter._2());
                }
            }
            throw new MatchError(zipped);
        }, Seq$.MODULE$.canBuildFrom());

        Seq aggregated = (Seq) selected.queryExecution().toRdd().treeAggregate(
                contexts,
                // seqOp: feed one row into every column's filter; the index in the
                // context sequence is also the column ordinal in the row.
                (partial, row) -> {
                    ((IterableLike) partial.zipWithIndex(Seq$.MODULE$.canBuildFrom())).foreach(indexed -> {
                        $anonfun$batchBuildBloomFilter$6(row, (Tuple2) indexed);
                        return BoxedUnit.UNIT;
                    });
                    return partial;
                },
                // combOp: merge the right-hand filters into the left-hand ones, position by position.
                (left, right) -> (Seq) ((TraversableLike) left.zip(right, Seq$.MODULE$.canBuildFrom())).map(pair -> {
                    if (pair == null) {
                        throw new MatchError(pair);
                    }
                    Tuple2 both = (Tuple2) pair;
                    BloomFilterContext kept = (BloomFilterContext) both._1();
                    kept.initializedFilter().mergeInPlace(((BloomFilterContext) both._2()).initializedFilter());
                    return kept;
                }, Seq$.MODULE$.canBuildFrom()),
                selected.queryExecution().toRdd().treeAggregate$default$4(contexts),
                ClassTag$.MODULE$.apply(Seq.class));

        return (Seq) aggregated.map(
                context -> ((BloomFilterContext) context).initializedFilter(),
                Seq$.MODULE$.canBuildFrom());
    }

    /** Puts the UTF-8 bytes of the string at ordinal {@code i} into the filter. */
    public static final /* synthetic */ void $anonfun$batchBuildBloomFilter$7(int i, BloomFilter bloomFilter, InternalRow internalRow) {
        byte[] utf8Bytes = internalRow.getUTF8String(i).getBytes();
        bloomFilter.putBinary(utf8Bytes);
    }

    /** Puts the byte at ordinal {@code i} into the filter, widened to long. */
    public static final /* synthetic */ void $anonfun$batchBuildBloomFilter$8(int i, BloomFilter bloomFilter, InternalRow internalRow) {
        long widened = internalRow.getByte(i);
        bloomFilter.putLong(widened);
    }

    /** Puts the short at ordinal {@code i} into the filter, widened to long. */
    public static final /* synthetic */ void $anonfun$batchBuildBloomFilter$9(int i, BloomFilter bloomFilter, InternalRow internalRow) {
        long widened = internalRow.getShort(i);
        bloomFilter.putLong(widened);
    }

    /** Puts the int at ordinal {@code i} into the filter, widened to long. */
    public static final /* synthetic */ void $anonfun$batchBuildBloomFilter$10(int i, BloomFilter bloomFilter, InternalRow internalRow) {
        long widened = internalRow.getInt(i);
        bloomFilter.putLong(widened);
    }

    /** Puts the long at ordinal {@code i} into the filter. */
    public static final /* synthetic */ void $anonfun$batchBuildBloomFilter$11(int i, BloomFilter bloomFilter, InternalRow internalRow) {
        long value = internalRow.getLong(i);
        bloomFilter.putLong(value);
    }

    /**
     * Adds one row's value for a single column to that column's Bloom filter.
     *
     * @param internalRow row being aggregated
     * @param tuple2      pair of (BloomFilterContext, column ordinal) produced by
     *                    {@code zipWithIndex}
     * @throws MatchError               if {@code tuple2} is null
     * @throws IllegalArgumentException if the column type is not string, byte,
     *         short, int or long; the up-front schema check admits every
     *         {@code NumericType}, so other numeric types are rejected here
     */
    public static final /* synthetic */ void $anonfun$batchBuildBloomFilter$6(InternalRow internalRow, Tuple2 tuple2) {
        if (tuple2 == null) {
            throw new MatchError(tuple2);
        }
        BloomFilterContext context = (BloomFilterContext) tuple2._1();
        int columnIndex = tuple2._2$mcI$sp();
        DataType dataType = context.dataType();
        BloomFilter filter = context.initializedFilter();

        // Dispatch on the column type and actually apply the update; previously an
        // updater was selected but never invoked, so the filters stayed empty.
        if (StringType$.MODULE$.equals(dataType)) {
            $anonfun$batchBuildBloomFilter$7(columnIndex, filter, internalRow);
        } else if (ByteType$.MODULE$.equals(dataType)) {
            $anonfun$batchBuildBloomFilter$8(columnIndex, filter, internalRow);
        } else if (ShortType$.MODULE$.equals(dataType)) {
            $anonfun$batchBuildBloomFilter$9(columnIndex, filter, internalRow);
        } else if (IntegerType$.MODULE$.equals(dataType)) {
            $anonfun$batchBuildBloomFilter$10(columnIndex, filter, internalRow);
        } else if (LongType$.MODULE$.equals(dataType)) {
            $anonfun$batchBuildBloomFilter$11(columnIndex, filter, internalRow);
        } else {
            throw new IllegalArgumentException("Bloom filter only supports string type and integral types, "
                    + "and does not support type " + dataType + ".");
        }
    }
}
