package com.linkedin.feathr.offline.join;

import com.linkedin.feathr.offline.client.DataFrameColName$;
import com.linkedin.feathr.offline.config.FeatureJoinConfig;
import com.linkedin.feathr.offline.job.DataFrameStatFunctions;
import com.linkedin.feathr.offline.join.algorithms.SaltedSparkJoin;
import com.linkedin.feathr.offline.join.algorithms.SaltedSparkJoin$;
import com.linkedin.feathr.offline.swa.SlidingWindowFeatureUtils$;
import com.linkedin.feathr.offline.util.SourceUtils$;
import com.linkedin.feathr.offline.util.datetime.DateTimeInterval;
import org.apache.log4j.Logger;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import org.apache.spark.sql.functions$;
import org.apache.spark.util.sketch.BloomFilter;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.Iterable;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SeqLike;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.$colon;
import scala.collection.immutable.Map;
import scala.collection.immutable.Map$;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.HashMap;
import scala.collection.mutable.Iterable$;
import scala.math.package$;
import scala.runtime.BoxesRunTime;

/* compiled from: OptimizerUtils.scala */
/* loaded from: input_file:com/linkedin/feathr/offline/join/OptimizerUtils$.class */
public final class OptimizerUtils$ {
    /** Singleton instance; assigned by the private constructor when the static initializer below runs. */
    public static OptimizerUtils$ MODULE$;
    /** Logger returned by {@code log()}; populated on first use by {@code log$lzycompute()}. */
    private transient Logger log;
    /** Cap applied to the estimated row count when sizing bloom filters (set to 50000000 in the constructor). */
    private final long maxExpectedItemForBloomfilter;
    /**
     * Passed as the last argument of {@code batchCreateBloomFilter} (set to 0.05 in the constructor).
     * NOTE(review): presumably the bloom filter false-positive probability, going by the name -- confirm.
     */
    private final double bloomFilterFPP;
    /** Set to {@code true} by {@code log$lzycompute()} once {@code log} has been assigned. */
    private volatile transient boolean bitmap$trans$0;

    // Creates the singleton; the constructor itself stores the new instance in MODULE$.
    static {
        new OptimizerUtils$();
    }

    /**
     * Returns the upper bound used for the expected item count of a bloom filter.
     *
     * @return the value of the {@code maxExpectedItemForBloomfilter} field
     */
    public long maxExpectedItemForBloomfilter() {
        return maxExpectedItemForBloomfilter;
    }

    /**
     * Returns the value handed to bloom filter creation as its last argument.
     *
     * @return the value of the {@code bloomFilterFPP} field
     */
    public double bloomFilterFPP() {
        return bloomFilterFPP;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v8, types: [com.linkedin.feathr.offline.join.OptimizerUtils$] */
    private Logger log$lzycompute() {
        ?? r0 = this;
        synchronized (r0) {
            if (!this.bitmap$trans$0) {
                this.log = Logger.getLogger(getClass().getName());
                r0 = this;
                r0.bitmap$trans$0 = true;
            }
        }
        return this.log;
    }

    /**
     * Returns the logger, going through {@code log$lzycompute()} when the initialization flag
     * has not been set yet.
     *
     * @return the logger for this object
     */
    private Logger log() {
        if (this.bitmap$trans$0) {
            return this.log;
        }
        return log$lzycompute();
    }

    /**
     * Prepares an observation dataset for feature joining and bundles the results into a
     * {@link PreprocessedObservation}.
     *
     * <p>Steps, in order: obtain the optional time interval and optional expression string from
     * {@code SlidingWindowFeatureUtils$.getObsSwaDataTimeRange}; derive the sliding-window related
     * column names; fold over {@code seq}, letting {@code DataFrameKeyCombiner} add one combined-key
     * column per entry; add a UID column; then build the bloom filters, the column-pruned dataset
     * and the salted-join frequent-item datasets.
     *
     * @param dataset observation data
     * @param featureJoinConfig join configuration; only {@code settings()} is read here
     * @param seq entries whose first element is a sequence of values that are applied to
     *     {@code seq2}; the second element is not read in this method. NOTE(review): presumably
     *     (key-tag indices, feature names) pairs -- confirm against the Scala source.
     * @param seq2 sequence that the values in each entry's first element are applied to
     * @param option forwarded to {@code generateBloomFilters} as its threshold setting
     * @param option2 forwarded to {@code generateSaltedJoinFrequentItems}
     * @param seq3 extra column names kept by the {@code select} that produces the pruned dataset
     * @return the assembled {@link PreprocessedObservation}
     * @throws MatchError if the time-range helper, the key combiner, or the fold yields {@code null}
     */
    public PreprocessedObservation preProcessObservation(Dataset<Row> dataset, FeatureJoinConfig featureJoinConfig, Seq<Tuple2<Seq<Object>, Seq<String>>> seq, Seq<String> seq2, Option<Object> option, Option<SaltedSparkJoin.JoinParameters> option2, Seq<String> seq3) {
        Tuple2<Option<DateTimeInterval>, Option<String>> obsSwaDataTimeRange = SlidingWindowFeatureUtils$.MODULE$.getObsSwaDataTimeRange(dataset, featureJoinConfig.settings());
        if (obsSwaDataTimeRange == null) {
            throw new MatchError(obsSwaDataTimeRange);
        }
        Tuple2 tuple2 = new Tuple2((Option) obsSwaDataTimeRange._1(), (Option) obsSwaDataTimeRange._2());
        // option3 is the optional time interval; it is passed through to the result unchanged.
        Option option3 = (Option) tuple2._1();
        Seq<String> slidingWindowRelatedColumns = getSlidingWindowRelatedColumns((Option) tuple2._2(), seq, seq2);
        // Filled by the fold below: first element of each seq entry -> generated key column name.
        HashMap<Seq<Object>, String> hashMap = new HashMap<>();
        // The accumulator is (key -> column map, dataset so far, generated column names in fold order).
        Tuple3 tuple3 = (Tuple3) seq.foldLeft(new Tuple3(hashMap, dataset, Nil$.MODULE$), (tuple32, tuple22) -> {
            Seq seq4 = (Seq) tuple22._1();
            // seq2 is passed to map as the mapping function, so each value in seq4 is applied to seq2.
            Seq<String> seq5 = (Seq) seq4.map(seq2, Seq$.MODULE$.canBuildFrom());
            HashMap hashMap2 = (HashMap) tuple32._1();
            Dataset<Row> dataset2 = (Dataset) tuple32._2();
            DataFrameKeyCombiner apply = DataFrameKeyCombiner$.MODULE$.apply();
            // combine returns (column name, dataset); both are carried into the next fold step.
            Tuple2<String, Dataset<Row>> combine = apply.combine(dataset2, seq5, apply.combine$default$3());
            if (combine == null) {
                throw new MatchError(combine);
            }
            // NOTE(review): decompiler artifact -- this local reuses the lambda parameter name tuple22.
            Tuple2 tuple22 = new Tuple2((String) combine._1(), (Dataset) combine._2());
            String str = (String) tuple22._1();
            Dataset dataset3 = (Dataset) tuple22._2();
            hashMap2.put(seq4, str);
            return new Tuple3(hashMap2, dataset3, ((TraversableLike) tuple32._3()).$plus$plus(new $colon.colon(str, Nil$.MODULE$), Seq$.MODULE$.canBuildFrom()));
        });
        if (tuple3 == null) {
            throw new MatchError(tuple3);
        }
        Tuple2 tuple23 = new Tuple2((Dataset) tuple3._2(), (Seq) tuple3._3());
        // dataset2: the dataset returned by the last combine call; seq4: the generated column names.
        Dataset<Row> dataset2 = (Dataset) tuple23._1();
        Seq seq4 = (Seq) tuple23._2();
        // The generated column names again, this time read from the map's values.
        Seq<String> seq5 = ((TraversableOnce) hashMap.map(tuple24 -> {
            return (String) tuple24._2();
        }, Iterable$.MODULE$.canBuildFrom())).toSeq();
        // UID column name followed by the sliding-window related columns.
        Seq seq6 = (Seq) new $colon.colon(DataFrameColName$.MODULE$.UidColumnName(), Nil$.MODULE$).$plus$plus(slidingWindowRelatedColumns, Seq$.MODULE$.canBuildFrom());
        Dataset<Row> withColumn = dataset2.withColumn(DataFrameColName$.MODULE$.UidColumnName(), functions$.MODULE$.monotonically_increasing_id());
        // Columns kept in the pruned dataset: UID, sliding-window columns, key columns and seq3, de-duplicated.
        Seq seq7 = (Seq) ((SeqLike) ((TraversableLike) seq6.$plus$plus(seq5, Seq$.MODULE$.canBuildFrom())).$plus$plus(seq3, Seq$.MODULE$.canBuildFrom())).distinct();
        // The frequent-item datasets are computed from dataset2, i.e. before the UID column was added.
        return new PreprocessedObservation(generateBloomFilters(option, withColumn, seq, seq5, hashMap), withColumn.select((String) seq7.head(), (Seq) seq7.tail()), withColumn.drop(seq4), option3, slidingWindowRelatedColumns, generateSaltedJoinFrequentItems(hashMap.toMap(Predef$.MODULE$.$conforms()), dataset2, option2));
    }

    /**
     * Optionally builds one bloom filter per entry of {@code hashMap}.
     *
     * <p>The expected item count is the estimated row count of {@code dataset}, capped at
     * {@code maxExpectedItemForBloomfilter()}. Nothing is built when {@code option} holds 0.
     * Otherwise filters are built when the estimate is not positive, or when {@code seq} is
     * non-empty and {@code option} is empty, holds -1, or holds a positive value greater than
     * the estimate.
     *
     * @param option threshold setting; its content is unboxed as an {@code int}
     * @param dataset data whose row count is estimated and from which the filters are created
     * @param seq only checked for non-emptiness here
     * @param seq2 column names handed to {@code batchCreateBloomFilter}
     * @param hashMap key -> column name map whose entries are zipped with the created filters
     * @return {@code Some} map from key to bloom filter, or {@code None} when filters are skipped
     */
    private Option<Map<Seq<Object>, BloomFilter>> generateBloomFilters(Option<Object> option, Dataset<Row> dataset, Seq<Tuple2<Seq<Object>, Seq<String>>> seq, Seq<String> seq2, HashMap<Seq<Object>, String> hashMap) {
        boolean z;
        boolean z2;
        Some some;
        long min = package$.MODULE$.min(SourceUtils$.MODULE$.estimateRDDRow(dataset.rdd(), SourceUtils$.MODULE$.estimateRDDRow$default$2()), maxExpectedItemForBloomfilter());
        // z3: an explicit setting of 0 switches bloom filters off.
        boolean z3 = option.isDefined() && BoxesRunTime.unboxToInt(option.get()) == 0;
        if (min <= 0) {
            // A non-positive estimate enables the filters without consulting option or seq.
            z2 = true;
        } else {
            if (option.isEmpty()) {
                z = true;
            } else {
                int unboxToInt = BoxesRunTime.unboxToInt(option.get());
                z = unboxToInt == -1 || (unboxToInt > 0 && min < ((long) unboxToInt));
            }
            z2 = z && seq.nonEmpty();
        }
        boolean z4 = z2;
        if (z3 || !z4) {
            some = None$.MODULE$;
        } else {
            // Pairs each (key, column name) entry with a created filter by position, then keeps (key, filter).
            // NOTE(review): this assumes the filters come back in the order of seq2 and that seq2 matches
            // hashMap.toSeq order; the caller derives seq2 from this map's values -- confirm.
            // NOTE(review): decompiler artifact -- the lambda redeclares tuple2 for the inner pair.
            some = new Some(((TraversableOnce) ((TraversableLike) hashMap.toSeq().zip(new DataFrameStatFunctions(dataset).batchCreateBloomFilter(seq2, min > 0 ? min : maxExpectedItemForBloomfilter(), bloomFilterFPP()), Seq$.MODULE$.canBuildFrom())).map(tuple2 -> {
                if (tuple2 != null) {
                    Tuple2 tuple2 = (Tuple2) tuple2._1();
                    BloomFilter bloomFilter = (BloomFilter) tuple2._2();
                    if (tuple2 != null) {
                        return new Tuple2((Seq) tuple2._1(), bloomFilter);
                    }
                }
                throw new MatchError(tuple2);
            }, Seq$.MODULE$.canBuildFrom())).toMap(Predef$.MODULE$.$conforms()));
        }
        return some;
    }

    /**
     * Collects the top-level field names referenced by an optional expression string and by the
     * entries of {@code seq2} that the keys in {@code seq} point at.
     *
     * <p>When {@code option} is empty the result is empty and nothing is parsed. Otherwise the
     * expression in {@code option} and the selected entries of {@code seq2} are parsed separately;
     * an {@code Exception} in either part replaces that part with an empty list instead of
     * propagating.
     *
     * @param option optional expression string; parsed through {@code getTopLevelReferencedFields$1}
     * @param seq entries whose first element holds values unboxed as {@code int} indices into {@code seq2}
     * @param seq2 strings that are parsed as expressions for the referenced indices
     * @return the distinct field names, those from {@code option} first
     */
    private Seq<String> getSlidingWindowRelatedColumns(Option<String> option, Seq<Tuple2<Seq<Object>, Seq<String>>> seq, Seq<String> seq2) {
        return (Seq) option.map(str -> {
            Seq seq3;
            Seq seq4;
            SparkSession orCreate = SparkSession$.MODULE$.builder().getOrCreate();
            // Fields referenced by the expression string itself.
            try {
                seq3 = getTopLevelReferencedFields$1(str, orCreate);
            } catch (Exception unused) {
                seq3 = Nil$.MODULE$;
            }
            Seq seq5 = seq3;
            // Fields referenced by every seq2 entry that some key index in seq selects.
            try {
                seq4 = (Seq) seq.flatMap(tuple2 -> {
                    return (Seq) ((Seq) tuple2._1()).flatMap(obj -> {
                        return $anonfun$getSlidingWindowRelatedColumns$4(seq2, orCreate, BoxesRunTime.unboxToInt(obj));
                    }, Seq$.MODULE$.canBuildFrom());
                }, Seq$.MODULE$.canBuildFrom());
            } catch (Exception unused2) {
                seq4 = Nil$.MODULE$;
            }
            return (Seq) ((SeqLike) seq5.$plus$plus(seq4, Seq$.MODULE$.canBuildFrom())).distinct();
        }).getOrElse(() -> {
            return Nil$.MODULE$;
        });
    }

    /**
     * Builds, per key, the frequent-items dataset used by the salted join.
     *
     * <p>Returns an empty map when {@code option} is empty. Otherwise, for each
     * (key, column name) entry, {@code SaltedSparkJoin$.getFrequentItemsDataFrame} is called with
     * the estimator and threshold from the join parameters; the entry is kept only if the
     * resulting dataset has at least one row, and an info message is logged for every dropped key.
     *
     * @param map key -> combined-key column name
     * @param dataset dataset passed to {@code getFrequentItemsDataFrame}
     * @param option salted-join parameters ({@code estimator()} and {@code frequentItemThreshold()} are read)
     * @return map from key to its non-empty frequent-items dataset
     * @throws MatchError if {@code map} yields a {@code null} entry
     */
    private Map<Seq<Object>, Dataset<Row>> generateSaltedJoinFrequentItems(Map<Seq<Object>, String> map, Dataset<Row> dataset, Option<SaltedSparkJoin.JoinParameters> option) {
        return option.isEmpty() ? Predef$.MODULE$.Map().apply(Nil$.MODULE$) : (Map) map.flatMap(tuple2 -> {
            Iterable option2Iterable;
            if (tuple2 == null) {
                throw new MatchError(tuple2);
            }
            Seq seq = (Seq) tuple2._1();
            String str = (String) tuple2._2();
            Dataset<Row> frequentItemsDataFrame = SaltedSparkJoin$.MODULE$.getFrequentItemsDataFrame(((SaltedSparkJoin.JoinParameters) option.get()).estimator(), dataset, str, ((SaltedSparkJoin.JoinParameters) option.get()).frequentItemThreshold());
            // head(1) returns at most one row; a non-empty array means the dataset has data.
            if (new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) frequentItemsDataFrame.head(1))).nonEmpty()) {
                option2Iterable = Option$.MODULE$.option2Iterable(new Some(new Tuple2(seq, frequentItemsDataFrame)));
            } else {
                MODULE$.log().info(new StringBuilder(40).append("Salted Join: no frequent items for key: ").append(str).toString());
                // An empty iterable makes flatMap drop this key from the result.
                option2Iterable = Option$.MODULE$.option2Iterable(None$.MODULE$);
            }
            return option2Iterable;
        }, Map$.MODULE$.canBuildFrom());
    }

    /**
     * Parses {@code str} with the session's SQL parser and returns, for every attribute the
     * expression references, the first dot-separated segment of the attribute's name.
     *
     * @param str expression text to parse
     * @param sparkSession session whose {@code sqlParser()} is used
     * @return one leading name segment per referenced attribute
     */
    private static final Seq getTopLevelReferencedFields$1(String str, SparkSession sparkSession) {
        TraversableOnce topLevelNames = (TraversableOnce) sparkSession.sessionState().sqlParser().parseExpression(str).references().map(attribute -> {
            String[] nameSegments = attribute.name().split("\\.");
            return (String) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(nameSegments)).head();
        }, scala.collection.Iterable$.MODULE$.canBuildFrom());
        return topLevelNames.toSeq();
    }

    /**
     * Synthetic lambda body: takes element {@code i} of {@code seq} as an expression string and
     * returns the top-level fields that expression references.
     *
     * @param seq sequence of expression strings
     * @param sparkSession session used for parsing
     * @param i index into {@code seq}
     * @return result of {@code getTopLevelReferencedFields$1} for the selected element
     */
    public static final /* synthetic */ Seq $anonfun$getSlidingWindowRelatedColumns$4(Seq seq, SparkSession sparkSession, int i) {
        String expression = (String) seq.apply(i);
        return getTopLevelReferencedFields$1(expression, sparkSession);
    }

    /**
     * Stores the new instance in {@code MODULE$} and sets the bloom filter constants:
     * an item cap of 50,000,000 and a {@code bloomFilterFPP} of 0.05.
     */
    private OptimizerUtils$() {
        MODULE$ = this;
        maxExpectedItemForBloomfilter = 50_000_000L;
        bloomFilterFPP = 0.05;
    }
}
