package ai.chronon.spark;

import ai.chronon.api.Constants$;
import ai.chronon.api.StructField;
import ai.chronon.online.SparkConversions$;
import ai.chronon.spark.Extensions;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StructType;
import scala.Array$;
import scala.Predef$;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.GenTraversableOnce;
import scala.collection.IterableLike;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.collection.immutable.Nil$;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;

/* compiled from: Join.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005ee\u0001B\u0001\u0003\u0001%\u0011AAS8j]*\u00111\u0001B\u0001\u0006gB\f'o\u001b\u0006\u0003\u000b\u0019\tqa\u00195s_:|gNC\u0001\b\u0003\t\t\u0017n\u0001\u0001\u0014\u0005\u0001Q\u0001CA\u0006\r\u001b\u0005\u0011\u0011BA\u0007\u0003\u0005!\u0011\u0015m]3K_&t\u0007\u0002C\b\u0001\u0005\u0003\u0005\u000b\u0011\u0002\t\u0002\u0011)|\u0017N\\\"p]\u001a\u0004\"!\u0005\u000b\u000e\u0003IQ!a\u0005\u0003\u0002\u0007\u0005\u0004\u0018.\u0003\u0002\u0002%!Aa\u0003\u0001B\u0001B\u0003%q#\u0001\u0007f]\u0012\u0004\u0016M\u001d;ji&|g\u000e\u0005\u0002\u0019=9\u0011\u0011\u0004H\u0007\u00025)\t1$A\u0003tG\u0006d\u0017-\u0003\u0002\u001e5\u00051\u0001K]3eK\u001aL!a\b\u0011\u0003\rM#(/\u001b8h\u0015\ti\"\u0004\u0003\u0005#\u0001\t\u0005\t\u0015!\u0003$\u0003)!\u0018M\u00197f+RLGn\u001d\t\u0003\u0017\u0011J!!\n\u0002\u0003\u0015Q\u000b'\r\\3Vi&d7\u000f\u0003\u0005(\u0001\t\u0005\t\u0015!\u0003)\u00035\u00198.\u001b9GSJ\u001cH\u000fS8mKB\u0011\u0011$K\u0005\u0003Ui\u0011qAQ8pY\u0016\fg\u000eC\u0003-\u0001\u0011\u0005Q&\u0001\u0004=S:LGO\u0010\u000b\u0006]=\u0002\u0014G\r\t\u0003\u0017\u0001AQaD\u0016A\u0002AAQAF\u0016A\u0002]AQAI\u0016A\u0002\rBqaJ\u0016\u0011\u0002\u0003\u0007\u0001\u0006C\u00045\u0001\t\u0007I\u0011B\u001b\u0002\u001d\t|w\u000e^:ue\u0006\u0004H+\u00192mKV\tq\u0003\u0003\u00048\u0001\u0001\u0006IaF\u0001\u0010E>|Go\u001d;sCB$\u0016M\u00197fA!)\u0011\b\u0001C\u0005u\u0005I\u0001/\u00193GS\u0016dGm\u001d\u000b\u0004wQ3\u0006C\u0001\u001fR\u001d\tidJ\u0004\u0002?\u0017:\u0011q(\u0013\b\u0003\u0001\u001as!!\u0011#\u000e\u0003\tS!a\u0011\u0005\u0002\rq\u0012xn\u001c;?\u0013\u0005)\u0015aA8sO&\u0011q\tS\u0001\u0007CB\f7\r[3\u000b\u0003\u0015K!a\u0001&\u000b\u0005\u001dC\u0015B\u0001'N\u0003\r\u0019\u0018\u000f\u001c\u0006\u0003\u0007)K!a\u0014)\u0002\u000fA\f7m[1hK*\u0011A*T\u0005\u0003%N\u0013\u0011\u0002R1uC\u001a\u0013\u0018-\\3\u000b\u0005=\u0003\u0006\"B+9\u0001\u0004Y\u0014A\u00013g\u0011\u00159\u0006\b1\u0001Y\u0003)\u0019H
O];diRK\b/\u001a\t\u00033rk\u0011A\u0017\u0006\u00037B\u000bQ\u0001^=qKNL!!\u0018.\u0003\u0015M#(/^2u)f\u0004X\rC\u0003`\u0001\u0011%\u0001-A\u0007u_N\u0003\u0018M]6TG\",W.\u0019\u000b\u00031\u0006DQA\u00190A\u0002\r\faAZ5fY\u0012\u001c\bc\u00013hS6\tQM\u0003\u0002g5\u0005Q1m\u001c7mK\u000e$\u0018n\u001c8\n\u0005!,'aA*fcB\u0011\u0011C[\u0005\u0003WJ\u00111b\u0015;sk\u000e$h)[3mI\")Q\u000e\u0001C\u0005]\u0006\t\u0002/\u00193FqR,'O\\1m\r&,G\u000eZ:\u0015\u0007mz\u0017\u000fC\u0003qY\u0002\u00071(A\u0006c_>$8\u000f\u001e:ba\u00123\u0007\"\u0002:m\u0001\u0004\u0019\u0018!\u00042p_R\u001cHO]1q\u0013:4w\u000e\u0005\u0002\fi&\u0011QO\u0001\u0002\u000e\u0005>|Go\u001d;sCBLeNZ8\t\u000b]\u0004A\u0011\u0002=\u0002!A\fGm\u0012:pkB\u0014\u0015PR5fY\u0012\u001cHcA\u001ezw\")!P\u001ea\u0001w\u0005Q!-Y:f\u0015>Lg\u000e\u00124\t\u000bI4\b\u0019A:\t\u000bu\u0004A\u0011\u0002@\u00023\u0019Lg\u000e\u001a\"p_R\u001cHO]1q'\u0016$8i\u001c<fe&twm\u001d\u000b\b\u007f\u0006U\u0011qCA\r!\u0011!w-!\u0001\u0011\u000fe\t\u0019!a\u0002\u0002\u000e%\u0019\u0011Q\u0001\u000e\u0003\rQ+\b\u000f\\33!\rY\u0011\u0011B\u0005\u0004\u0003\u0017\u0011!\u0001\u0005&pS:\u0004\u0016M\u001d;NKR\fG-\u0019;b!\u0011!w-a\u0004\u0011\u0007-\t\t\"C\u0002\u0002\u0014\t\u00111bQ8wKJLgnZ*fi\")\u0001\u000f a\u0001w!)!\u000f 
a\u0001g\"9\u00111\u0004?A\u0002\u0005u\u0011!\u00037fMR\u0014\u0016M\\4f!\rY\u0011qD\u0005\u0004\u0003C\u0011!A\u0004)beRLG/[8o%\u0006tw-\u001a\u0005\b\u0003K\u0001A\u0011IA\u0014\u00031\u0019w.\u001c9vi\u0016\u0014\u0016M\\4f)\u001dY\u0014\u0011FA\u0017\u0003_Aq!a\u000b\u0002$\u0001\u00071(\u0001\u0004mK\u001a$HI\u001a\u0005\t\u00037\t\u0019\u00031\u0001\u0002\u001e!1!/a\tA\u0002MDq!a\r\u0001\t\u0003\t)$A\bbaBd\u0017\u0010R3sSZ\fG/[8o)\u001dY\u0014qGA\u001e\u0003{Aq!!\u000f\u00022\u0001\u00071(\u0001\u0004cCN,GI\u001a\u0005\u0007e\u0006E\u0002\u0019A:\t\u0011\u0005}\u0012\u0011\u0007a\u0001\u0003\u0003\n1\u0002\\3gi\u000e{G.^7ogB\u0019AmZ\f\t\u000f\u0005\u0015\u0003\u0001\"\u0001\u0002H\u000592\r\\3b]V\u00038i\u001c8uKb$X/\u00197GS\u0016dGm\u001d\u000b\bw\u0005%\u0013QJA(\u0011\u001d\tY%a\u0011A\u0002m\nqAZ5oC2$e\r\u0003\u0004s\u0003\u0007\u0002\ra\u001d\u0005\t\u0003\u007f\t\u0019\u00051\u0001\u0002B!9\u00111\u000b\u0001\u0005\n\u0005U\u0013!F2p[B,H/\u001a\"p_R\u001cHO]1q)\u0006\u0014G.\u001a\u000b\bw\u0005]\u0013\u0011LA/\u0011\u001d\tY#!\u0015A\u0002mB\u0001\"a\u0017\u0002R\u0001\u0007\u0011QD\u0001\u0006e\u0006tw-\u001a\u0005\u0007e\u0006E\u0003\u0019A:\t\u000f\u0005\u0005\u0004\u0001\"\u0003\u0002d\u0005\u0019b-\u001b8e+:4\u0017\u000e\u001c7fIJ+7m\u001c:egR)1(!\u001a\u0002h!1\u0001/a\u0018A\u0002mB\u0001\"!\u001b\u0002`\u0001\u0007\u0011QB\u0001\fG>4XM]5oON+GoB\u0005\u0002n\t\t\t\u0011#\u0001\u0002p\u0005!!j\\5o!\rY\u0011\u0011\u000f\u0004\t\u0003\t\t\t\u0011#\u0001\u0002tM!\u0011\u0011OA;!\rI\u0012qO\u0005\u0004\u0003sR\"AB!osJ+g\rC\u0004-\u0003c\"\t!! \u0015\u0005\u0005=\u0004BCAA\u0003c\n\n\u0011\"\u0001\u0002\u0004\u0006YB\u0005\\3tg&t\u0017\u000e\u001e\u0013he\u0016\fG/\u001a:%I\u00164\u0017-\u001e7uIQ*\"!!\"+\u0007!\n9i\u000b\u0002\u0002\nB!\u00111RAK\u001b\t\tiI\u0003\u0003\u0002\u0010\u0006E\u0015!C;oG\",7m[3e\u0015\r\t\u0019JG\u0001\u000bC:tw\u000e^1uS>t\u0017\u0002BAL\u0003\u001b\u0013\u0011#\u001e8dQ\u0016\u001c7.\u001a3WCJL\u0017M\\2f\u0001")
/* loaded from: input_file:ai/chronon/spark/Join.class */
public class Join extends BaseJoin {
    // Join configuration handed to the constructor. Public with a compiler-mangled name,
    // apparently so the separately compiled closure classes (Join$$anonfun$...) can read it.
    public final ai.chronon.api.Join ai$chronon$spark$Join$$joinConf;
    // Table utilities handed to the constructor; used here for unfilledRanges(...) and sql(...).
    public final TableUtils ai$chronon$spark$Join$$tableUtils;
    // Constructor flag forwarded as the last argument of tableUtils.unfilledRanges(...).
    private final boolean skipFirstHole;
    // Bootstrap table name, derived once in the constructor from the join's metadata.
    private final String ai$chronon$spark$Join$$bootstrapTable;

    /**
     * Accessor for the bootstrap table name that the constructor derived from the
     * join's metadata.
     *
     * @return the value stored in the {@code ai$chronon$spark$Join$$bootstrapTable} field
     */
    public String ai$chronon$spark$Join$$bootstrapTable() {
        return ai$chronon$spark$Join$$bootstrapTable;
    }

    /**
     * Folds every field of {@code structType} into {@code dataset}: the fold starts from
     * the unmodified dataset and applies {@code Join$$anonfun$padFields$1} once per field.
     *
     * NOTE(review): the closure body is compiled separately and is not visible here;
     * going by the method name it presumably adds columns that are missing - confirm.
     *
     * @param dataset    starting value of the fold
     * @param structType schema whose fields are folded over
     * @return the dataset produced by the fold
     */
    private Dataset<Row> padFields(Dataset<Row> dataset, StructType structType) {
        final Join$$anonfun$padFields$1 perField = new Join$$anonfun$padFields$1(this);
        final Object folded = structType.foldLeft(dataset, perField);
        return (Dataset) folded;
    }

    /**
     * Converts a sequence of Chronon struct fields into a Spark {@link StructType}.
     *
     * The fields are copied into an array, wrapped in a Chronon struct type with an
     * empty name, and handed to {@code SparkConversions.fromChrononSchema}.
     *
     * @param seq Chronon fields to convert
     * @return the Spark schema returned by the conversion
     */
    private StructType toSparkSchema(Seq<StructField> seq) {
        final StructField[] fieldArray = (StructField[]) seq.toArray(ClassTag$.MODULE$.apply(StructField.class));
        final ai.chronon.api.StructType chrononSchema = new ai.chronon.api.StructType("", fieldArray);
        return SparkConversions$.MODULE$.fromChrononSchema(chrononSchema);
    }

    /**
     * Pads {@code dataset} with fields derived from the external parts of
     * {@code bootstrapInfo}, in two passes.
     *
     * Pass one builds a schema from the external parts accepted by {@code Join$$anonfun$1}
     * (flattened through {@code Join$$anonfun$2}) and applies it via
     * {@code withNonContextualFields$1}. Pass two builds a schema from the parts accepted
     * by {@code Join$$anonfun$3} (flattened through {@code Join$$anonfun$4}) and applies
     * it to the result via {@code withContextualFields$1}.
     *
     * NOTE(review): the predicates are not visible here; the helper names suggest the
     * split is non-contextual vs. contextual external parts - confirm.
     *
     * @param dataset       dataset to pad
     * @param bootstrapInfo source of the external part metadata
     * @return the dataset after both passes
     */
    public Dataset<Row> ai$chronon$spark$Join$$padExternalFields(Dataset<Row> dataset, BootstrapInfo bootstrapInfo) {
        // First pass: schema for the first group of external parts, applied immediately.
        final TraversableLike firstGroup = (TraversableLike) bootstrapInfo.externalParts().filter(new Join$$anonfun$1(this));
        final Seq firstGroupFields = (Seq) firstGroup.flatMap(new Join$$anonfun$2(this), Seq$.MODULE$.canBuildFrom());
        final StructType firstSchema = toSparkSchema(firstGroupFields);
        final Dataset afterFirstPass = withNonContextualFields$1(dataset, firstSchema);

        // Second pass: schema for the second group, applied on top of the first pass.
        final TraversableLike secondGroup = (TraversableLike) bootstrapInfo.externalParts().filter(new Join$$anonfun$3(this));
        final Seq secondGroupFields = (Seq) secondGroup.flatMap(new Join$$anonfun$4(this), Seq$.MODULE$.canBuildFrom());
        final StructType secondSchema = toSparkSchema(secondGroupFields);
        return withContextualFields$1(afterFirstPass, secondSchema);
    }

    /**
     * Pads {@code dataset} using a schema assembled from the join parts of
     * {@code bootstrapInfo}.
     *
     * Each join part is flattened into fields by {@code Join$$anonfun$5} (body not visible
     * here), the fields are converted to a Spark schema, and {@code padFields} is applied.
     *
     * @param dataset       dataset to pad
     * @param bootstrapInfo source of the join part metadata
     * @return the result of {@code padFields} for the assembled schema
     */
    private Dataset<Row> padGroupByFields(Dataset<Row> dataset, BootstrapInfo bootstrapInfo) {
        final Seq joinPartFields = (Seq) bootstrapInfo.joinParts().flatMap(new Join$$anonfun$5(this), Seq$.MODULE$.canBuildFrom());
        final StructType joinPartSchema = toSparkSchema(joinPartFields);
        return padFields(dataset, joinPartSchema);
    }

    /**
     * Builds one (join part metadata, covering sets) pair per join part in
     * {@code bootstrapInfo}, prints them, and returns them.
     *
     * Each join part is mapped through {@code Join$$anonfun$7}, which receives the
     * bootstrap info and the per-hash row counts of {@code dataset}. A banner with the
     * join name and the start/end of {@code partitionRange} is printed, then
     * {@code Join$$anonfun$findBootstrapSetCoverings$1} is invoked on every pair.
     *
     * NOTE(review): how the covering sets are derived lives in the closure and is not
     * visible here.
     *
     * @param dataset        dataset whose matched-hash column (if present) is aggregated
     * @param bootstrapInfo  source of the join parts
     * @param partitionRange range reported in the printed banner
     * @return the pairs produced by the mapping
     */
    private Seq<Tuple2<JoinPartMetadata, Seq<CoveringSet>>> findBootstrapSetCoverings(Dataset<Row> dataset, BootstrapInfo bootstrapInfo, PartitionRange partitionRange) {
        final Seq<Tuple2<JoinPartMetadata, Seq<CoveringSet>>> coverings = (Seq) bootstrapInfo.joinParts().map(
                new Join$$anonfun$7(this, bootstrapInfo, collectMatchedHashRowCounts(dataset)),
                Seq$.MODULE$.canBuildFrom());

        // The parts hold a literal backslash-n; Scala's s-interpolator processes escapes.
        final String[] bannerParts = {"\\n======= CoveringSet for JoinPart ", " for PartitionRange(", ", ", ") =======\\n"};
        final StringContext bannerTemplate = new StringContext(Predef$.MODULE$.wrapRefArray(bannerParts));
        final Object[] bannerArgs = {this.ai$chronon$spark$Join$$joinConf.metaData.name, partitionRange.start(), partitionRange.end()};
        Predef$.MODULE$.println(bannerTemplate.s(Predef$.MODULE$.genericWrapArray(bannerArgs)));

        coverings.foreach(new Join$$anonfun$findBootstrapSetCoverings$1(this));
        return coverings;
    }

    /**
     * Counts the rows of {@code dataset} per value of the matched-hashes column.
     *
     * When the column is absent an empty sequence is returned. Otherwise the dataset is
     * grouped by that column, a {@code row_count} aggregate is computed, the grouped rows
     * are collected, and each row is converted by {@code Join$$anonfun$6} into an element
     * of a {@code Tuple2} array.
     */
    private Seq collectMatchedHashRowCounts(Dataset<Row> dataset) {
        final boolean hasMatchedHashes = Predef$.MODULE$.refArrayOps(dataset.columns()).contains(Constants$.MODULE$.MatchedHashes());
        if (!hasMatchedHashes) {
            return (Seq) Seq$.MODULE$.apply(Nil$.MODULE$);
        }
        final Row[] groupedRows = (Row[]) dataset
                .groupBy(Constants$.MODULE$.MatchedHashes(), Predef$.MODULE$.wrapRefArray(new String[0]))
                .agg(functions$.MODULE$.count(functions$.MODULE$.lit(BoxesRunTime.boxToInteger(1))).as("row_count"), Predef$.MODULE$.wrapRefArray(new Column[0]))
                .collect();
        final Object[] hashCounts = (Object[]) Predef$.MODULE$.refArrayOps(groupedRows).map(new Join$$anonfun$6(this), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
        return Predef$.MODULE$.refArrayOps(hashCounts).toSeq();
    }

    /**
     * Computes the join output for one partition range.
     *
     * Steps, in order:
     * 1. If the input schema has the time column, add the time-partition column to it.
     * 2. Compute (or load) the bootstrap table for the range.
     * 3. Find the bootstrap set coverings, expand them through {@code Join$$anonfun$12},
     *    and fold the results into the bootstrap dataset with {@code Join$$anonfun$14}.
     * 4. Drop the matched-hashes and time-partition columns.
     * 5. Keep only columns accepted by the union of {@code bootstrapInfo.fieldNames()}
     *    and the bootstrap dataset's columns.
     * 6. Pad group-by fields, apply derivations, and clean up contextual fields.
     * 7. Print the query plan via {@code explain()} and return the dataset.
     *
     * NOTE(review): what the two closures in step 3 do is not visible here; presumably
     * they compute each join part and join it onto the left side - confirm.
     *
     * @param dataset        input (left) dataset
     * @param partitionRange range being computed
     * @param bootstrapInfo  bootstrap metadata for the join
     * @return the final dataset for the range
     */
    @Override // ai.chronon.spark.BaseJoin
    public Dataset<Row> computeRange(Dataset<Row> dataset, PartitionRange partitionRange, BootstrapInfo bootstrapInfo) {
        Dataset<Row> leftWithTimePartition = dataset;
        final boolean hasTimeColumn = Predef$.MODULE$.refArrayOps(dataset.schema().names()).contains(Constants$.MODULE$.TimeColumn());
        if (hasTimeColumn) {
            final Extensions.DataframeOps leftOps = Extensions$.MODULE$.DataframeOps(dataset);
            leftWithTimePartition = leftOps.withTimeBasedColumn(Constants$.MODULE$.TimePartitionColumn(), leftOps.withTimeBasedColumn$default$2(), leftOps.withTimeBasedColumn$default$3());
        }

        final Dataset<Row> bootstrapDf = computeBootstrapTable(leftWithTimePartition, partitionRange, bootstrapInfo);

        // Expand the coverings and fold the results onto the bootstrap dataset.
        final Seq<Tuple2<JoinPartMetadata, Seq<CoveringSet>>> coverings = findBootstrapSetCoverings(bootstrapDf, bootstrapInfo, partitionRange);
        final Seq rightResults = (Seq) coverings.flatMap(new Join$$anonfun$12(this, partitionRange, bootstrapDf), Seq$.MODULE$.canBuildFrom());
        final Dataset joined = (Dataset) rightResults.foldLeft(bootstrapDf, new Join$$anonfun$14(this));

        // Remove the bookkeeping columns before projecting.
        final String[] bookkeepingColumns = {Constants$.MODULE$.MatchedHashes(), Constants$.MODULE$.TimePartitionColumn()};
        final Dataset trimmed = joined.drop(Predef$.MODULE$.wrapRefArray(bookkeepingColumns));

        // Retain only columns known to the bootstrap info or present in the bootstrap dataset.
        final String[] retainedColumns = (String[]) Predef$.MODULE$.refArrayOps(trimmed.columns()).filter(bootstrapInfo.fieldNames().$plus$plus(Predef$.MODULE$.refArrayOps(bootstrapDf.columns())));
        final Dataset projected = trimmed.selectExpr(Predef$.MODULE$.wrapRefArray(retainedColumns));

        final Dataset<Row> padded = padGroupByFields(projected, bootstrapInfo);
        final Dataset<Row> derived = applyDerivation(padded, bootstrapInfo, Predef$.MODULE$.wrapRefArray(dataset.columns()));
        final Dataset<Row> result = cleanUpContextualFields(derived, bootstrapInfo, Predef$.MODULE$.wrapRefArray(dataset.columns()));
        result.explain();
        return result;
    }

    /**
     * Applies the join's derivations to {@code dataset}, if there are any.
     *
     * When derivations are unset or empty the dataset is returned untouched. Otherwise
     * the derivation projection is computed from {@code bootstrapInfo.baseValueNames()}
     * and the dataset is re-selected with two groups of columns concatenated:
     * columns produced per existing column by {@code Join$$anonfun$15}, followed by
     * columns produced per projection entry by {@code Join$$anonfun$16}.
     *
     * NOTE(review): which existing columns are kept and which derived columns are
     * emitted is decided inside the closures, which are not visible here.
     *
     * @param dataset       dataset to project
     * @param bootstrapInfo supplies the base value names and is passed to the first closure
     * @param seq           column names passed through to both closures
     * @return the projected dataset, or {@code dataset} itself when nothing is derived
     */
    public Dataset<Row> applyDerivation(Dataset<Row> dataset, BootstrapInfo bootstrapInfo, Seq<String> seq) {
        final boolean hasDerivations = this.ai$chronon$spark$Join$$joinConf.isSetDerivations()
                && !this.ai$chronon$spark$Join$$joinConf.derivations.isEmpty();
        if (!hasDerivations) {
            return dataset;
        }

        Seq derivationProjection = ai.chronon.api.Extensions$.MODULE$.JoinOps(this.ai$chronon$spark$Join$$joinConf).derivationProjection(bootstrapInfo.baseValueNames());

        // Columns derived from the dataset's existing columns.
        final Object[] fromExistingColumns = (Object[]) Predef$.MODULE$.refArrayOps(dataset.columns()).flatMap(
                new Join$$anonfun$15(this, bootstrapInfo, seq, derivationProjection.toMap(Predef$.MODULE$.$conforms())),
                Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Column.class)));

        // Columns derived from the projection entries.
        final GenTraversableOnce fromProjection = (GenTraversableOnce) derivationProjection.flatMap(
                new Join$$anonfun$16(this, seq, Predef$.MODULE$.refArrayOps(dataset.columns()).toSet()),
                Seq$.MODULE$.canBuildFrom());

        final Column[] selection = (Column[]) Predef$.MODULE$.refArrayOps(fromExistingColumns).$plus$plus(
                fromProjection,
                Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Column.class)));
        return dataset.select(Predef$.MODULE$.wrapRefArray(selection));
    }

    /**
     * Folds a sequence derived from the external parts of {@code bootstrapInfo} into
     * {@code dataset}.
     *
     * The folded sequence is built from the external parts accepted by
     * {@code Join$$anonfun$17}, flattened by {@code Join$$anonfun$18} and mapped by
     * {@code Join$$anonfun$19}. The fold closure also receives {@code seq} and a second
     * sequence: the derivation projection mapped through {@code Join$$anonfun$20} when
     * derivations are set on the join, otherwise an empty sequence.
     *
     * NOTE(review): the per-element action is inside the fold closure and not visible
     * here; the method name suggests it removes contextual columns - confirm.
     *
     * @param dataset       starting value of the fold
     * @param bootstrapInfo supplies external parts and base value names
     * @param seq           column names handed to the fold closure
     * @return the dataset produced by the fold
     */
    public Dataset<Row> cleanUpContextualFields(Dataset<Row> dataset, BootstrapInfo bootstrapInfo, Seq<String> seq) {
        final TraversableLike selectedParts = (TraversableLike) bootstrapInfo.externalParts().filter(new Join$$anonfun$17(this));
        final TraversableLike selectedFields = (TraversableLike) selectedParts.flatMap(new Join$$anonfun$18(this), Seq$.MODULE$.canBuildFrom());
        final Seq foldInput = (Seq) selectedFields.map(new Join$$anonfun$19(this), Seq$.MODULE$.canBuildFrom());

        final Seq projectionInput;
        if (this.ai$chronon$spark$Join$$joinConf.isSetDerivations()) {
            projectionInput = (Seq) ai.chronon.api.Extensions$.MODULE$.JoinOps(this.ai$chronon$spark$Join$$joinConf)
                    .derivationProjection(bootstrapInfo.baseValueNames())
                    .map(new Join$$anonfun$20(this), Seq$.MODULE$.canBuildFrom());
        } else {
            projectionInput = (Seq) Seq$.MODULE$.apply(Nil$.MODULE$);
        }

        return (Dataset) foldInput.foldLeft(dataset, new Join$$anonfun$cleanUpContextualFields$1(this, seq, projectionInput));
    }

    /**
     * Produces the bootstrap dataset for {@code partitionRange}.
     *
     * If the join has no bootstrap parts and its table properties do not contain the
     * {@code ChrononOOCTable} key, the input is simply padded with external fields and
     * returned. Otherwise:
     * 1. assert that the input does not already carry the reserved bootstrap-hash or
     *    matched-hashes columns;
     * 2. ask the table utils for the unfilled ranges of the bootstrap table and invoke
     *    {@code Join$$anonfun$computeBootstrapTable$2} for each one;
     * 3. print the elapsed time in whole minutes;
     * 4. return a scan of the bootstrap table restricted to {@code partitionRange}.
     *
     * NOTE(review): the per-range closure is not visible here; presumably it computes
     * and writes the missing bootstrap partitions - confirm.
     *
     * @param dataset        left dataset
     * @param partitionRange range to fill and then scan
     * @param bootstrapInfo  bootstrap metadata passed to the padding and per-range steps
     * @return the padded input, or the scanned bootstrap table
     */
    private Dataset<Row> computeBootstrapTable(Dataset<Row> dataset, PartitionRange partitionRange, BootstrapInfo bootstrapInfo) {
        final ai.chronon.api.Join conf = this.ai$chronon$spark$Join$$joinConf;
        final boolean hasOocTable = conf.metaData.isSetTableProperties()
                && conf.metaData.tableProperties.containsKey(Constants$.MODULE$.ChrononOOCTable());
        if (!(conf.isSetBootstrapParts() || hasOocTable)) {
            return ai$chronon$spark$Join$$padExternalFields(dataset, bootstrapInfo);
        }

        final long startMillis = System.currentTimeMillis();

        // The left side must not already use the column names reserved for bootstrap bookkeeping.
        final String leftTable = ai.chronon.api.Extensions$.MODULE$.SourceOps(conf.left).table();
        final String[] reservedNames = {Constants$.MODULE$.BootstrapHash(), Constants$.MODULE$.MatchedHashes()};
        final Seq reservedColumns = (Seq) Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(reservedNames));
        ai$chronon$spark$Join$$validateReservedColumns$1(dataset, leftTable, reservedColumns);

        // Process every range of the bootstrap table that is still unfilled.
        final TableUtils utils = this.ai$chronon$spark$Join$$tableUtils;
        final IterableLike unfilled = (IterableLike) utils.unfilledRanges(
                ai$chronon$spark$Join$$bootstrapTable(),
                partitionRange,
                utils.unfilledRanges$default$3(),
                utils.unfilledRanges$default$4(),
                utils.unfilledRanges$default$5(),
                this.skipFirstHole).getOrElse(new Join$$anonfun$computeBootstrapTable$1(this));
        unfilled.foreach(new Join$$anonfun$computeBootstrapTable$2(this, dataset, bootstrapInfo));

        final StringContext summaryTemplate = new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Finished computing bootstrap table ", " in ", " minutes"}));
        final String reportedTable = ai.chronon.api.Extensions$.MODULE$.MetadataOps(conf.metaData).bootstrapTable();
        final long elapsedMinutes = (System.currentTimeMillis() - startMillis) / 60000;
        Predef$.MODULE$.println(summaryTemplate.s(Predef$.MODULE$.genericWrapArray(new Object[]{reportedTable, BoxesRunTime.boxToLong(elapsedMinutes)})));

        final String scanQuery = partitionRange.genScanQuery(null, ai$chronon$spark$Join$$bootstrapTable(), partitionRange.genScanQuery$default$3(), partitionRange.genScanQuery$default$4());
        return utils.sql(scanQuery);
    }

    /**
     * Restricts {@code dataset} using the given covering sets when it carries the
     * matched-hashes column; otherwise returns it unchanged.
     *
     * The row predicate is {@code Join$$anonfun$ai$chronon$spark$Join$$findUnfilledRecords$1},
     * constructed with {@code seq}; its body is not visible here.
     *
     * @param dataset dataset to filter
     * @param seq     covering sets handed to the row predicate
     * @return the filtered dataset, or {@code dataset} itself when the column is absent
     */
    public Dataset<Row> ai$chronon$spark$Join$$findUnfilledRecords(Dataset<Row> dataset, Seq<CoveringSet> seq) {
        final boolean hasMatchedHashes = Predef$.MODULE$.refArrayOps(dataset.columns()).contains(Constants$.MODULE$.MatchedHashes());
        if (!hasMatchedHashes) {
            return dataset;
        }
        return dataset.filter(new Join$$anonfun$ai$chronon$spark$Join$$findUnfilledRecords$1(this, seq));
    }

    /**
     * Thin wrapper around {@code padFields} with the same arguments; used as the first
     * padding pass of {@code ai$chronon$spark$Join$$padExternalFields}.
     *
     * @param dataset    dataset to pad
     * @param structType schema passed to {@code padFields}
     * @return whatever {@code padFields} returns
     */
    private final Dataset withNonContextualFields$1(Dataset dataset, StructType structType) {
        final Dataset padded = padFields(dataset, structType);
        return padded;
    }

    /**
     * Folds the fields of {@code structType} into {@code dataset} using
     * {@code Join$$anonfun$withContextualFields$1$1}; used as the second padding pass of
     * {@code ai$chronon$spark$Join$$padExternalFields}.
     *
     * NOTE(review): the per-field closure is not visible here, so how it differs from
     * the one used by {@code padFields} cannot be told from this file.
     *
     * @param dataset    starting value of the fold
     * @param structType schema whose fields are folded over
     * @return the dataset produced by the fold
     */
    private final Dataset withContextualFields$1(Dataset dataset, StructType structType) {
        final Join$$anonfun$withContextualFields$1$1 perField = new Join$$anonfun$withContextualFields$1$1(this);
        final Object folded = structType.foldLeft(dataset, perField);
        return (Dataset) folded;
    }

    /**
     * Asserts that none of the names in {@code seq} survive the filter built over the
     * field names of {@code dataset}.
     *
     * The names are filtered with {@code Join$$anonfun$21}, which is constructed over the
     * dataset's schema field names; the assertion passes only when the filtered result is
     * empty. The failure message comes from a closure that receives {@code str} and the
     * surviving names.
     *
     * NOTE(review): the filter presumably keeps reserved names that already exist in the
     * schema - the closure body is not visible here, confirm.
     *
     * @param dataset dataset whose schema is inspected
     * @param str     label handed to the message closure (the caller passes the left table name)
     * @param seq     reserved column names to check
     */
    public final void ai$chronon$spark$Join$$validateReservedColumns$1(Dataset dataset, String str, Seq seq) {
        final String[] schemaFieldNames = dataset.schema().fieldNames();
        final Seq offending = (Seq) seq.filter(new Join$$anonfun$21(this, Predef$.MODULE$.refArrayOps(schemaFieldNames)));
        final boolean noneOffending = offending.isEmpty();
        Predef$.MODULE$.assert(noneOffending, new Join$$anonfun$ai$chronon$spark$Join$$validateReservedColumns$1$1(this, str, offending));
    }

    /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
    public Join(ai.chronon.api.Join join, String str, TableUtils tableUtils, boolean z) {
        super(join, str, tableUtils, z);
        this.ai$chronon$spark$Join$$joinConf = join;
        this.ai$chronon$spark$Join$$tableUtils = tableUtils;
        this.skipFirstHole = z;
        this.ai$chronon$spark$Join$$bootstrapTable = ai.chronon.api.Extensions$.MODULE$.MetadataOps(join.metaData).bootstrapTable();
    }
}
