Ram
06/13/2023, 7:36 AM
deltaTable = ... # DeltaTable with schema (key, value)
# Load the table of change records. The columns referenced below are
# key, time, newValue and deleted.
changesDF = spark.table("changes")

# Keep only the most recent change per key: pack the non-key columns into a
# struct with `time` first, take the max struct per key (structs compare
# field by field, so the greatest `time` wins), then unpack the struct again.
# NOTE(review): `max` here must be pyspark.sql.functions.max, not the Python
# builtin -- confirm it is imported where this snippet runs.
latestChangeForEachKey = (
    changesDF
    .selectExpr("key", "struct(time, newValue, deleted) as otherCols")
    .groupBy("key")
    .agg(max("otherCols").alias("latest"))
    .select("key", "latest.*")
)

# Conditional variant: a matched target row is deleted only when the latest
# change for its key is flagged as deleted; other matched rows are untouched.
(
    deltaTable.alias("t")
    .merge(latestChangeForEachKey.alias("s"), "s.key = t.key")
    .whenMatchedDelete(condition="s.deleted = true")
    .execute()
)
vs
# Unconditional variant: no delete condition is supplied (None), so the
# delete clause applies to every target row that matches a source key.
(
    deltaTable.alias("t")
    .merge(latestChangeForEachKey.alias("s"), "s.key = t.key")
    .whenMatchedDelete(None)
    .execute()
)