From d72b332ffe63097e6f53893cb9f3b4a1a570b929 Mon Sep 17 00:00:00 2001
From: Cam Saul <1455846+camsaul@users.noreply.github.com>
Date: Tue, 4 Apr 2023 09:53:51 -0700
Subject: [PATCH] [MLv2] Disallow duplicate UUIDs in a query (#29696)

* Disallow duplicate UUIDs in a query

* Update test

* Fixes :wrench:
---
 src/metabase/lib/schema.cljc                  | 16 ++---
 src/metabase/lib/schema/util.cljc             | 56 +++++++++++++++
 .../schema/expression/conditional_test.cljc   |  2 -
 test/metabase/lib/schema/util_test.cljc       | 69 +++++++++++++++++++
 test/metabase/lib/schema_test.cljc            | 15 ++++
 5 files changed, 148 insertions(+), 10 deletions(-)
 create mode 100644 src/metabase/lib/schema/util.cljc
 create mode 100644 test/metabase/lib/schema/util_test.cljc
 create mode 100644 test/metabase/lib/schema_test.cljc

diff --git a/src/metabase/lib/schema.cljc b/src/metabase/lib/schema.cljc
index 6f406b032a7..3fd81b22b86 100644
--- a/src/metabase/lib/schema.cljc
+++ b/src/metabase/lib/schema.cljc
@@ -8,7 +8,6 @@
   future we can deprecate that namespace and eventually do away with it entirely."
   (:require
    [metabase.lib.schema.aggregation :as aggregation]
-   [metabase.lib.schema.common :as common]
    [metabase.lib.schema.expression :as expression]
    [metabase.lib.schema.expression.arithmetic]
    [metabase.lib.schema.expression.conditional]
@@ -20,6 +19,7 @@
    [metabase.lib.schema.literal]
    [metabase.lib.schema.order-by :as order-by]
    [metabase.lib.schema.ref :as ref]
+   [metabase.lib.schema.util :as lib.schema.util]
    [metabase.util.malli.registry :as mr]))
 
 (comment metabase.lib.schema.expression.arithmetic/keep-me
@@ -32,7 +32,6 @@
 (mr/def ::stage.native
   [:map
    [:lib/type [:= :mbql.stage/native]]
-   [:lib/options ::common/options]
    [:native any?]
    [:args {:optional true} [:sequential any?]]])
 
@@ -51,7 +50,6 @@
   [:and
    [:map
     [:lib/type     [:= :mbql.stage/mbql]]
-    [:lib/options  ::common/options]
     [:joins        {:optional true} [:ref ::join/joins]]
     [:expressions  {:optional true} [:ref ::expression/expressions]]
     [:breakout     {:optional true} ::breakouts]
@@ -105,8 +103,10 @@
    [:* ::stage.additional]])
 
 (mr/def ::query
-  [:map
-   [:lib/type [:= :mbql/query]]
-   [:database ::id/database]
-   [:type [:= :pipeline]]
-   [:stages ::stages]])
+  [:and
+   [:map
+    [:lib/type [:= :mbql/query]]
+    [:database ::id/database]
+    [:type [:= :pipeline]]
+    [:stages ::stages]]
+   lib.schema.util/UniqueUUIDs])
diff --git a/src/metabase/lib/schema/util.cljc b/src/metabase/lib/schema/util.cljc
new file mode 100644
index 00000000000..840eef52ecf
--- /dev/null
+++ b/src/metabase/lib/schema/util.cljc
@@ -0,0 +1,56 @@
+(ns metabase.lib.schema.util
+  (:require
+   [metabase.lib.options :as lib.options]))
+
+(declare collect-uuids)
+
+(defn- collect-uuids-in-map [m]
+  (into (if-let [our-uuid (or (:lib/uuid (lib.options/options m))
+                              (:lib/uuid m))]
+          [our-uuid]
+          [])
+        (comp (remove (fn [[k _v]]
+                        (#{:lib/metadata :lib/stage-metadata :lib/options} k)))
+              (mapcat (fn [[_k v]]
+                        (collect-uuids v))))
+        m))
+
+(defn- collect-uuids-in-sequence [xs]
+  (into [] (mapcat collect-uuids) xs))
+
+(defn collect-uuids
+  "Return all the `:lib/uuid`s in a part of an MBQL query (a clause or map) as a sequence. This will be used to ensure
+  there are no duplicates."
+  [x]
+  (cond
+    (map? x)        (collect-uuids-in-map x)
+    (sequential? x) (collect-uuids-in-sequence x)
+    :else           nil))
+
+(defn- find-duplicate-uuid [x]
+  (transduce
+   identity
+   (fn
+     ([]
+      #{})
+     ([result]
+      (when (string? result)
+        result))
+     ([seen a-uuid]
+      (if (contains? seen a-uuid)
+        (reduced a-uuid)
+        (conj seen a-uuid))))
+   (collect-uuids x)))
+
+(defn unique-uuids?
+  "True if all the `:lib/uuid`s in something are unique."
+  [x]
+  (not (find-duplicate-uuid x)))
+
+(def UniqueUUIDs
+  "Malli schema for to ensure that all `:lib/uuid`s are unique."
+  [:fn
+   {:error/message "all :lib/uuids must be unique"
+    :error/fn      (fn [{:keys [value]} _]
+                     (str "Duplicate :lib/uuid " (pr-str (find-duplicate-uuid value))))}
+   #'unique-uuids?])
diff --git a/test/metabase/lib/schema/expression/conditional_test.cljc b/test/metabase/lib/schema/expression/conditional_test.cljc
index 5432a51d003..14a9a552d97 100644
--- a/test/metabase/lib/schema/expression/conditional_test.cljc
+++ b/test/metabase/lib/schema/expression/conditional_test.cljc
@@ -49,7 +49,6 @@
     (case-expr 1 1.1)
     :type/Number))
 
-
 (deftest ^:parallel coalesce-test
   (is (mc/validate
        :mbql.clause/coalesce
@@ -64,7 +63,6 @@
         :type         :pipeline
         :database     (meta/id)
         :stages       [{:lib/type     :mbql.stage/mbql,
-                        :lib/options  {:lib/uuid "455a9f5e-4996-4df9-82aa-01bc083b2efe"}
                         :source-table (meta/id :venues)
                         :expressions  {"expr"
                                        [:coalesce
diff --git a/test/metabase/lib/schema/util_test.cljc b/test/metabase/lib/schema/util_test.cljc
new file mode 100644
index 00000000000..b3507e6e223
--- /dev/null
+++ b/test/metabase/lib/schema/util_test.cljc
@@ -0,0 +1,69 @@
+(ns metabase.lib.schema.util-test
+  (:require
+   [clojure.test :refer [are deftest is]]
+   [malli.core :as mc]
+   [malli.error :as me]
+   [metabase.lib.schema.util :as lib.schema.util]))
+
+(defn- query [uuid-1 uuid-2]
+  {:lib/type :mbql/query
+   :type     :pipeline
+   :database 1
+   :stages   [{:lib/type     :mbql.stage/mbql
+               :source-table 2
+               :filter       [:=
+                              {:lib/uuid "00000000-0000-0000-0000-000000000010"}
+                              [:field
+                               {:lib/uuid uuid-1, :base-type :type/Text}
+                               3]
+                              4]}
+              {:lib/type :mbql.stage/mbql
+               :filter   [:=
+                          {:lib/uuid "00000000-0000-0000-0000-000000000020"}
+                          [:field
+                           {:lib/uuid uuid-2, :base-type :type/Text}
+                           "my_field"]
+                          4]}]})
+
+(def query-with-no-duplicate-uuids
+  (query "00000000-0000-0000-0000-000000000001"
+         "00000000-0000-0000-0000-000000000002"))
+
+(def query-with-duplicate-uuids
+  (query "00000000-0000-0000-0000-000000000001"
+         "00000000-0000-0000-0000-000000000001"))
+
+(deftest ^:parallel collect-uuids-test
+  (are [query expected-uuids] (= expected-uuids
+                                 (sort (lib.schema.util/collect-uuids query)))
+    query-with-no-duplicate-uuids
+    ["00000000-0000-0000-0000-000000000001"
+     "00000000-0000-0000-0000-000000000002"
+     "00000000-0000-0000-0000-000000000010"
+     "00000000-0000-0000-0000-000000000020"]
+
+    query-with-duplicate-uuids
+    ["00000000-0000-0000-0000-000000000001"
+     "00000000-0000-0000-0000-000000000001"
+     "00000000-0000-0000-0000-000000000010"
+     "00000000-0000-0000-0000-000000000020"]))
+
+(deftest ^:parallel collect-uuids-from-lib-options
+  (is (= ["f590f35f-9224-45f1-8334-422f15fc4abd"]
+         (lib.schema.util/collect-uuids
+          {:lib/type :mbql/query
+           :database 1
+           :type     :pipeline
+           :stages   [{:lib/type     :mbql.stage/mbql
+                       :source-table 2
+                       :lib/options  {:lib/uuid "f590f35f-9224-45f1-8334-422f15fc4abd"}}]}))))
+
+(deftest ^:parallel unique-uuids?-test
+  (is (lib.schema.util/unique-uuids? query-with-no-duplicate-uuids))
+  (is (not (lib.schema.util/unique-uuids? query-with-duplicate-uuids))))
+
+(deftest ^:parallel UniqueUUIDs-schema-test
+  (is (not (mc/explain lib.schema.util/UniqueUUIDs query-with-no-duplicate-uuids)))
+  (is (mc/explain lib.schema.util/UniqueUUIDs query-with-duplicate-uuids))
+  (is (= ["Duplicate :lib/uuid \"00000000-0000-0000-0000-000000000001\""]
+         (me/humanize (mc/explain lib.schema.util/UniqueUUIDs query-with-duplicate-uuids)))))
diff --git a/test/metabase/lib/schema_test.cljc b/test/metabase/lib/schema_test.cljc
new file mode 100644
index 00000000000..9e52501d115
--- /dev/null
+++ b/test/metabase/lib/schema_test.cljc
@@ -0,0 +1,15 @@
+(ns metabase.lib.schema-test
+  (:require
+   [clojure.test :refer [deftest is testing]]
+   [malli.core :as mc]
+   [malli.error :as me]
+   [metabase.lib.schema :as lib.schema]
+   [metabase.lib.schema.util-test :as lib.schema.util-test]))
+
+(deftest ^:parallel disallow-duplicate-uuids-test
+  (testing "sanity check: make sure query is valid with different UUIDs"
+    (is (not (mc/explain ::lib.schema/query lib.schema.util-test/query-with-no-duplicate-uuids))))
+  (testing "should not validate if UUIDs are duplicated"
+    (is (mc/explain ::lib.schema/query lib.schema.util-test/query-with-duplicate-uuids))
+    (is (= ["Duplicate :lib/uuid \"00000000-0000-0000-0000-000000000001\""]
+           (me/humanize (mc/explain ::lib.schema/query lib.schema.util-test/query-with-duplicate-uuids))))))
-- 
GitLab