From d72b332ffe63097e6f53893cb9f3b4a1a570b929 Mon Sep 17 00:00:00 2001 From: Cam Saul <1455846+camsaul@users.noreply.github.com> Date: Tue, 4 Apr 2023 09:53:51 -0700 Subject: [PATCH] [MLv2] Disallow duplicate UUIDs in a query (#29696) * Disallow duplicate UUIDs in a query * Update test * Fixes :wrench: --- src/metabase/lib/schema.cljc | 16 ++--- src/metabase/lib/schema/util.cljc | 56 +++++++++++++++ .../schema/expression/conditional_test.cljc | 2 - test/metabase/lib/schema/util_test.cljc | 69 +++++++++++++++++++ test/metabase/lib/schema_test.cljc | 15 ++++ 5 files changed, 148 insertions(+), 10 deletions(-) create mode 100644 src/metabase/lib/schema/util.cljc create mode 100644 test/metabase/lib/schema/util_test.cljc create mode 100644 test/metabase/lib/schema_test.cljc diff --git a/src/metabase/lib/schema.cljc b/src/metabase/lib/schema.cljc index 6f406b032a7..3fd81b22b86 100644 --- a/src/metabase/lib/schema.cljc +++ b/src/metabase/lib/schema.cljc @@ -8,7 +8,6 @@ future we can deprecate that namespace and eventually do away with it entirely." (:require [metabase.lib.schema.aggregation :as aggregation] - [metabase.lib.schema.common :as common] [metabase.lib.schema.expression :as expression] [metabase.lib.schema.expression.arithmetic] [metabase.lib.schema.expression.conditional] @@ -20,6 +19,7 @@ [metabase.lib.schema.literal] [metabase.lib.schema.order-by :as order-by] [metabase.lib.schema.ref :as ref] + [metabase.lib.schema.util :as lib.schema.util] [metabase.util.malli.registry :as mr])) (comment metabase.lib.schema.expression.arithmetic/keep-me @@ -32,7 +32,6 @@ (mr/def ::stage.native [:map [:lib/type [:= :mbql.stage/native]] - [:lib/options ::common/options] [:native any?] [:args {:optional true} [:sequential any?]]]) @@ -51,7 +50,6 @@ [:and [:map [:lib/type [:= :mbql.stage/mbql]] - [:lib/options ::common/options] [:joins {:optional true} [:ref ::join/joins]] [:expressions {:optional true} [:ref ::expression/expressions]] [:breakout {:optional true} ::breakouts] @@ -105,8 +103,10 @@ [:* ::stage.additional]]) (mr/def ::query - [:map - [:lib/type [:= :mbql/query]] - [:database ::id/database] - [:type [:= :pipeline]] - [:stages ::stages]]) + [:and + [:map + [:lib/type [:= :mbql/query]] + [:database ::id/database] + [:type [:= :pipeline]] + [:stages ::stages]] + lib.schema.util/UniqueUUIDs]) diff --git a/src/metabase/lib/schema/util.cljc b/src/metabase/lib/schema/util.cljc new file mode 100644 index 00000000000..840eef52ecf --- /dev/null +++ b/src/metabase/lib/schema/util.cljc @@ -0,0 +1,56 @@ +(ns metabase.lib.schema.util + (:require + [metabase.lib.options :as lib.options])) + +(declare collect-uuids) + +(defn- collect-uuids-in-map [m] + (into (if-let [our-uuid (or (:lib/uuid (lib.options/options m)) + (:lib/uuid m))] + [our-uuid] + []) + (comp (remove (fn [[k _v]] + (#{:lib/metadata :lib/stage-metadata :lib/options} k))) + (mapcat (fn [[_k v]] + (collect-uuids v)))) + m)) + +(defn- collect-uuids-in-sequence [xs] + (into [] (mapcat collect-uuids) xs)) + +(defn collect-uuids + "Return all the `:lib/uuid`s in a part of an MBQL query (a clause or map) as a sequence. This will be used to ensure + there are no duplicates." + [x] + (cond + (map? x) (collect-uuids-in-map x) + (sequential? x) (collect-uuids-in-sequence x) + :else nil)) + +(defn- find-duplicate-uuid [x] + (transduce + identity + (fn + ([] + #{}) + ([result] + (when (string? result) + result)) + ([seen a-uuid] + (if (contains? seen a-uuid) + (reduced a-uuid) + (conj seen a-uuid)))) + (collect-uuids x))) + +(defn unique-uuids? + "True if all the `:lib/uuid`s in something are unique." + [x] + (not (find-duplicate-uuid x))) + +(def UniqueUUIDs + "Malli schema for to ensure that all `:lib/uuid`s are unique." + [:fn + {:error/message "all :lib/uuids must be unique" + :error/fn (fn [{:keys [value]} _] + (str "Duplicate :lib/uuid " (pr-str (find-duplicate-uuid value))))} + #'unique-uuids?]) diff --git a/test/metabase/lib/schema/expression/conditional_test.cljc b/test/metabase/lib/schema/expression/conditional_test.cljc index 5432a51d003..14a9a552d97 100644 --- a/test/metabase/lib/schema/expression/conditional_test.cljc +++ b/test/metabase/lib/schema/expression/conditional_test.cljc @@ -49,7 +49,6 @@ (case-expr 1 1.1) :type/Number)) - (deftest ^:parallel coalesce-test (is (mc/validate :mbql.clause/coalesce @@ -64,7 +63,6 @@ :type :pipeline :database (meta/id) :stages [{:lib/type :mbql.stage/mbql, - :lib/options {:lib/uuid "455a9f5e-4996-4df9-82aa-01bc083b2efe"} :source-table (meta/id :venues) :expressions {"expr" [:coalesce diff --git a/test/metabase/lib/schema/util_test.cljc b/test/metabase/lib/schema/util_test.cljc new file mode 100644 index 00000000000..b3507e6e223 --- /dev/null +++ b/test/metabase/lib/schema/util_test.cljc @@ -0,0 +1,69 @@ +(ns metabase.lib.schema.util-test + (:require + [clojure.test :refer [are deftest is]] + [malli.core :as mc] + [malli.error :as me] + [metabase.lib.schema.util :as lib.schema.util])) + +(defn- query [uuid-1 uuid-2] + {:lib/type :mbql/query + :type :pipeline + :database 1 + :stages [{:lib/type :mbql.stage/mbql + :source-table 2 + :filter [:= + {:lib/uuid "00000000-0000-0000-0000-000000000010"} + [:field + {:lib/uuid uuid-1, :base-type :type/Text} + 3] + 4]} + {:lib/type :mbql.stage/mbql + :filter [:= + {:lib/uuid "00000000-0000-0000-0000-000000000020"} + [:field + {:lib/uuid uuid-2, :base-type :type/Text} + "my_field"] + 4]}]}) + +(def query-with-no-duplicate-uuids + (query "00000000-0000-0000-0000-000000000001" + "00000000-0000-0000-0000-000000000002")) + +(def query-with-duplicate-uuids + (query "00000000-0000-0000-0000-000000000001" + "00000000-0000-0000-0000-000000000001")) + +(deftest ^:parallel collect-uuids-test + (are [query expected-uuids] (= expected-uuids + (sort (lib.schema.util/collect-uuids query))) + query-with-no-duplicate-uuids + ["00000000-0000-0000-0000-000000000001" + "00000000-0000-0000-0000-000000000002" + "00000000-0000-0000-0000-000000000010" + "00000000-0000-0000-0000-000000000020"] + + query-with-duplicate-uuids + ["00000000-0000-0000-0000-000000000001" + "00000000-0000-0000-0000-000000000001" + "00000000-0000-0000-0000-000000000010" + "00000000-0000-0000-0000-000000000020"])) + +(deftest ^:parallel collect-uuids-from-lib-options + (is (= ["f590f35f-9224-45f1-8334-422f15fc4abd"] + (lib.schema.util/collect-uuids + {:lib/type :mbql/query + :database 1 + :type :pipeline + :stages [{:lib/type :mbql.stage/mbql + :source-table 2 + :lib/options {:lib/uuid "f590f35f-9224-45f1-8334-422f15fc4abd"}}]})))) + +(deftest ^:parallel unique-uuids?-test + (is (lib.schema.util/unique-uuids? query-with-no-duplicate-uuids)) + (is (not (lib.schema.util/unique-uuids? query-with-duplicate-uuids)))) + +(deftest ^:parallel UniqueUUIDs-schema-test + (is (not (mc/explain lib.schema.util/UniqueUUIDs query-with-no-duplicate-uuids))) + (is (mc/explain lib.schema.util/UniqueUUIDs query-with-duplicate-uuids)) + (is (= ["Duplicate :lib/uuid \"00000000-0000-0000-0000-000000000001\""] + (me/humanize (mc/explain lib.schema.util/UniqueUUIDs query-with-duplicate-uuids))))) diff --git a/test/metabase/lib/schema_test.cljc b/test/metabase/lib/schema_test.cljc new file mode 100644 index 00000000000..9e52501d115 --- /dev/null +++ b/test/metabase/lib/schema_test.cljc @@ -0,0 +1,15 @@ +(ns metabase.lib.schema-test + (:require + [clojure.test :refer [deftest is testing]] + [malli.core :as mc] + [malli.error :as me] + [metabase.lib.schema :as lib.schema] + [metabase.lib.schema.util-test :as lib.schema.util-test])) + +(deftest ^:parallel disallow-duplicate-uuids-test + (testing "sanity check: make sure query is valid with different UUIDs" + (is (not (mc/explain ::lib.schema/query lib.schema.util-test/query-with-no-duplicate-uuids)))) + (testing "should not validate if UUIDs are duplicated" + (is (mc/explain ::lib.schema/query lib.schema.util-test/query-with-duplicate-uuids)) + (is (= ["Duplicate :lib/uuid \"00000000-0000-0000-0000-000000000001\""] + (me/humanize (mc/explain ::lib.schema/query lib.schema.util-test/query-with-duplicate-uuids)))))) -- GitLab