From 65c881fe1f8bc9dee34432aaff5301e099f17fd4 Mon Sep 17 00:00:00 2001
From: Chris Truter <crisptrutski@users.noreply.github.com>
Date: Fri, 16 Feb 2024 17:58:41 +0200
Subject: [PATCH] Fix update hook and add select hook (#38751)

---
 src/metabase/models/field.clj              |   2 +-
 src/metabase/models/field_values.clj       |  95 +++++++++-----
 test/metabase/models/field_values_test.clj | 136 +++++++++++++++------
 3 files changed, 163 insertions(+), 70 deletions(-)

diff --git a/src/metabase/models/field.clj b/src/metabase/models/field.clj
index beceb84a438..0f2b0a677ea 100644
--- a/src/metabase/models/field.clj
+++ b/src/metabase/models/field.clj
@@ -193,7 +193,7 @@
 (defn values
   "Return the `FieldValues` associated with this `field`."
   [{:keys [id]}]
-  (t2/select [FieldValues :field_id :values], :field_id id))
+  (t2/select [FieldValues :field_id :values], :field_id id :type :full))
 
 (mu/defn nested-field-names->field-id :- [:maybe ms/PositiveInt]
   "Recusively find the field id for a nested field name, return nil if not found.
diff --git a/src/metabase/models/field_values.clj b/src/metabase/models/field_values.clj
index 40379a11812..c4b1f29bf88 100644
--- a/src/metabase/models/field_values.clj
+++ b/src/metabase/models/field_values.clj
@@ -23,6 +23,7 @@
   There is also more written about how these are used for remapping in the docstrings
   for [[metabase.models.params.chain-filter]] and [[metabase.query-processor.middleware.add-dimension-projections]]."
   (:require
+   [clojure.string :as str]
    [java-time.api :as t]
    [malli.core :as mc]
    [medley.core :as m]
@@ -104,26 +105,33 @@
                     {:human-readable-values human-readable-values
                      :status-code           400}))))
 
-(defn- assert-valid-field-values-type
+(defn- assert-valid-type-hash-combo
+  "Ensure that type is present, valid, and that a hash_key is provided iff this is an advanced field type."
   [{:keys [type hash_key] :as _field-values}]
-  (when type
-    (when-not (contains? field-values-types type)
-      (throw (ex-info (tru "Invalid field-values type.")
-                      {:type        type
-                       :stauts-code 400})))
-
-    (when (and (= type :full)
-               hash_key)
-      (throw (ex-info (tru "Full FieldValues shouldn't have hash_key.")
-                      {:type        type
-                       :hash_key    hash_key
-                       :status-code 400})))
-
-    (when (and (advanced-field-values-types type)
-               (empty? hash_key))
-      (throw (ex-info (tru "Advanced FieldValues requires a hash_key.")
-                      {:type        type
-                       :status-code 400})))))
+  (when-not (contains? field-values-types type)
+    (throw (ex-info (tru "Invalid field-values type.")
+                    {:type        type
+                     :status-code 400})))
+
+  (when (and (= type :full) hash_key)
+    (throw (ex-info (tru "Full FieldValues shouldn't have hash_key.")
+                    {:type        type
+                     :hash_key    hash_key
+                     :status-code 400})))
+
+  (when (and (advanced-field-values-types type) (str/blank? hash_key))
+    (throw (ex-info (tru "Advanced FieldValues require a hash_key.")
+                    {:type        type
+                     :status-code 400}))))
+
+(defn- assert-no-identity-changes [id changes]
+  (when (some #(contains? changes %) [:field_id :type :hash_key])
+    (throw (ex-info (tru "Can't update field_id, type, or hash_key for a FieldValues.")
+                    {:id          id
+                     :field_id    (:field_id changes)
+                     :type        (:type changes)
+                     :hash_key    (:hash_key changes)
+                     :status-code 400}))))
 
 (defn clear-advanced-field-values-for-field!
   "Remove all advanced FieldValues for a `field-or-id`."
@@ -138,29 +146,50 @@
 
 (t2/define-before-insert :model/FieldValues
   [{:keys [field_id] :as field-values}]
-  (u/prog1 (merge {:type :full}
-                  field-values)
+  (u/prog1 (update field-values :type #(keyword (or % :full)))
     (assert-valid-human-readable-values field-values)
-    (assert-valid-field-values-type field-values)
-    ;; if inserting a new full fieldvalues, make sure all the advanced field-values of this field is deleted
-    (when (= (:type <>) :full)
+    (assert-valid-type-hash-combo <>)
+    ;; if inserting a new full fieldvalues, make sure all the advanced field-values of this field are deleted
+    (when (= :full (:type <>))
       (clear-advanced-field-values-for-field! field_id))))
 
 (t2/define-before-update :model/FieldValues
   [field-values]
-  (let [{:keys [type values hash_key]} (t2/changes field-values)]
-    (u/prog1 field-values
+  (let [changes (t2/changes field-values)]
+    (u/prog1 (update field-values :type #(keyword (or % :full)))
+      (assert-no-identity-changes (:id field-values) changes)
       (assert-valid-human-readable-values field-values)
-      (when (or type hash_key)
-        (throw (ex-info (tru "Can't update type or hash_key for a FieldValues.")
-                        {:type        type
-                         :hash_key    hash_key
-                         :status-code 400})))
       ;; if we're updating the values of a Full FieldValues, delete all Advanced FieldValues of this field
-      (when (and values
-                 (= (:type field-values) :full))
+      (when (and (contains? changes :values) (= :full (:type <>)))
         (clear-advanced-field-values-for-field! (:field_id field-values))))))
 
+(defn- assert-coherent-query [{:keys [type hash_key] :as field-values}]
+  (cond
+    (nil? type)
+    (when (some? hash_key)
+      (throw (ex-info "Invalid query - cannot specify a hash_key without specifying the type"
+                      {:field-values field-values})))
+
+    (= :full (keyword type))
+    (when (some? hash_key)
+      (throw (ex-info "Invalid query - :full FieldValues cannot have a hash_key"
+                      {:field-values field-values})))
+
+    (and (contains? field-values :hash_key) (nil? hash_key))
+    (throw (ex-info "Invalid query - Advanced FieldValues can only specify a non-empty hash_key"
+                    {:field-values field-values}))))
+
+(defn- add-mismatched-hash-filter [{:keys [type] :as field-values}]
+  (cond
+    (= :full (keyword type)) (assoc field-values :hash_key nil)
+    (some? type)             (update field-values :hash_key #(or % [:not= nil]))
+    :else                    field-values))
+
+(t2/define-before-select :model/FieldValues
+  [{:keys [kv-args] :as query}]
+  (assert-coherent-query kv-args)
+  (update query :kv-args add-mismatched-hash-filter))
+
 (t2/define-after-select :model/FieldValues
   [field-values]
   (cond-> field-values
diff --git a/test/metabase/models/field_values_test.clj b/test/metabase/models/field_values_test.clj
index 593407d9d08..e424faa3c3e 100644
--- a/test/metabase/models/field_values_test.clj
+++ b/test/metabase/models/field_values_test.clj
@@ -19,7 +19,8 @@
    [metabase.util :as u]
    [next.jdbc :as next.jdbc]
    [toucan2.core :as t2]
-   [toucan2.tools.with-temp :as t2.with-temp]))
+   [toucan2.tools.with-temp :as t2.with-temp])
+  (:import (clojure.lang ExceptionInfo)))
 
 (deftest ^:parallel field-should-have-field-values?-test
   (doseq [[group input->expected] {"Text and Category Fields"
@@ -276,29 +277,66 @@
 ;;; |                                                 Life Cycle                                                     |
 ;;; +----------------------------------------------------------------------------------------------------------------+
 
-(deftest insert-field-values-type-test
-  (testing "fieldvalues type=:full shouldn't have hash_key"
-    (is (thrown-with-msg?
-         clojure.lang.ExceptionInfo
-         #"Full FieldValues shouldnt have hash_key"
-         (t2.with-temp/with-temp [FieldValues _ {:field_id (mt/id :venues :id)
-                                                 :type :full
-                                                 :hash_key "random-hash"}]))))
-
-  (testing "Advanced fieldvalues requires a hash_key"
-    (is (thrown-with-msg?
-         clojure.lang.ExceptionInfo
-         #"Advanced FieldValues requires a hash_key"
-         (t2.with-temp/with-temp [FieldValues _ {:field_id (mt/id :venues :id)
-                                                 :type :sandbox}])))))
+(deftest insert-field-values-hook-test
+  (testing "The model hooks prevent us inserting invalid type / hash_key combination"
+    (let [field-id (mt/id :venues :id)]
+      (try
+        (is (thrown-with-msg? ExceptionInfo
+                              #"Full FieldValues shouldnt have hash_key"
+                              (t2/insert! :model/FieldValues :field_id field-id :hash_key "12345")))
+        (is (thrown-with-msg? ExceptionInfo
+                              #"Full FieldValues shouldnt have hash_key"
+                              (t2/insert! :model/FieldValues :field_id field-id :type :full :hash_key "12345")))
+        (is (thrown-with-msg? ExceptionInfo
+                              #"Advanced FieldValues require a hash_key"
+                              (t2/insert! :model/FieldValues :field_id field-id :type :sandbox)))
+        (is (thrown-with-msg? ExceptionInfo
+                              #"Advanced FieldValues require a hash_key"
+                              (t2/insert! :model/FieldValues :field_id field-id :type :sandbox :hash_key " ")))
+        (finally
+          ;; Clean up in case there were any failed assertions, and we ended up inserting values
+          (t2/delete! :model/FieldValues :field_id field-id))))))
+
+(deftest update-field-values-hook-test
+  (t2.with-temp/with-temp [FieldValues {full-id :id}    {:field_id (mt/id :venues :id)
+                                                             :type     :full}
+                           FieldValues {sandbox-id :id} {:field_id (mt/id :venues :id)
+                                                             :type     :sandbox
+                                                             :hash_key "random-hash"}]
+    (testing "The model hooks prevent us changing the intrinsic identity of a field values"
+      (doseq [[id update-map] [[sandbox-id {:field_id 1}]
+                               [sandbox-id {:type :full}]
+                               [sandbox-id {:type nil}]
+                               ;; this one should be ok, but toucan doesn't give the hook enough info to know better
+                               [full-id {:type nil}]
+                               [full-id {:type :sandbox}]
+                               [sandbox-id {:hash_key "another-hash"}]
+                               [sandbox-id {:hash_key nil}]
+                               [full-id {:hash_key "random-hash"}]
+                               ;; not even if it keeps type / hash consistency
+                               [sandbox-id {:type :full, :hash_key nil}]
+                               [full-id {:type :sandbox, :hash_key "random-hash"}]]]
+        (is (thrown-with-msg? ExceptionInfo
+                              #"Cant update field_id, type, or hash_key for a FieldValues."
+                              (t2/update! :model/FieldValues id update-map)))))
+
+    (testing "The model hooks permits mention of the existing values"
+      (doseq [[id update-map] [[full-id {:field_id (mt/id :venues :id)}]
+                               [sandbox-id {:type :sandbox}]
+                               [full-id {:type :full}]
+                               [sandbox-id {:hash_key "random-hash"}]
+                               [full-id {:hash_key nil}]
+                               [full-id {:type :full, :hash_key nil}]
+                               [sandbox-id {:type :sandbox, :hash_key "random-hash"}]]]
+        (is (some? (t2/update! :model/FieldValues id update-map)))))))
 
 (deftest insert-full-field-values-should-remove-all-cached-field-values
-  (mt/with-temp [FieldValues sandbox-fv {:field_id (mt/id :venues :id)
-                                         :type     :sandbox
-                                         :hash_key "random-hash"}]
-    (t2/insert! FieldValues {:field_id (mt/id :venues :id)
-                             :type     :full})
-    (is (not (t2/exists? FieldValues :id (:id sandbox-fv))))))
+  (doseq [explicitly-full? [false true]]
+    (mt/with-temp [FieldValues sandbox-fv {:field_id (mt/id :venues :id)
+                                           :type     :sandbox
+                                           :hash_key "random-hash"}]
+      (t2/insert! FieldValues (cond-> {:field_id (mt/id :venues :id)} explicitly-full? (assoc :type :full)))
+      (is (not (t2/exists? FieldValues :id (:id sandbox-fv)))))))
 
 (deftest update-full-field-values-should-remove-all-cached-field-values
   (mt/with-temp [FieldValues fv         {:field_id (mt/id :venues :id)
@@ -309,20 +347,14 @@
     (t2/update! FieldValues (:id fv) {:values [1 2 3]})
     (is (not (t2/exists? FieldValues :id (:id sandbox-fv))))))
 
-(deftest cant-update-type-or-has-of-a-field-values-test
-  (t2.with-temp/with-temp [FieldValues fv {:field_id (mt/id :venues :id)
-                                           :type     :sandbox
-                                           :hash_key "random-hash"}]
-    (is (thrown-with-msg?
-         clojure.lang.ExceptionInfo
-         #"Cant update type or hash_key for a FieldValues."
-         (t2/update! FieldValues (:id fv) {:type :full})))
-
-    (is (thrown-with-msg?
-         clojure.lang.ExceptionInfo
-         #"Cant update type or hash_key for a FieldValues."
-         (t2/update! FieldValues (:id fv) {:hash_key "new-hash"})))))
-
+(deftest update-full-field-without-values-should-remove-not-all-cached-field-values
+  (mt/with-temp [FieldValues fv         {:field_id (mt/id :venues :id)
+                                         :type     :full}
+                 FieldValues sandbox-fv {:field_id (mt/id :venues :id)
+                                         :type     :sandbox
+                                         :hash_key "random-hash"}]
+    (t2/update! FieldValues (:id fv) {:updated_at (t/zoned-date-time)})
+    (is (t2/exists? FieldValues :id (:id sandbox-fv)))))
 
 (deftest identity-hash-test
   (testing "Field hashes are composed of the name and the table's identity-hash"
@@ -333,3 +365,35 @@
       (is (= "6f5bb4ba"
              (serdes/raw-hash [(serdes/identity-hash field)])
              (serdes/identity-hash fv))))))
+
+(deftest select-coherence-test
+  (testing "We cannot perform queries with invalid mixes of type and hash_key, which would return nothing"
+    (t2/select :model/FieldValues :field_id 1)
+    (t2/select :model/FieldValues :field_id 1 :type :full)
+    (is (thrown-with-msg? ExceptionInfo
+                          #"Invalid query - :full FieldValues cannot have a hash_key"
+                          (t2/select :model/FieldValues :field_id 1 :type :full :hash_key "12345")))
+
+    (t2/select :model/FieldValues :field_id 1 :type :sandbox)
+    (t2/select :model/FieldValues :field_id 1 :type :sandbox :hash_key "12345")
+    (is (thrown-with-msg? ExceptionInfo
+                          #"Invalid query - Advanced FieldValues can only specify a non-empty hash_key"
+                          (t2/select :model/FieldValues :field_id 1 :type :sandbox :hash_key nil)))))
+
+(deftest select-safety-filter-test
+  (testing "We do not modify queries that omit type"
+    ;; We could push down a WHERE clause to filter mismatched rows, but for performance reasons we do not.
+    (is (= {} (#'field-values/add-mismatched-hash-filter {})))
+    ;; Is there really a use-case for reading all these values?
+    ;; Perhaps we should require a type/hash combo - we would need to be careful it doesn't break any existing queries.
+    (is (= {:field_id 1} (#'field-values/add-mismatched-hash-filter {:field_id 1}))))
+
+  ;; There's an argument to be made that we should only query on these "identity" fields if the field-id is present,
+  ;; but perhaps there are use cases that I haven't considered.
+  (testing "Queries that fully specify the identity are not mangled"
+    (is (= {:type :full, :hash_key nil} (#'field-values/add-mismatched-hash-filter {:type :full, :hash_key nil})))
+    (is (= {:type :sandbox, :hash_key "random-hash"} (#'field-values/add-mismatched-hash-filter {:type :sandbox, :hash_key "random-hash"}))))
+
+  (testing "Ambiguous queries are upgraded to ensure invalid rows are filtered"
+    (is (= {:type :full, :hash_key nil} (#'field-values/add-mismatched-hash-filter {:type :full})))
+    (is (= {:type :sandbox, :hash_key [:not= nil]} (#'field-values/add-mismatched-hash-filter {:type :sandbox})))))
-- 
GitLab