diff --git a/src/metabase/models/field.clj b/src/metabase/models/field.clj index beceb84a43869c1d11fd9556b32bf62bd5712f9d..0f2b0a677ea8796ce924479710dfef93cc7ee108 100644 --- a/src/metabase/models/field.clj +++ b/src/metabase/models/field.clj @@ -193,7 +193,7 @@ (defn values "Return the `FieldValues` associated with this `field`." [{:keys [id]}] - (t2/select [FieldValues :field_id :values], :field_id id)) + (t2/select [FieldValues :field_id :values], :field_id id :type :full)) (mu/defn nested-field-names->field-id :- [:maybe ms/PositiveInt] "Recusively find the field id for a nested field name, return nil if not found. diff --git a/src/metabase/models/field_values.clj b/src/metabase/models/field_values.clj index 40379a1181227f3e411303823f3367ca32133c03..c4b1f29bf88067b003e382e8437736e22f0692a9 100644 --- a/src/metabase/models/field_values.clj +++ b/src/metabase/models/field_values.clj @@ -23,6 +23,7 @@ There is also more written about how these are used for remapping in the docstrings for [[metabase.models.params.chain-filter]] and [[metabase.query-processor.middleware.add-dimension-projections]]." (:require + [clojure.string :as str] [java-time.api :as t] [malli.core :as mc] [medley.core :as m] @@ -104,26 +105,33 @@ {:human-readable-values human-readable-values :status-code 400})))) -(defn- assert-valid-field-values-type +(defn- assert-valid-type-hash-combo + "Ensure that type is present, valid, and that a hash_key is provided iff this is an advanced field type." [{:keys [type hash_key] :as _field-values}] - (when type - (when-not (contains? field-values-types type) - (throw (ex-info (tru "Invalid field-values type.") - {:type type - :stauts-code 400}))) - - (when (and (= type :full) - hash_key) - (throw (ex-info (tru "Full FieldValues shouldn't have hash_key.") - {:type type - :hash_key hash_key - :status-code 400}))) - - (when (and (advanced-field-values-types type) - (empty? hash_key)) - (throw (ex-info (tru "Advanced FieldValues requires a hash_key.") - {:type type - :status-code 400}))))) + (when-not (contains? field-values-types type) + (throw (ex-info (tru "Invalid field-values type.") + {:type type + :status-code 400}))) + + (when (and (= type :full) hash_key) + (throw (ex-info (tru "Full FieldValues shouldn't have hash_key.") + {:type type + :hash_key hash_key + :status-code 400}))) + + (when (and (advanced-field-values-types type) (str/blank? hash_key)) + (throw (ex-info (tru "Advanced FieldValues require a hash_key.") + {:type type + :status-code 400})))) + +(defn- assert-no-identity-changes [id changes] + (when (some #(contains? changes %) [:field_id :type :hash_key]) + (throw (ex-info (tru "Can't update field_id, type, or hash_key for a FieldValues.") + {:id id + :field_id (:field_id changes) + :type (:type changes) + :hash_key (:hash_key changes) + :status-code 400})))) (defn clear-advanced-field-values-for-field! "Remove all advanced FieldValues for a `field-or-id`." @@ -138,29 +146,50 @@ (t2/define-before-insert :model/FieldValues [{:keys [field_id] :as field-values}] - (u/prog1 (merge {:type :full} - field-values) + (u/prog1 (update field-values :type #(keyword (or % :full))) (assert-valid-human-readable-values field-values) - (assert-valid-field-values-type field-values) - ;; if inserting a new full fieldvalues, make sure all the advanced field-values of this field is deleted - (when (= (:type <>) :full) + (assert-valid-type-hash-combo <>) + ;; if inserting a new full fieldvalues, make sure all the advanced field-values of this field are deleted + (when (= :full (:type <>)) (clear-advanced-field-values-for-field! field_id)))) (t2/define-before-update :model/FieldValues [field-values] - (let [{:keys [type values hash_key]} (t2/changes field-values)] - (u/prog1 field-values + (let [changes (t2/changes field-values)] + (u/prog1 (update field-values :type #(keyword (or % :full))) + (assert-no-identity-changes (:id field-values) changes) (assert-valid-human-readable-values field-values) - (when (or type hash_key) - (throw (ex-info (tru "Can't update type or hash_key for a FieldValues.") - {:type type - :hash_key hash_key - :status-code 400}))) ;; if we're updating the values of a Full FieldValues, delete all Advanced FieldValues of this field - (when (and values - (= (:type field-values) :full)) + (when (and (contains? changes :values) (= :full (:type <>))) (clear-advanced-field-values-for-field! (:field_id field-values)))))) +(defn- assert-coherent-query [{:keys [type hash_key] :as field-values}] + (cond + (nil? type) + (when (some? hash_key) + (throw (ex-info "Invalid query - cannot specify a hash_key without specifying the type" + {:field-values field-values}))) + + (= :full (keyword type)) + (when (some? hash_key) + (throw (ex-info "Invalid query - :full FieldValues cannot have a hash_key" + {:field-values field-values}))) + + (and (contains? field-values :hash_key) (nil? hash_key)) + (throw (ex-info "Invalid query - Advanced FieldValues can only specify a non-empty hash_key" + {:field-values field-values})))) + +(defn- add-mismatched-hash-filter [{:keys [type] :as field-values}] + (cond + (= :full (keyword type)) (assoc field-values :hash_key nil) + (some? type) (update field-values :hash_key #(or % [:not= nil])) + :else field-values)) + +(t2/define-before-select :model/FieldValues + [{:keys [kv-args] :as query}] + (assert-coherent-query kv-args) + (update query :kv-args add-mismatched-hash-filter)) + (t2/define-after-select :model/FieldValues [field-values] (cond-> field-values diff --git a/test/metabase/models/field_values_test.clj b/test/metabase/models/field_values_test.clj index 593407d9d084d20fc5f856fda58e320bed8de816..e424faa3c3e56d1600ab154dece78366e2c958e5 100644 --- a/test/metabase/models/field_values_test.clj +++ b/test/metabase/models/field_values_test.clj @@ -19,7 +19,8 @@ [metabase.util :as u] [next.jdbc :as next.jdbc] [toucan2.core :as t2] - [toucan2.tools.with-temp :as t2.with-temp])) + [toucan2.tools.with-temp :as t2.with-temp]) + (:import (clojure.lang ExceptionInfo))) (deftest ^:parallel field-should-have-field-values?-test (doseq [[group input->expected] {"Text and Category Fields" @@ -276,29 +277,66 @@ ;;; | Life Cycle | ;;; +----------------------------------------------------------------------------------------------------------------+ -(deftest insert-field-values-type-test - (testing "fieldvalues type=:full shouldn't have hash_key" - (is (thrown-with-msg? - clojure.lang.ExceptionInfo - #"Full FieldValues shouldnt have hash_key" - (t2.with-temp/with-temp [FieldValues _ {:field_id (mt/id :venues :id) - :type :full - :hash_key "random-hash"}])))) - - (testing "Advanced fieldvalues requires a hash_key" - (is (thrown-with-msg? - clojure.lang.ExceptionInfo - #"Advanced FieldValues requires a hash_key" - (t2.with-temp/with-temp [FieldValues _ {:field_id (mt/id :venues :id) - :type :sandbox}]))))) +(deftest insert-field-values-hook-test + (testing "The model hooks prevent us inserting invalid type / hash_key combination" + (let [field-id (mt/id :venues :id)] + (try + (is (thrown-with-msg? ExceptionInfo + #"Full FieldValues shouldnt have hash_key" + (t2/insert! :model/FieldValues :field_id field-id :hash_key "12345"))) + (is (thrown-with-msg? ExceptionInfo + #"Full FieldValues shouldnt have hash_key" + (t2/insert! :model/FieldValues :field_id field-id :type :full :hash_key "12345"))) + (is (thrown-with-msg? ExceptionInfo + #"Advanced FieldValues require a hash_key" + (t2/insert! :model/FieldValues :field_id field-id :type :sandbox))) + (is (thrown-with-msg? ExceptionInfo + #"Advanced FieldValues require a hash_key" + (t2/insert! :model/FieldValues :field_id field-id :type :sandbox :hash_key " "))) + (finally + ;; Clean up in case there were any failed assertions, and we ended up inserting values + (t2/delete! :model/FieldValues :field_id field-id)))))) + +(deftest update-field-values-hook-test + (t2.with-temp/with-temp [FieldValues {full-id :id} {:field_id (mt/id :venues :id) + :type :full} + FieldValues {sandbox-id :id} {:field_id (mt/id :venues :id) + :type :sandbox + :hash_key "random-hash"}] + (testing "The model hooks prevent us changing the intrinsic identity of a field values" + (doseq [[id update-map] [[sandbox-id {:field_id 1}] + [sandbox-id {:type :full}] + [sandbox-id {:type nil}] + ;; this one should be ok, but toucan doesn't give the hook enough info to know better + [full-id {:type nil}] + [full-id {:type :sandbox}] + [sandbox-id {:hash_key "another-hash"}] + [sandbox-id {:hash_key nil}] + [full-id {:hash_key "random-hash"}] + ;; not even if it keeps type / hash consistency + [sandbox-id {:type :full, :hash_key nil}] + [full-id {:type :sandbox, :hash_key "random-hash"}]]] + (is (thrown-with-msg? ExceptionInfo + #"Cant update field_id, type, or hash_key for a FieldValues." + (t2/update! :model/FieldValues id update-map))))) + + (testing "The model hooks permits mention of the existing values" + (doseq [[id update-map] [[full-id {:field_id (mt/id :venues :id)}] + [sandbox-id {:type :sandbox}] + [full-id {:type :full}] + [sandbox-id {:hash_key "random-hash"}] + [full-id {:hash_key nil}] + [full-id {:type :full, :hash_key nil}] + [sandbox-id {:type :sandbox, :hash_key "random-hash"}]]] + (is (some? (t2/update! :model/FieldValues id update-map))))))) (deftest insert-full-field-values-should-remove-all-cached-field-values - (mt/with-temp [FieldValues sandbox-fv {:field_id (mt/id :venues :id) - :type :sandbox - :hash_key "random-hash"}] - (t2/insert! FieldValues {:field_id (mt/id :venues :id) - :type :full}) - (is (not (t2/exists? FieldValues :id (:id sandbox-fv)))))) + (doseq [explicitly-full? [false true]] + (mt/with-temp [FieldValues sandbox-fv {:field_id (mt/id :venues :id) + :type :sandbox + :hash_key "random-hash"}] + (t2/insert! FieldValues (cond-> {:field_id (mt/id :venues :id)} explicitly-full? (assoc :type :full))) + (is (not (t2/exists? FieldValues :id (:id sandbox-fv))))))) (deftest update-full-field-values-should-remove-all-cached-field-values (mt/with-temp [FieldValues fv {:field_id (mt/id :venues :id) @@ -309,20 +347,14 @@ (t2/update! FieldValues (:id fv) {:values [1 2 3]}) (is (not (t2/exists? FieldValues :id (:id sandbox-fv)))))) -(deftest cant-update-type-or-has-of-a-field-values-test - (t2.with-temp/with-temp [FieldValues fv {:field_id (mt/id :venues :id) - :type :sandbox - :hash_key "random-hash"}] - (is (thrown-with-msg? - clojure.lang.ExceptionInfo - #"Cant update type or hash_key for a FieldValues." - (t2/update! FieldValues (:id fv) {:type :full}))) - - (is (thrown-with-msg? - clojure.lang.ExceptionInfo - #"Cant update type or hash_key for a FieldValues." - (t2/update! FieldValues (:id fv) {:hash_key "new-hash"}))))) - +(deftest update-full-field-without-values-should-remove-not-all-cached-field-values + (mt/with-temp [FieldValues fv {:field_id (mt/id :venues :id) + :type :full} + FieldValues sandbox-fv {:field_id (mt/id :venues :id) + :type :sandbox + :hash_key "random-hash"}] + (t2/update! FieldValues (:id fv) {:updated_at (t/zoned-date-time)}) + (is (t2/exists? FieldValues :id (:id sandbox-fv))))) (deftest identity-hash-test (testing "Field hashes are composed of the name and the table's identity-hash" @@ -333,3 +365,35 @@ (is (= "6f5bb4ba" (serdes/raw-hash [(serdes/identity-hash field)]) (serdes/identity-hash fv)))))) + +(deftest select-coherence-test + (testing "We cannot perform queries with invalid mixes of type and hash_key, which would return nothing" + (t2/select :model/FieldValues :field_id 1) + (t2/select :model/FieldValues :field_id 1 :type :full) + (is (thrown-with-msg? ExceptionInfo + #"Invalid query - :full FieldValues cannot have a hash_key" + (t2/select :model/FieldValues :field_id 1 :type :full :hash_key "12345"))) + + (t2/select :model/FieldValues :field_id 1 :type :sandbox) + (t2/select :model/FieldValues :field_id 1 :type :sandbox :hash_key "12345") + (is (thrown-with-msg? ExceptionInfo + #"Invalid query - Advanced FieldValues can only specify a non-empty hash_key" + (t2/select :model/FieldValues :field_id 1 :type :sandbox :hash_key nil))))) + +(deftest select-safety-filter-test + (testing "We do not modify queries that omit type" + ;; We could push down a WHERE clause to filter mismatched rows, but for performance reasons we do not. + (is (= {} (#'field-values/add-mismatched-hash-filter {}))) + ;; Is there really a use-case for reading all these values? + ;; Perhaps we should require a type/hash combo - we would need to be careful it doesn't break any existing queries. + (is (= {:field_id 1} (#'field-values/add-mismatched-hash-filter {:field_id 1})))) + + ;; There's an argument to be made that we should only query on these "identity" fields if the field-id is present, + ;; but perhaps there are use cases that I haven't considered. + (testing "Queries that fully specify the identity are not mangled" + (is (= {:type :full, :hash_key nil} (#'field-values/add-mismatched-hash-filter {:type :full, :hash_key nil}))) + (is (= {:type :sandbox, :hash_key "random-hash"} (#'field-values/add-mismatched-hash-filter {:type :sandbox, :hash_key "random-hash"})))) + + (testing "Ambiguous queries are upgraded to ensure invalid rows are filtered" + (is (= {:type :full, :hash_key nil} (#'field-values/add-mismatched-hash-filter {:type :full}))) + (is (= {:type :sandbox, :hash_key [:not= nil]} (#'field-values/add-mismatched-hash-filter {:type :sandbox})))))