diff --git a/src/metabase/models/field_values.clj b/src/metabase/models/field_values.clj index 3c892add80d9b19bb56f57bb641d86acfe31ffb9..f4798d8fc24bc7f824734d6fabfe98f4f32fa096 100644 --- a/src/metabase/models/field_values.clj +++ b/src/metabase/models/field_values.clj @@ -394,22 +394,30 @@ nil))) (defn- delete-duplicates-and-return-latest! - "This is a workaround for the issue of stale FieldValues rows (metabase#668) - In order to mitigate the impact of duplicates, we return the most recently updated row, and delete the older rows." - [rows] - (if (<= (count rows) 1) - (first rows) - (let [[latest & duplicates] (sort-by :updated_at u/reverse-compare rows)] - (t2/delete! FieldValues :id [:in (map :id duplicates)]) - latest))) - -(defn get-latest-field-values + "Takes a list of field values, return a map of field-id -> latest FieldValues. + + If a field has more than one Field Values, delete the old ones. This is a workaround for the issue of stale FieldValues rows (metabase#668) + In order to mitigate the impact of duplicates, we return the most recently updated row, and delete the older rows. + + It assumes that all rows are of the same type. Rows could be from multiple field-ids." + [fvs] + (let [fvs-grouped-by-field-id (update-vals (group-by :field_id fvs) + #(sort-by :updated_at u/reverse-compare %)) + to-delete-fv-ids (->> (vals fvs-grouped-by-field-id) + (mapcat rest) + (map :id))] + (when (seq to-delete-fv-ids) + (t2/delete! :model/FieldValues :id [:in to-delete-fv-ids])) + (update-vals fvs-grouped-by-field-id first))) + +(defn- get-latest-field-values "This returns the FieldValues with the given :type and :hash_key for the given Field. - This may implicitly delete shadowed entries in the database, see [[delete-duplicates-and-return-latest!]]" + This may implicitly delete shadowed entries in the database, see [[delete-duplicates-and-return-latest!]]" [field-id type hash] (assert (= (nil? hash) (= type :full)) ":hash_key must be nil iff :type is :full") - (delete-duplicates-and-return-latest! - (t2/select FieldValues :field_id field-id :type type :hash_key hash))) + (-> (t2/select :model/FieldValues :field_id field-id :type type :hash_key hash) + delete-duplicates-and-return-latest! + (get field-id))) (defn get-latest-full-field-values "This returns the full FieldValues for the given Field. @@ -417,6 +425,14 @@ [field-id] (get-latest-field-values field-id :full nil)) +(defn batched-get-latest-full-field-values + "Batched version of [[get-latest-full-field-values]] . + Takes a list of field-ids and returns a map of field-id -> full FieldValues. + This may implicitly delete shadowed entries in the database, see [[delete-duplicates-and-return-latest!]]" + [field-ids] + (delete-duplicates-and-return-latest! + (t2/select :model/FieldValues :field_id [:in field-ids] :type :full :hash_key nil))) + (defn create-or-update-full-field-values! "Create or update the full FieldValues object for `field`. If the FieldValues object already exists, then update values for it; otherwise create a new FieldValues object with the newly fetched values. Returns whether the field values were diff --git a/src/metabase/models/params/field_values.clj b/src/metabase/models/params/field_values.clj index cc29ee1e885797cce36fa7ca25c5fce600da6d95..3bc7df11756cc6e2417534b2f2c1279ca0bd1d13 100644 --- a/src/metabase/models/params/field_values.clj +++ b/src/metabase/models/params/field_values.clj @@ -2,7 +2,6 @@ "Code related to fetching FieldValues for Fields to populate parameter widgets. Always used by the field values (`GET /api/field/:id/values`) endpoint; used by the chain filter endpoints under certain circumstances." (:require - [medley.core :as m] [metabase.db.query :as mdb.query] [metabase.models.field :as field] [metabase.models.field-values :as field-values :refer [FieldValues]] @@ -46,15 +45,12 @@ "OSS implementation; used as a fallback for the EE implementation for any fields that aren't subject to sandboxing." [field-ids] (when (seq field-ids) - (not-empty - (let [fields (-> (t2/select :model/Field :id [:in (set field-ids)]) - (field/readable-fields-only) - (t2/hydrate :values)) - field-values (->> (map #(select-keys (field-values/get-latest-full-field-values (:id %)) - [:field_id :human_readable_values :values]) - fields) - (keep not-empty))] - (m/index-by :field_id field-values))))) + (let [field-ids (->> (t2/select :model/Field :id [:in (set field-ids)]) + field/readable-fields-only + (map :id))] + (when (seq field-ids) + (update-vals (field-values/batched-get-latest-full-field-values field-ids) + #(select-keys % [:field_id :human_readable_values :values])))))) (defenterprise field-id->field-values-for-current-user "Fetch *existing* FieldValues for a sequence of `field-ids` for the current User. Values are returned as a map of diff --git a/test/metabase/models/field_values_test.clj b/test/metabase/models/field_values_test.clj index f4240f3020ca5d455f634befeb79060ccd27145a..4fa10a0f2a394711876d06c31d21a1d2ce96a9c6 100644 --- a/test/metabase/models/field_values_test.clj +++ b/test/metabase/models/field_values_test.clj @@ -211,6 +211,28 @@ ;; double check that we deleted the correct row (is (= ["C" "D"] (:human_readable_values (field-values/get-latest-full-field-values field-id)))))))))) +(deftest implicit-deduplication-batched-test + (let [before (t/zoned-date-time) + after (t/plus before (t/millis 1)) + later (t/plus after (t/millis 1))] + (mt/with-temp [:model/Database {database-id :id} {} + :model/Table {table-id :id} {:db_id database-id} + ;; will have duplicated field values + :model/Field {field-id-1 :id} {:table_id table-id} + ;; have only one field values + :model/Field {field-id-2 :id} {:table_id table-id} + ;; doesn't have a a field values + :model/Field {field-id-3 :id} {:table_id table-id} + :model/FieldValues _ {:field_id field-id-1 :type :full :values ["a" "b"] :human_readable_values ["A" "B"] :created_at before :updated_at before} + :model/FieldValues _ {:field_id field-id-1 :type :full :values ["c" "d"] :human_readable_values ["C" "D"] :created_at before :updated_at later} + :model/FieldValues _ {:field_id field-id-2 :type :full :values ["e" "f"] :human_readable_values ["E" "F"] :created_at after :updated_at after}] + (testing "When we have multiple FieldValues rows in the database, we always return the most recently updated row" + (is (=? {field-id-1 {:values ["c" "d"]} + field-id-2 {:values ["e" "f"]}} + (field-values/batched-get-latest-full-field-values [field-id-1 field-id-2 field-id-3]))) + (testing "and older values are implicitly deleted" + (is (= 1 (count (t2/select FieldValues :field_id field-id-1 :type :full))))))))) + (deftest get-or-create-full-field-values!-test (mt/dataset test-data (testing "create a full Fieldvalues if it does not exist"