diff --git a/src/metabase/models.clj b/src/metabase/models.clj index 437af22eb72b49bf07d3d43f1cebfeffb36b1dd1..44981a1e24dd0ca2fb404be70a8645e4a2c4756a 100644 --- a/src/metabase/models.clj +++ b/src/metabase/models.clj @@ -1,5 +1,10 @@ (ns metabase.models + ;; metabase.search.postgres.ingestion has not been exposed publicly yet, it needs a higher level API + #_{:clj-kondo/ignore [:metabase/ns-module-checker]} (:require + [clojure.string :as str] + [environ.core :as env] + [metabase.config :as config] [metabase.models.action :as action] [metabase.models.application-permissions-revision :as a-perm-revision] [metabase.models.bookmark :as bookmark] @@ -54,9 +59,12 @@ [metabase.models.view-log :as view-log] [metabase.plugins.classloader :as classloader] [metabase.public-settings.premium-features :refer [defenterprise]] + [metabase.search :as search] + [metabase.search.postgres.ingestion :as search.ingestion] [metabase.util :as u] [methodical.core :as methodical] [potemkin :as p] + [toucan2.core :as t2] [toucan2.model :as t2.model])) ;; Fool the linter @@ -192,3 +200,37 @@ (when (isa? metabase-models-keyword :metabase/model) metabase-models-keyword))) (next-method symb))) + +;; Models must derive from :hook/search-index if their state can influence the contents of the Search Index. +;; Note that it might not be the model itself that depends on it, for example, Dashcards are used in Card entries. +;; Don't worry about whether you've added it in the right place, we have tests to ensure that it is derived if, and only +;; if, it is required. + +(t2/define-after-insert :hook/search-index + [instance] + (when (search/supports-index?) + (search.ingestion/update-index! instance true)) + instance) + +;; Hidden behind an obscure environment variable, as it may cause performance problems. +;; See https://github.com/camsaul/toucan2/issues/195 +(defn- update-hook-enabled? [] + (or config/is-dev? + config/is-test? + (when-let [setting (:mb-experimental-search-index-realtime-updates env/env)] + (and (not (str/blank? setting)) + (not= "false" setting))))) + +(when (update-hook-enabled?) + (t2/define-after-update :hook/search-index + [instance] + (when (search/supports-index?) + (search.ingestion/update-index! instance)) + nil)) + +;; Too much of a performance risk. +#_(t2/define-before-delete :metabase/model + [instance] + (when (search/supports-index?) + (search.ingestion/update-index! instance)) + instance) diff --git a/src/metabase/models/action.clj b/src/metabase/models/action.clj index 1bcf16983ff5a6c861775c4638892075684e3124..81f7dfa02eb4a268a1bf858865fa10ad705c2c03 100644 --- a/src/metabase/models/action.clj +++ b/src/metabase/models/action.clj @@ -39,6 +39,8 @@ (doseq [model action-sub-models] (derive model :metabase/model)) +(derive :model/QueryAction :hook/search-index) + (methodical/defmethod t2/primary-keys :model/QueryAction [_model] [:action_id]) (methodical/defmethod t2/primary-keys :model/HTTPAction [_model] [:action_id]) (methodical/defmethod t2/primary-keys :model/ImplicitAction [_model] [:action_id]) diff --git a/src/metabase/models/card.clj b/src/metabase/models/card.clj index e7aa56aaaf22b9ac7988bc4c11fda6c09df185c9..024f5da17ce90a1da4ed8325933af7476f86c92d 100644 --- a/src/metabase/models/card.clj +++ b/src/metabase/models/card.clj @@ -1052,8 +1052,12 @@ [:= :mr.moderated_item_type "card"] [:= :mr.moderated_item_id :this.id] [:= :mr.most_recent true]]] - ;; workaround for dataflow :(((((( - :dashcard [:model/DashboardCard [:= :dashcard.card_id :this.id]]}}) + ;; Workaround for dataflow :(((((( + ;; NOTE: disabled for now, as this is not a very important ranker and can afford to have stale data, + ;; and could cause a large increase in the query count for dashboard updates. + ;; (see the test failures when this hook is added back) + ;:dashcard [:model/DashboardCard [:= :dashcard.card_id :this.id]] + }}) (search/define-spec "card" (-> base-search-spec (sql.helpers/where [:= :this.type "question"]))) diff --git a/src/metabase/models/dashboard_card.clj b/src/metabase/models/dashboard_card.clj index 63dd75640f1ac50a68f486e082d0c822dc22df49..b4ab833a2d6e33ecf4a04d8b38047991ea4b3982 100644 --- a/src/metabase/models/dashboard_card.clj +++ b/src/metabase/models/dashboard_card.clj @@ -27,7 +27,9 @@ (derive ::mi/read-policy.full-perms-for-perms-set) (derive ::mi/write-policy.full-perms-for-perms-set) (derive :hook/timestamped?) - (derive :hook/entity-id)) + (derive :hook/entity-id) + ;; Disabled for performance reasons, see update-dashboard-card!-call-count-test + #_(derive :hook/search-index)) (t2/deftransforms :model/DashboardCard {:parameter_mappings mi/transform-parameters-list diff --git a/src/metabase/models/model_index.clj b/src/metabase/models/model_index.clj index 76e59ace31b46d85e91d6cd42b12e7e437f7cf0c..fcd5118d84c663c7a8c18ca1c7cf9889f5928a80 100644 --- a/src/metabase/models/model_index.clj +++ b/src/metabase/models/model_index.clj @@ -36,6 +36,8 @@ (derive :model/ModelIndexValue :metabase/model) (derive :model/ModelIndex :hook/created-at-timestamped?) +(derive :model/ModelIndex :hook/search-index) +(derive :model/ModelIndexValue :hook/search-index) (t2/deftransforms ModelIndex {:pk_ref mi/transform-field-ref diff --git a/src/metabase/models/moderation_review.clj b/src/metabase/models/moderation_review.clj index c9c6d5e738eeed60acba07ecf09d3e7248638c68..39e2d9cefe979555f99af1f687077bcb981d4d6f 100644 --- a/src/metabase/models/moderation_review.clj +++ b/src/metabase/models/moderation_review.clj @@ -41,7 +41,8 @@ (derive :metabase/model) ;;; TODO: this is wrong, but what should it be? (derive ::perms/use-parent-collection-perms) - (derive :hook/timestamped?)) + (derive :hook/timestamped?) + (derive :hook/search-index)) (t2/deftransforms :model/ModerationReview {:moderated_item_type mi/transform-keyword}) diff --git a/src/metabase/models/revision.clj b/src/metabase/models/revision.clj index 0a021049ec461e6edb33611bff37261eb60ebd71..36d486180fd6e0b15368af3cd64d19373caa6d7a 100644 --- a/src/metabase/models/revision.clj +++ b/src/metabase/models/revision.clj @@ -76,7 +76,8 @@ (methodical/defmethod t2/table-name :model/Revision [_model] :revision) (doto :model/Revision - (derive :metabase/model)) + (derive :metabase/model) + (derive :hook/search-index)) (t2/deftransforms :model/Revision {:object mi/transform-json}) diff --git a/src/metabase/search/postgres/ingestion.clj b/src/metabase/search/postgres/ingestion.clj index 2c7dc309a9224abd656646607b148ce4dd711198..4577838fac72a5e8194f718993187529d5d81ab7 100644 --- a/src/metabase/search/postgres/ingestion.clj +++ b/src/metabase/search/postgres/ingestion.clj @@ -108,8 +108,8 @@ (defn update-index! "Given a new or updated instance, create or update all the corresponding search entries if needed." - [instance] - (when-let [updates (seq (search.spec/search-models-to-update instance))] + [instance & [always?]] + (when-let [updates (seq (search.spec/search-models-to-update instance always?))] ;; We need to delay execution to handle deletes, which alert us *before* updating the database. ;; TODO It's dangerous to simply unleash threads on the world, this should use a queue in future. (run-on-thread diff --git a/src/metabase/search/spec.clj b/src/metabase/search/spec.clj index 027cd158149d732b80377203e1d5ec530717f0d3..a37956d5899038ff520c132d815346f6f4082f11 100644 --- a/src/metabase/search/spec.clj +++ b/src/metabase/search/spec.clj @@ -246,6 +246,7 @@ (assoc :name ~search-model) (update :attrs #(merge ~default-attrs %)))] (validate-spec! spec#) + (derive (:model spec#) :hook/search-index) (defmethod spec ~search-model [~'_] spec#))) ;; TODO we should memoize this for production (based on spec values) @@ -258,12 +259,16 @@ (defn search-models-to-update "Given an updated or created instance, return a description of which search-models to (re)index." - [instance] + [instance & [always?]] (into #{} (keep (fn [{:keys [search-model fields where]}] - ;; If there are no changes, treat it as if everything has changed. - ;; Likewise, if there are no field dependencies, always do it - this is a hack for dashcards to cards. - (when (or (not fields) (some fields (keys (or (t2/changes instance) instance)))) + (when (or always? (and fields (some fields (keys (or (t2/changes instance) instance))))) [search-model (insert-values where :updated instance)]))) (get (model-hooks) (t2/model instance)))) + +(comment + (doseq [d (descendants :hook/search-index)] + (underive d :hook/search-index)) + (doseq [d (keys (model-hooks))] + (derive d :hook/search-index))) diff --git a/test/metabase/models/dashboard_card_test.clj b/test/metabase/models/dashboard_card_test.clj index 1e6ecf4c94ad2af144d6c77fbb3f2418ba9ed589..4ef178c28d4cc756082d65a534c97d556f47f17c 100644 --- a/test/metabase/models/dashboard_card_test.clj +++ b/test/metabase/models/dashboard_card_test.clj @@ -248,7 +248,7 @@ :size_x 3 :size_y 4 :series []}]) - ;; this is usually 10 but it can be 11 sometimes in CI for some reason + ;; this is usually 10, but it can be 11 sometimes in CI for some reason (is (contains? #{10 11} (call-count))))))))) (deftest normalize-parameter-mappings-test diff --git a/test/metabase/search/postgres/index_test.clj b/test/metabase/search/postgres/index_test.clj index c06232ba1a403d35dce3a3d08d0e2c76924d3576..408431d5b3b09fa5a3ce0aef7085071c6d948737 100644 --- a/test/metabase/search/postgres/index_test.clj +++ b/test/metabase/search/postgres/index_test.clj @@ -56,42 +56,35 @@ (deftest incremental-update-test (with-index - (testing "The index is updated when models change" - ;; Has a second entry is "Revenue Project(ions)", when using English dictionary - (is (= 1 #_2 (count (search.index/search "Projected Revenue")))) - (is (= 0 (count (search.index/search "Protected Avenue")))) - - (t2/update! :model/Card {:name "Projected Revenue"} {:name "Protected Avenue"}) - ;; TODO wire up an actual hook - (search.ingestion/update-index! (t2/select-one :model/Card :name "Protected Avenue")) - - ;; wait for the background thread - (is (= 0 #_1 (count (search.index/search "Projected Revenue")))) - (is (= 1 (count (search.index/search "Protected Avenue")))) - - ;; TODO wire up the actual hook, and actually delete it - (search.ingestion/delete-model! (t2/select-one :model/Card :name "Protected Avenue")) - - (is (= 0 #_1 (count (search.index/search "Projected Revenue")))) - (is (= 0 (count (search.index/search "Protected Avenue"))))))) + (mt/with-temporary-setting-values [experimental-search-index-realtime-updates true] + (testing "The index is updated when models change" + ;; Has a second entry is "Revenue Project(ions)", when using English dictionary + (is (= 1 #_2 (count (search.index/search "Projected Revenue")))) + (is (= 0 (count (search.index/search "Protected Avenue")))) + (t2/update! :model/Card {:name "Projected Revenue"} {:name "Protected Avenue"}) + (is (= 0 #_1 (count (search.index/search "Projected Revenue")))) + (is (= 1 (count (search.index/search "Protected Avenue")))) + ;; Delete hooks are disabled, for now, over performance concerns. + ;(t2/delete! :model/Card :name "Protected Avenue") + (search.ingestion/delete-model! (t2/select-one :model/Card :name "Protected Avenue")) + (is (= 0 #_1 (count (search.index/search "Projected Revenue")))) + (is (= 0 (count (search.index/search "Protected Avenue")))))))) (deftest related-update-test (with-index - (testing "The index is updated when model dependencies change" - (let [index-table @#'search.index/active-table - table-id (t2/select-one-pk :model/Table :name "Indexed Table") - legacy-input #(-> (t2/select-one [index-table :legacy_input] :model "table" :model_id table-id) - :legacy_input - (json/parse-string true)) - db-id (t2/select-one-fn :db_id :model/Table table-id) - db-name-fn (comp :database_name legacy-input) - alternate-name (str (random-uuid))] - - (t2/update! :model/Database db-id {:name alternate-name}) - ;; TODO wire up an actual hook - (search.ingestion/update-index! (t2/select-one :model/Database :id db-id)) - - (is (= alternate-name (db-name-fn))))))) + (mt/with-temporary-setting-values [experimental-search-index-realtime-updates true] + (testing "The index is updated when model dependencies change" + (let [index-table @#'search.index/active-table + table-id (t2/select-one-pk :model/Table :name "Indexed Table") + legacy-input #(-> (t2/select-one [index-table :legacy_input] :model "table" :model_id table-id) + :legacy_input + (json/parse-string true)) + db-id (t2/select-one-fn :db_id :model/Table table-id) + db-name-fn (comp :database_name legacy-input) + alternate-name (str (random-uuid))] + (is (= "Indexed Database" (db-name-fn))) + (t2/update! :model/Database db-id {:name alternate-name}) + (is (= alternate-name (db-name-fn)))))))) (deftest consistent-subset-test (with-index diff --git a/test/metabase/search/spec_test.clj b/test/metabase/search/spec_test.clj index 047852a1893ea2c0596cea48d3a1b0972604fdab..9d37141b4756b8842d8c6d7defdca8b6ff33152e 100644 --- a/test/metabase/search/spec_test.clj +++ b/test/metabase/search/spec_test.clj @@ -89,9 +89,10 @@ [:= :updated.moderated_item_type "card"] [:= :updated.moderated_item_id :this.id] [:= :updated.most_recent true]]}} - :DashboardCard #{{:search-model "card" - :fields nil - :where [:= :updated.card_id :this.id]}}} + ;; Disabled for performance reasons, see spec for :model/Card + #_#_:DashboardCard #{{:search-model "card" + :fields nil + :where [:= :updated.card_id :this.id]}}} (#'search.spec/search-model-hooks (search.spec/spec "card")))) (is (= #:model{:Table #{{:search-model "segment", @@ -124,3 +125,15 @@ (is (= #{["segment" [:= 321 :this.table_id]] ["table" [:= 321 :this.id]]} (search.spec/search-models-to-update (t2/instance :model/Table {:id 321 :name "turn-tables"}))))) + +(deftest search-index-model-test + (testing "All the required models descend from :hook/search-index\n" + (let [expected-models (keys (search.spec/model-hooks)) + ;; Some models have submodels, so absorb those too + expected-models (into (set expected-models) (mapcat descendants) expected-models) + actual-models (set (descendants :hook/search-index))] + (doseq [em (sort-by name expected-models)] + (testing (str "- " em) + (is (actual-models em)))) + (testing "... and nothing else does" + (is (empty? (sort-by name (remove expected-models actual-models))))))))