Skip to content
Snippets Groups Projects
Unverified Commit db0f9119 authored by Chris Truter's avatar Chris Truter Committed by GitHub
Browse files

Keep search index updated (#49666)

parent 00dd363b
No related branches found
No related tags found
No related merge requests found
(ns metabase.models
;; metabase.search.postgres.ingestion has not been exposed publicly yet, it needs a higher level API
#_{:clj-kondo/ignore [:metabase/ns-module-checker]}
(:require
[clojure.string :as str]
[environ.core :as env]
[metabase.config :as config]
[metabase.models.action :as action]
[metabase.models.application-permissions-revision :as a-perm-revision]
[metabase.models.bookmark :as bookmark]
......@@ -54,9 +59,12 @@
[metabase.models.view-log :as view-log]
[metabase.plugins.classloader :as classloader]
[metabase.public-settings.premium-features :refer [defenterprise]]
[metabase.search :as search]
[metabase.search.postgres.ingestion :as search.ingestion]
[metabase.util :as u]
[methodical.core :as methodical]
[potemkin :as p]
[toucan2.core :as t2]
[toucan2.model :as t2.model]))
;; Fool the linter
......@@ -192,3 +200,37 @@
(when (isa? metabase-models-keyword :metabase/model)
metabase-models-keyword)))
(next-method symb)))
;; Models must derive from :hook/search-index if their state can influence the contents of the Search Index.
;; Note that it might not be the model itself that depends on it, for example, Dashcards are used in Card entries.
;; Don't worry about whether you've added it in the right place, we have tests to ensure that it is derived if, and only
;; if, it is required.
(t2/define-after-insert :hook/search-index
[instance]
(when (search/supports-index?)
(search.ingestion/update-index! instance true))
instance)
;; Hidden behind an obscure environment variable, as it may cause performance problems.
;; See https://github.com/camsaul/toucan2/issues/195
(defn- update-hook-enabled? []
(or config/is-dev?
config/is-test?
(when-let [setting (:mb-experimental-search-index-realtime-updates env/env)]
(and (not (str/blank? setting))
(not= "false" setting)))))
(when (update-hook-enabled?)
(t2/define-after-update :hook/search-index
[instance]
(when (search/supports-index?)
(search.ingestion/update-index! instance))
nil))
;; Too much of a performance risk.
#_(t2/define-before-delete :metabase/model
[instance]
(when (search/supports-index?)
(search.ingestion/update-index! instance))
instance)
......@@ -39,6 +39,8 @@
(doseq [model action-sub-models]
(derive model :metabase/model))
(derive :model/QueryAction :hook/search-index)
(methodical/defmethod t2/primary-keys :model/QueryAction [_model] [:action_id])
(methodical/defmethod t2/primary-keys :model/HTTPAction [_model] [:action_id])
(methodical/defmethod t2/primary-keys :model/ImplicitAction [_model] [:action_id])
......
......@@ -1052,8 +1052,12 @@
[:= :mr.moderated_item_type "card"]
[:= :mr.moderated_item_id :this.id]
[:= :mr.most_recent true]]]
;; workaround for dataflow :((((((
:dashcard [:model/DashboardCard [:= :dashcard.card_id :this.id]]}})
;; Workaround for dataflow :((((((
;; NOTE: disabled for now, as this is not a very important ranker and can afford to have stale data,
;; and could cause a large increase in the query count for dashboard updates.
;; (see the test failures when this hook is added back)
;:dashcard [:model/DashboardCard [:= :dashcard.card_id :this.id]]
}})
(search/define-spec "card"
(-> base-search-spec (sql.helpers/where [:= :this.type "question"])))
......
......@@ -27,7 +27,9 @@
(derive ::mi/read-policy.full-perms-for-perms-set)
(derive ::mi/write-policy.full-perms-for-perms-set)
(derive :hook/timestamped?)
(derive :hook/entity-id))
(derive :hook/entity-id)
;; Disabled for performance reasons, see update-dashboard-card!-call-count-test
#_(derive :hook/search-index))
(t2/deftransforms :model/DashboardCard
{:parameter_mappings mi/transform-parameters-list
......
......@@ -36,6 +36,8 @@
(derive :model/ModelIndexValue :metabase/model)
(derive :model/ModelIndex :hook/created-at-timestamped?)
(derive :model/ModelIndex :hook/search-index)
(derive :model/ModelIndexValue :hook/search-index)
(t2/deftransforms ModelIndex
{:pk_ref mi/transform-field-ref
......
......@@ -41,7 +41,8 @@
(derive :metabase/model)
;;; TODO: this is wrong, but what should it be?
(derive ::perms/use-parent-collection-perms)
(derive :hook/timestamped?))
(derive :hook/timestamped?)
(derive :hook/search-index))
(t2/deftransforms :model/ModerationReview
{:moderated_item_type mi/transform-keyword})
......
......@@ -76,7 +76,8 @@
(methodical/defmethod t2/table-name :model/Revision [_model] :revision)
(doto :model/Revision
(derive :metabase/model))
(derive :metabase/model)
(derive :hook/search-index))
(t2/deftransforms :model/Revision
{:object mi/transform-json})
......
......@@ -108,8 +108,8 @@
(defn update-index!
"Given a new or updated instance, create or update all the corresponding search entries if needed."
[instance]
(when-let [updates (seq (search.spec/search-models-to-update instance))]
[instance & [always?]]
(when-let [updates (seq (search.spec/search-models-to-update instance always?))]
;; We need to delay execution to handle deletes, which alert us *before* updating the database.
;; TODO It's dangerous to simply unleash threads on the world, this should use a queue in future.
(run-on-thread
......
......@@ -246,6 +246,7 @@
(assoc :name ~search-model)
(update :attrs #(merge ~default-attrs %)))]
(validate-spec! spec#)
(derive (:model spec#) :hook/search-index)
(defmethod spec ~search-model [~'_] spec#)))
;; TODO we should memoize this for production (based on spec values)
......@@ -258,12 +259,16 @@
(defn search-models-to-update
"Given an updated or created instance, return a description of which search-models to (re)index."
[instance]
[instance & [always?]]
(into #{}
(keep
(fn [{:keys [search-model fields where]}]
;; If there are no changes, treat it as if everything has changed.
;; Likewise, if there are no field dependencies, always do it - this is a hack for dashcards to cards.
(when (or (not fields) (some fields (keys (or (t2/changes instance) instance))))
(when (or always? (and fields (some fields (keys (or (t2/changes instance) instance)))))
[search-model (insert-values where :updated instance)])))
(get (model-hooks) (t2/model instance))))
(comment
(doseq [d (descendants :hook/search-index)]
(underive d :hook/search-index))
(doseq [d (keys (model-hooks))]
(derive d :hook/search-index)))
......@@ -248,7 +248,7 @@
:size_x 3
:size_y 4
:series []}])
;; this is usually 10 but it can be 11 sometimes in CI for some reason
;; this is usually 10, but it can be 11 sometimes in CI for some reason
(is (contains? #{10 11} (call-count)))))))))
(deftest normalize-parameter-mappings-test
......
......@@ -56,42 +56,35 @@
(deftest incremental-update-test
(with-index
(testing "The index is updated when models change"
;; Has a second entry is "Revenue Project(ions)", when using English dictionary
(is (= 1 #_2 (count (search.index/search "Projected Revenue"))))
(is (= 0 (count (search.index/search "Protected Avenue"))))
(t2/update! :model/Card {:name "Projected Revenue"} {:name "Protected Avenue"})
;; TODO wire up an actual hook
(search.ingestion/update-index! (t2/select-one :model/Card :name "Protected Avenue"))
;; wait for the background thread
(is (= 0 #_1 (count (search.index/search "Projected Revenue"))))
(is (= 1 (count (search.index/search "Protected Avenue"))))
;; TODO wire up the actual hook, and actually delete it
(search.ingestion/delete-model! (t2/select-one :model/Card :name "Protected Avenue"))
(is (= 0 #_1 (count (search.index/search "Projected Revenue"))))
(is (= 0 (count (search.index/search "Protected Avenue")))))))
(mt/with-temporary-setting-values [experimental-search-index-realtime-updates true]
(testing "The index is updated when models change"
;; Has a second entry is "Revenue Project(ions)", when using English dictionary
(is (= 1 #_2 (count (search.index/search "Projected Revenue"))))
(is (= 0 (count (search.index/search "Protected Avenue"))))
(t2/update! :model/Card {:name "Projected Revenue"} {:name "Protected Avenue"})
(is (= 0 #_1 (count (search.index/search "Projected Revenue"))))
(is (= 1 (count (search.index/search "Protected Avenue"))))
;; Delete hooks are disabled, for now, over performance concerns.
;(t2/delete! :model/Card :name "Protected Avenue")
(search.ingestion/delete-model! (t2/select-one :model/Card :name "Protected Avenue"))
(is (= 0 #_1 (count (search.index/search "Projected Revenue"))))
(is (= 0 (count (search.index/search "Protected Avenue"))))))))
(deftest related-update-test
(with-index
(testing "The index is updated when model dependencies change"
(let [index-table @#'search.index/active-table
table-id (t2/select-one-pk :model/Table :name "Indexed Table")
legacy-input #(-> (t2/select-one [index-table :legacy_input] :model "table" :model_id table-id)
:legacy_input
(json/parse-string true))
db-id (t2/select-one-fn :db_id :model/Table table-id)
db-name-fn (comp :database_name legacy-input)
alternate-name (str (random-uuid))]
(t2/update! :model/Database db-id {:name alternate-name})
;; TODO wire up an actual hook
(search.ingestion/update-index! (t2/select-one :model/Database :id db-id))
(is (= alternate-name (db-name-fn)))))))
(mt/with-temporary-setting-values [experimental-search-index-realtime-updates true]
(testing "The index is updated when model dependencies change"
(let [index-table @#'search.index/active-table
table-id (t2/select-one-pk :model/Table :name "Indexed Table")
legacy-input #(-> (t2/select-one [index-table :legacy_input] :model "table" :model_id table-id)
:legacy_input
(json/parse-string true))
db-id (t2/select-one-fn :db_id :model/Table table-id)
db-name-fn (comp :database_name legacy-input)
alternate-name (str (random-uuid))]
(is (= "Indexed Database" (db-name-fn)))
(t2/update! :model/Database db-id {:name alternate-name})
(is (= alternate-name (db-name-fn))))))))
(deftest consistent-subset-test
(with-index
......
......@@ -89,9 +89,10 @@
[:= :updated.moderated_item_type "card"]
[:= :updated.moderated_item_id :this.id]
[:= :updated.most_recent true]]}}
:DashboardCard #{{:search-model "card"
:fields nil
:where [:= :updated.card_id :this.id]}}}
;; Disabled for performance reasons, see spec for :model/Card
#_#_:DashboardCard #{{:search-model "card"
:fields nil
:where [:= :updated.card_id :this.id]}}}
(#'search.spec/search-model-hooks (search.spec/spec "card"))))
(is (= #:model{:Table #{{:search-model "segment",
......@@ -124,3 +125,15 @@
(is (= #{["segment" [:= 321 :this.table_id]]
["table" [:= 321 :this.id]]}
(search.spec/search-models-to-update (t2/instance :model/Table {:id 321 :name "turn-tables"})))))
(deftest search-index-model-test
(testing "All the required models descend from :hook/search-index\n"
(let [expected-models (keys (search.spec/model-hooks))
;; Some models have submodels, so absorb those too
expected-models (into (set expected-models) (mapcat descendants) expected-models)
actual-models (set (descendants :hook/search-index))]
(doseq [em (sort-by name expected-models)]
(testing (str "- " em)
(is (actual-models em))))
(testing "... and nothing else does"
(is (empty? (sort-by name (remove expected-models actual-models))))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment