From e150fafa857ce1fd7af5406b7c6f1c5bbaefb196 Mon Sep 17 00:00:00 2001 From: Chris Truter <crisptrutski@users.noreply.github.com> Date: Wed, 13 Nov 2024 14:56:57 +0200 Subject: [PATCH] Basic API to twiddle with search weights (#49866) --- .../metabase_enterprise/search/scoring.clj | 2 +- src/metabase/api/search.clj | 41 ++++++++++++--- src/metabase/models/model_index.clj | 9 ++-- src/metabase/public_settings.clj | 9 ++++ src/metabase/search/config.clj | 13 ++++- src/metabase/search/postgres/scoring.clj | 2 +- test/metabase/api/search_test.clj | 18 +++++++ test/metabase/search/postgres/index_test.clj | 50 +++++++++---------- test/metabase/search/spec_test.clj | 3 +- 9 files changed, 106 insertions(+), 41 deletions(-) diff --git a/enterprise/backend/src/metabase_enterprise/search/scoring.clj b/enterprise/backend/src/metabase_enterprise/search/scoring.clj index 5d5f3dd201b..b208adb4377 100644 --- a/enterprise/backend/src/metabase_enterprise/search/scoring.clj +++ b/enterprise/backend/src/metabase_enterprise/search/scoring.clj @@ -56,7 +56,7 @@ {:name (legacy-name k) :score (let [f (get legacy-scorers k)] (f result)) - :weight (search.config/weights k)}) + :weight (search.config/weight k)}) (defenterprise score-result "Scoring implementation that adds score for items in official collections." diff --git a/src/metabase/api/search.clj b/src/metabase/api/search.clj index e08d93708ef..d6a71f85f85 100644 --- a/src/metabase/api/search.clj +++ b/src/metabase/api/search.clj @@ -1,10 +1,14 @@ (ns metabase.api.search + ;; Allowing search.config to be accessed for developer API to set weights + #_{:clj-kondo/ignore [:metabase/ns-module-checker]} (:require + [clojure.string :as str] [compojure.core :refer [GET]] [java-time.api :as t] [metabase.api.common :as api] [metabase.public-settings :as public-settings] [metabase.search :as search] + [metabase.search.config :as search.config] [metabase.server.middleware.offset-paging :as mw.offset-paging] [metabase.task :as task] [metabase.task.search-index :as task.search-index] @@ -46,17 +50,40 @@ (api/check-superuser) (cond (not (public-settings/experimental-fulltext-search-enabled)) - {:status-code 501, :message "Search index is not enabled."} + (throw (ex-info "Search index is not enabled." {:status-code 501})) (search/supports-index?) - (do - (if (task/job-exists? task.search-index/job-key) - (task/trigger-now! task.search-index/job-key) - (search/reindex!)) - {:status-code 200}) + (if (task/job-exists? task.search-index/job-key) + (do (task/trigger-now! task.search-index/job-key) {:message "task triggered"}) + (do (search/reindex!) {:message "done"})) :else - {:status-code 501, :message "Search index is not supported for this installation."})) + (throw (ex-info "Search index is not supported for this installation." {:status-code 501})))) + +(defn- set-weights! [overrides] + (api/check-superuser) + (let [allowed-key? (set (keys @#'search.config/default-weights)) + unknown-weights (seq (remove allowed-key? (keys overrides)))] + (when unknown-weights + (throw (ex-info (str "Unknown weights: " (str/join ", " (map name (sort unknown-weights)))) + {:status-code 400}))) + (public-settings/experimental-search-weight-overrides! + (merge (public-settings/experimental-search-weight-overrides) overrides)) + (search.config/weights))) + +(api/defendpoint GET "/weights" + "Return the current weights being used to rank the search results" + [:as {overrides :params}] + ;; remove cookie + (let [overrides (-> overrides (dissoc :search_engine) (update-vals parse-double))] + (if (seq overrides) + (set-weights! overrides) + (search.config/weights)))) + +(api/defendpoint PUT "/weights" + "Return the current weights being used to rank the search results" + [:as {overrides :body}] + (set-weights! overrides)) (api/defendpoint GET "/" "Search for items in Metabase. diff --git a/src/metabase/models/model_index.clj b/src/metabase/models/model_index.clj index fcd5118d84c..e51258ce03d 100644 --- a/src/metabase/models/model_index.clj +++ b/src/metabase/models/model_index.clj @@ -36,8 +36,8 @@ (derive :model/ModelIndexValue :metabase/model) (derive :model/ModelIndex :hook/created-at-timestamped?) -(derive :model/ModelIndex :hook/search-index) -(derive :model/ModelIndexValue :hook/search-index) +;; TODO disabled due to issues having an update hook causes, seemingly due to a toucan2 bug +#_(derive :model/ModelIndex :hook/search-index) (t2/deftransforms ModelIndex {:pk_ref mi/transform-field-ref @@ -190,7 +190,7 @@ :attrs {:id :model_pk :collection-id :collection.id :creator-id false - ;; this seems wrong, I'd expect it to track whether the model is archived. + ;; this seems wrong, I'd expect it to track whether the model is archived. :archived false :database-id :model.database_id :created-at false @@ -205,3 +205,6 @@ :joins {:model_index [:model/ModelIndex [:= :model_index.id :this.model_index_id]] :model [:model/Card [:= :model.id :model_index.model_id]] :collection [:model/Collection [:= :collection.id :model.collection_id]]}}) + +;; TODO resolve the toucan2 issue preventing us from using this hook +(underive :model/ModelIndexValue :hook/search-index) diff --git a/src/metabase/public_settings.clj b/src/metabase/public_settings.clj index 331f7033af2..4a967a0001e 100644 --- a/src/metabase/public_settings.clj +++ b/src/metabase/public_settings.clj @@ -1049,6 +1049,15 @@ See [fonts](../configuring-metabase/fonts.md).") :default false :type :boolean) +(defsetting experimental-search-weight-overrides + (deferred-tru "Used to override weights used for search ranking") + :visibility :internal + :cache? false + :encryption :no + :export? false + :default nil + :type :json) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Deprecated uploads settings begin ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/src/metabase/search/config.clj b/src/metabase/search/config.clj index a60d4bcc14f..6d27c19f0c2 100644 --- a/src/metabase/search/config.clj +++ b/src/metabase/search/config.clj @@ -79,8 +79,7 @@ (assert (= all-models (set models-search-order)) "The models search order has to include all models") -(def weights - "Strength of the various scorers. Copied from metabase.search.in-place.scoring, but allowing divergence." +(def ^:private default-weights {:pinned 2 :bookmarked 2 :recency 1.5 @@ -91,6 +90,16 @@ :view-count 2 :text 10}) +(defn weights + "Strength of the various scorers. Copied from metabase.search.in-place.scoring, but allowing divergence." + [] + (merge default-weights (public-settings/experimental-search-weight-overrides))) + +(defn weight + "The relative strength the corresponding score has in influencing the total score." + [scorer-key] + (get (weights) scorer-key 0)) + (defn model->alias "Given a model string returns the model alias" [model] diff --git a/src/metabase/search/postgres/scoring.clj b/src/metabase/search/postgres/scoring.clj index e64fd7ab599..1d4f196efc8 100644 --- a/src/metabase/search/postgres/scoring.clj +++ b/src/metabase/search/postgres/scoring.clj @@ -53,7 +53,7 @@ [:inline 1])) (defn- weighted-score [[column-alias expr]] - [:* [:inline (search.config/weights column-alias 0)] expr]) + [:* [:inline (search.config/weight column-alias)] expr]) (defn- select-items [scorers] (concat diff --git a/test/metabase/api/search_test.clj b/test/metabase/api/search_test.clj index 7e0f89cc5e5..12919189696 100644 --- a/test/metabase/api/search_test.clj +++ b/test/metabase/api/search_test.clj @@ -19,6 +19,7 @@ [metabase.models.permissions :as perms] [metabase.models.permissions-group :as perms-group] [metabase.models.revision :as revision] + [metabase.public-settings :as public-settings] [metabase.public-settings.premium-features :as premium-features] [metabase.search :as search] [metabase.search.config :as search.config] @@ -1589,3 +1590,20 @@ (when (pos? attempts-left) (Thread/sleep 200) (recur (dec attempts-left)))))))))) + +(deftest weights-test + (let [original-weights (search.config/weights) + original-overrides (public-settings/experimental-search-weight-overrides)] + (try + (is (= original-weights (mt/user-http-request :crowberto :get 200 "search/weights"))) + (is (mt/user-http-request :rasta :put 403 "search/weights")) + (is (= original-weights (mt/user-http-request :crowberto :put 200 "search/weights"))) + (is (= (assoc original-weights :recency 4 :text 20) + (mt/user-http-request :crowberto :put 200 "search/weights" {:recency 4, :text 20}))) + (is (= (assoc original-weights :recency 4 :text 30.0) + (mt/user-http-request :crowberto :get 200 "search/weights?text=30"))) + (is (mt/user-http-request :crowberto :put 400 "search/weights" {:bad-spelling 2})) + (is (= (assoc original-weights :recency 4 :text 30.0) + (mt/user-http-request :crowberto :get 200 "search/weights"))) + (finally + (public-settings/experimental-search-weight-overrides! original-overrides))))) diff --git a/test/metabase/search/postgres/index_test.clj b/test/metabase/search/postgres/index_test.clj index b916f8336c2..de18f28e8cf 100644 --- a/test/metabase/search/postgres/index_test.clj +++ b/test/metabase/search/postgres/index_test.clj @@ -56,35 +56,33 @@ (deftest incremental-update-test (with-index - (mt/with-temporary-setting-values [experimental-search-index-realtime-updates true] - (testing "The index is updated when models change" - ;; Has a second entry is "Revenue Project(ions)", when using English dictionary - (is (= 1 #_2 (count (search.index/search "Projected Revenue")))) - (is (= 0 (count (search.index/search "Protected Avenue")))) - (t2/update! :model/Card {:name "Projected Revenue"} {:name "Protected Avenue"}) - (is (= 0 #_1 (count (search.index/search "Projected Revenue")))) - (is (= 1 (count (search.index/search "Protected Avenue")))) - ;; Delete hooks are disabled, for now, over performance concerns. - ;(t2/delete! :model/Card :name "Protected Avenue") - (search.ingestion/delete-model! (t2/select-one :model/Card :name "Protected Avenue")) - (is (= 0 #_1 (count (search.index/search "Projected Revenue")))) - (is (= 0 (count (search.index/search "Protected Avenue")))))))) + (testing "The index is updated when models change" + ;; Has a second entry is "Revenue Project(ions)", when using English dictionary + (is (= 1 #_2 (count (search.index/search "Projected Revenue")))) + (is (= 0 (count (search.index/search "Protected Avenue")))) + (t2/update! :model/Card {:name "Projected Revenue"} {:name "Protected Avenue"}) + (is (= 0 #_1 (count (search.index/search "Projected Revenue")))) + (is (= 1 (count (search.index/search "Protected Avenue")))) + ;; Delete hooks are disabled, for now, over performance concerns. + ;(t2/delete! :model/Card :name "Protected Avenue") + (search.ingestion/delete-model! (t2/select-one :model/Card :name "Protected Avenue")) + (is (= 0 #_1 (count (search.index/search "Projected Revenue")))) + (is (= 0 (count (search.index/search "Protected Avenue"))))))) (deftest related-update-test (with-index - (mt/with-temporary-setting-values [experimental-search-index-realtime-updates true] - (testing "The index is updated when model dependencies change" - (let [index-table @#'search.index/active-table - table-id (t2/select-one-pk :model/Table :name "Indexed Table") - legacy-input #(-> (t2/select-one [index-table :legacy_input] :model "table" :model_id table-id) - :legacy_input - (json/parse-string true)) - db-id (t2/select-one-fn :db_id :model/Table table-id) - db-name-fn (comp :database_name legacy-input) - alternate-name (str (random-uuid))] - (is (= "Indexed Database" (db-name-fn))) - (t2/update! :model/Database db-id {:name alternate-name}) - (is (= alternate-name (db-name-fn)))))))) + (testing "The index is updated when model dependencies change" + (let [index-table @#'search.index/active-table + table-id (t2/select-one-pk :model/Table :name "Indexed Table") + legacy-input #(-> (t2/select-one [index-table :legacy_input] :model "table" :model_id table-id) + :legacy_input + (json/parse-string true)) + db-id (t2/select-one-fn :db_id :model/Table table-id) + db-name-fn (comp :database_name legacy-input) + alternate-name (str (random-uuid))] + (is (= "Indexed Database" (db-name-fn))) + (t2/update! :model/Database db-id {:name alternate-name}) + (is (= alternate-name (db-name-fn))))))) (deftest consistent-subset-test (with-index diff --git a/test/metabase/search/spec_test.clj b/test/metabase/search/spec_test.clj index 9d37141b475..5a02dc16ddc 100644 --- a/test/metabase/search/spec_test.clj +++ b/test/metabase/search/spec_test.clj @@ -128,7 +128,8 @@ (deftest search-index-model-test (testing "All the required models descend from :hook/search-index\n" - (let [expected-models (keys (search.spec/model-hooks)) + ;; TODO restore hooks to ModelIndex when toucan issue is resolved + (let [expected-models (keys (dissoc (search.spec/model-hooks) :model/ModelIndex :model/ModelIndexValue)) ;; Some models have submodels, so absorb those too expected-models (into (set expected-models) (mapcat descendants) expected-models) actual-models (set (descendants :hook/search-index))] -- GitLab