Skip to content
Snippets Groups Projects
Unverified Commit 017fbe9a authored by github-automation-metabase's avatar github-automation-metabase Committed by GitHub
Browse files

Extra Search Rankers for exact matches and things you own (#50684) (#50695)

parent a784851a
No related branches found
No related tags found
No related merge requests found
(ns metabase.search.appdb.scoring (ns metabase.search.appdb.scoring
(:require (:require
[clojure.core.memoize :as memoize] [clojure.core.memoize :as memoize]
[clojure.string :as str]
[honey.sql.helpers :as sql.helpers] [honey.sql.helpers :as sql.helpers]
[metabase.config :as config] [metabase.config :as config]
[metabase.public-settings.premium-features :refer [defenterprise]] [metabase.public-settings.premium-features :refer [defenterprise]]
...@@ -16,6 +17,16 @@ ...@@ -16,6 +17,16 @@
[column] [column]
[:coalesce [:cast column :integer] [:inline 0]]) [:coalesce [:cast column :integer] [:inline 0]])
(defn equal
  "Build a scoring expression that is 1 when `column` equals `value`, and 0 in every other case.
  The outer coalesce uses 0 as its fallback."
  [column value]
  (let [hit     [:inline 1]
        miss    [:inline 0]
        matched [:case [:= column value] hit :else miss]]
    [:coalesce matched miss]))
(defn prefix
  "Build a scoring expression that is 1 when `column` starts with the literal text `value`, and 0 otherwise.

  `value` is treated as plain text: the LIKE metacharacters `%` and `_`, and the escape character `\\` itself,
  are backslash-escaped before the trailing `%` wildcard is appended. Doubling `%` is not an escape in LIKE,
  it is just two wildcards.

  A nil `value` cannot be a prefix of anything specific, so it yields a constant 0 score rather than throwing."
  [column value]
  (if (nil? value)
    [:inline 0]
    ;; NOTE: relies on backslash being the database's default LIKE escape character (true for PostgreSQL).
    (let [literal (str/replace value #"[\\%_]" (fn [ch] (str "\\" ch)))
          pattern (str literal "%")]
      [:coalesce [:case [:like column pattern] [:inline 1] :else [:inline 0]] [:inline 0]])))
(defn size (defn size
"Prefer items whose value is larger, up to some saturation point. Items beyond that point are equivalent." "Prefer items whose value is larger, up to some saturation point. Items beyond that point are equivalent."
[column ceiling] [column ceiling]
...@@ -145,6 +156,8 @@ ...@@ -145,6 +156,8 @@
(defn base-scorers (defn base-scorers
"The default constituents of the search ranking scores." "The default constituents of the search ranking scores."
[search-ctx] [search-ctx]
;; NOTE: we calculate scores even if the weight is zero, so that it's easy to consider how we could affect any
;; given set of results. At some point, we should optimize away the irrelevant scores for any given context.
{:text [:ts_rank :search_vector :query [:inline ts-rank-normalization]] {:text [:ts_rank :search_vector :query [:inline ts-rank-normalization]]
:view-count (view-count-expr search.config/view-count-scaling-percentile) :view-count (view-count-expr search.config/view-count-scaling-percentile)
:pinned (truthy :pinned) :pinned (truthy :pinned)
...@@ -152,7 +165,10 @@ ...@@ -152,7 +165,10 @@
:recency (inverse-duration [:coalesce :last_viewed_at :model_updated_at] [:now] search.config/stale-time-in-days) :recency (inverse-duration [:coalesce :last_viewed_at :model_updated_at] [:now] search.config/stale-time-in-days)
:user-recency (inverse-duration (user-recency-expr search-ctx) [:now] search.config/stale-time-in-days) :user-recency (inverse-duration (user-recency-expr search-ctx) [:now] search.config/stale-time-in-days)
:dashboard (size :dashboardcard_count search.config/dashboard-count-ceiling) :dashboard (size :dashboardcard_count search.config/dashboard-count-ceiling)
:model (model-rank-exp search-ctx)}) :model (model-rank-exp search-ctx)
:mine (equal :search_index.creator_id (:current-user-id search-ctx))
:exact (equal [:lower :search_index.name] [:lower (:search-string search-ctx)])
:prefix (prefix [:lower :search_index.name] (u/lower-case-en (:search-string search-ctx)))})
(defenterprise scorers (defenterprise scorers
"Return the select-item expressions used to calculate the score for each search result." "Return the select-item expressions used to calculate the score for each search result."
......
...@@ -92,9 +92,13 @@ ...@@ -92,9 +92,13 @@
:official-collection 1 :official-collection 1
:verified 1 :verified 1
:view-count 2 :view-count 2
:text 5} :text 5
:mine 1
:exact 5
:prefix 0}
:command-palette :command-palette
{:model/collection 1 {:prefix 5
:model/collection 1
:model/dashboard 1 :model/dashboard 1
:model/metric 1 :model/metric 1
:model/dataset 0.8 :model/dataset 0.8
......
...@@ -49,6 +49,12 @@ ...@@ -49,6 +49,12 @@
"sanity check: search-no-weights should be different") "sanity check: search-no-weights should be different")
result)) result))
(defn indifferent?
  "True when the search results, including their order, are equal whether the given ranker is weighted 1 or -1."
  [ranker-key search-string & {:as raw-ctx}]
  (let [results-with-weight (fn [weight]
                              (with-weights {ranker-key weight}
                                (search-results* search-string raw-ctx)))
        boosted             (results-with-weight 1)
        penalized           (results-with-weight -1)]
    (= boosted penalized)))
;; ---- index-only rankers ---- ;; ---- index-only rankers ----
;; These are the easiest to test, as they don't depend on other appdb state. ;; These are the easiest to test, as they don't depend on other appdb state.
...@@ -73,6 +79,24 @@ ...@@ -73,6 +79,24 @@
["card" 3 "classified"]] ["card" 3 "classified"]]
(search-results :text "order")))))) (search-results :text "order"))))))
(deftest ^:parallel exact-test
  ;; The longer name is made up largely of stop words, and is also used verbatim as the search string.
  (let [wordy-name "the any most of stop words very"
        short-name "stop words"]
    (with-index-contents
      [{:model "card" :id 1 :name wordy-name}
       {:model "card" :id 2 :name short-name}]
      (testing "Preferences according to exact name matches, including stop words"
        (let [expected [["card" 1 wordy-name]
                        ["card" 2 short-name]]]
          (is (= expected
                 (search-results :exact wordy-name))))))))
(deftest ^:parallel prefix-test
  ;; Both names contain the words "a prefix", but only the first one starts with the search string.
  (let [starts-with "this is a prefix of something longer"
        contains    "a prefix this is not, unfortunately"]
    (with-index-contents
      [{:model "card" :id 1 :name starts-with}
       {:model "card" :id 2 :name contains}]
      (testing "We can boost exact prefix matches"
        (let [expected [["card" 1 starts-with]
                        ["card" 2 contains]]]
          (is (= expected
                 (search-results :prefix "this is a prefix"))))))))
(deftest ^:parallel model-test (deftest ^:parallel model-test
(with-index-contents (with-index-contents
[{:model "dataset" :id 1 :name "card ancient"} [{:model "dataset" :id 1 :name "card ancient"}
...@@ -159,11 +183,9 @@ ...@@ -159,11 +183,9 @@
(testing "it has a ceiling, more than the ceiling is considered to be equal" (testing "it has a ceiling, more than the ceiling is considered to be equal"
(with-index-contents (with-index-contents
[{:model "card" :id 1 :name "card popular" :dashboardcard_count 22} [{:model "card" :id 1 :name "card popular" :dashboardcard_count 200}
{:model "card" :id 2 :name "card" :dashboardcard_count 11}] {:model "card" :id 2 :name "card" :dashboardcard_count 201}]
(is (= [["card" 1 "card popular"] (is (indifferent? :dashboard "card")))))
["card" 2 "card"]]
(search-results* "card"))))))
;; ---- personalized rankers --- ;; ---- personalized rankers ---
;; These require some related appdb content ;; These require some related appdb content
...@@ -237,3 +259,15 @@ ...@@ -237,3 +259,15 @@
["card" c1 "card ancient"] ["card" c1 "card ancient"]
["dataset" c3 "card unseen"]] ["dataset" c3 "card unseen"]]
(search-results :user-recency "card" {:current-user-id user-id})))))))) (search-results :user-recency "card" {:current-user-id user-id}))))))))
(deftest ^:parallel mine-test
  ;; Two cards with different creators; the expected order flips depending on who is searching.
  (let [crowberto  (mt/user->id :crowberto)
        rasta      (mt/user->id :rasta)
        crow-card  ["card" 1 "crow's fly card"]
        rasta-card ["card" 2 "this card is aerie mon"]]
    (with-index-contents [{:model "card" :id 1 :name "crow's fly card" :creator_id crowberto}
                          {:model "card" :id 2 :name "this card is aerie mon" :creator_id rasta}]
      (let [ranked-for (fn [user-id]
                         (search-results :mine "card" {:current-user-id user-id}))]
        (is (= [crow-card rasta-card]
               (ranked-for crowberto)))
        (is (= [rasta-card crow-card]
               (ranked-for rasta)))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment