Skip to content
Snippets Groups Projects
Unverified Commit 017fbe9a authored by github-automation-metabase's avatar github-automation-metabase Committed by GitHub
Browse files

Extra Search Rankers for exact matches and things you own (#50684) (#50695)

parent a784851a
No related branches found
No related tags found
No related merge requests found
(ns metabase.search.appdb.scoring
(:require
[clojure.core.memoize :as memoize]
[clojure.string :as str]
[honey.sql.helpers :as sql.helpers]
[metabase.config :as config]
[metabase.public-settings.premium-features :refer [defenterprise]]
......@@ -16,6 +17,16 @@
[column]
[:coalesce [:cast column :integer] [:inline 0]])
(defn equal
  "Prefer it when it matches a specific (non-null) value.

  `column` and `value` are HoneySQL expressions (or plain values). Returns a HoneySQL expression that evaluates to
  1 when `column` equals `value`, and to 0 otherwise.

  A nil `value` is treated as \"nothing to match\" and yields a constant 0. Without this guard HoneySQL would
  render the comparison as `column IS NULL`, and rows with a missing column value would be boosted."
  [column value]
  (if (nil? value)
    [:inline 0]
    [:coalesce [:case [:= column value] [:inline 1] :else [:inline 0]] [:inline 0]]))
(defn prefix
  "Prefer it when the given value is a completion of a specific (non-null) value.

  `column` is a HoneySQL expression; `value` is the literal string the column is expected to start with.
  Returns a HoneySQL expression that evaluates to 1 when `column` starts with `value`, and to 0 otherwise.

  LIKE metacharacters inside `value` (percent, underscore, and the backslash escape character itself) are
  backslash-escaped so that they only match themselves. A nil `value` can never be a prefix, so it yields a
  constant 0 instead of throwing."
  [column value]
  (if (nil? value)
    [:inline 0]
    ;; Doubling a percent sign does NOT escape it in LIKE; a backslash is the default escape character.
    (let [escaped (str/replace value #"[\\%_]" #(str "\\" %))
          pattern (str escaped "%")]
      [:coalesce [:case [:like column pattern] [:inline 1] :else [:inline 0]] [:inline 0]])))
(defn size
"Prefer items whose value is larger, up to some saturation point. Items beyond that point are equivalent."
[column ceiling]
......@@ -145,6 +156,8 @@
(defn base-scorers
"The default constituents of the search ranking scores."
[search-ctx]
;; NOTE: we calculate scores even if the weight is zero, so that it's easy to consider how we could affect any
;; given set of results. At some point, we should optimize away the irrelevant scores for any given context.
{:text [:ts_rank :search_vector :query [:inline ts-rank-normalization]]
:view-count (view-count-expr search.config/view-count-scaling-percentile)
:pinned (truthy :pinned)
......@@ -152,7 +165,10 @@
:recency (inverse-duration [:coalesce :last_viewed_at :model_updated_at] [:now] search.config/stale-time-in-days)
:user-recency (inverse-duration (user-recency-expr search-ctx) [:now] search.config/stale-time-in-days)
:dashboard (size :dashboardcard_count search.config/dashboard-count-ceiling)
:model (model-rank-exp search-ctx)})
:model (model-rank-exp search-ctx)
:mine (equal :search_index.creator_id (:current-user-id search-ctx))
:exact (equal [:lower :search_index.name] [:lower (:search-string search-ctx)])
:prefix (prefix [:lower :search_index.name] (u/lower-case-en (:search-string search-ctx)))})
(defenterprise scorers
"Return the select-item expressions used to calculate the score for each search result."
......
......@@ -92,9 +92,13 @@
:official-collection 1
:verified 1
:view-count 2
:text 5}
:text 5
:mine 1
:exact 5
:prefix 0}
:command-palette
{:model/collection 1
{:prefix 5
:model/collection 1
:model/dashboard 1
:model/metric 1
:model/dataset 0.8
......
......@@ -49,6 +49,12 @@
"sanity check: search-no-weights should be different")
result))
(defn indifferent?
  "True when searching for `search-string` gives identical results, in identical order, whether the ranker
  identified by `ranker-key` is weighted positively (1) or negatively (-1). Any trailing keyword arguments are
  collected into the raw search context passed along to the search."
  [ranker-key search-string & {:as raw-ctx}]
  (let [boosted   (with-weights {ranker-key 1}
                    (search-results* search-string raw-ctx))
        penalized (with-weights {ranker-key -1}
                    (search-results* search-string raw-ctx))]
    (= boosted penalized)))
;; ---- index-only rankers ----
;; These are the easiest to test, as they don't depend on other appdb state.
......@@ -73,6 +79,24 @@
["card" 3 "classified"]]
(search-results :text "order"))))))
(deftest ^:parallel exact-test
  ;; Card 1's name is exactly the search string; card 2's name is only a fragment of it.
  (let [full-name "the any most of stop words very"
        fragment  "stop words"]
    (with-index-contents
      [{:model "card" :id 1 :name full-name}
       {:model "card" :id 2 :name fragment}]
      (testing "Preferences according to exact name matches, including stop words"
        (is (= [["card" 1 full-name]
                ["card" 2 fragment]]
               (search-results :exact full-name)))))))
(deftest ^:parallel prefix-test
  ;; Card 1's name starts with the search string; card 2's name does not.
  (let [starts-with "this is a prefix of something longer"
        scrambled   "a prefix this is not, unfortunately"]
    (with-index-contents
      [{:model "card" :id 1 :name starts-with}
       {:model "card" :id 2 :name scrambled}]
      (testing "We can boost exact prefix matches"
        (is (= [["card" 1 starts-with]
                ["card" 2 scrambled]]
               (search-results :prefix "this is a prefix")))))))
(deftest ^:parallel model-test
(with-index-contents
[{:model "dataset" :id 1 :name "card ancient"}
......@@ -159,11 +183,9 @@
(testing "it has a ceiling, more than the ceiling is considered to be equal"
(with-index-contents
[{:model "card" :id 1 :name "card popular" :dashboardcard_count 22}
{:model "card" :id 2 :name "card" :dashboardcard_count 11}]
(is (= [["card" 1 "card popular"]
["card" 2 "card"]]
(search-results* "card"))))))
[{:model "card" :id 1 :name "card popular" :dashboardcard_count 200}
{:model "card" :id 2 :name "card" :dashboardcard_count 201}]
(is (indifferent? :dashboard "card")))))
;; ---- personalized rankers ---
;; These require some related appdb content
......@@ -237,3 +259,15 @@
["card" c1 "card ancient"]
["dataset" c3 "card unseen"]]
(search-results :user-recency "card" {:current-user-id user-id}))))))))
(deftest ^:parallel mine-test
  ;; Each user should see the card they created ranked ahead of the other user's card.
  (let [crowberto  (mt/user->id :crowberto)
        rasta      (mt/user->id :rasta)
        crow-name  "crow's fly card"
        rasta-name "this card is aerie mon"
        crow-row   ["card" 1 crow-name]
        rasta-row  ["card" 2 rasta-name]]
    (with-index-contents [{:model "card" :id 1 :name crow-name :creator_id crowberto}
                          {:model "card" :id 2 :name rasta-name :creator_id rasta}]
      (is (= [crow-row rasta-row]
             (search-results :mine "card" {:current-user-id crowberto})))
      (is (= [rasta-row crow-row]
             (search-results :mine "card" {:current-user-id rasta}))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment