Extra Search Rankers for exact matches and things you own (#50684) (#50695)

Co-authored-by: Chris Truter <crisptrutski@users.noreply.github.com>

Extra Search Rankers for exact matches and things you own (#50684) (#50695)
017fbe9a · github-automation-metabase · GitHub · a784851a · 017fbe9a · 017fbe9a
Unverified Commit 017fbe9a authored 3 months ago by github-automation-metabase Committed by GitHub 3 months ago
--- a/src/metabase/search/appdb/scoring.clj
+++ b/src/metabase/search/appdb/scoring.clj
 (ns metabase.search.appdb.scoring
  (:require
   [clojure.core.memoize :as memoize]
+   [clojure.string :as str]
   [honey.sql.helpers :as sql.helpers]
   [metabase.config :as config]
   [metabase.public-settings.premium-features :refer [defenterprise]]
@@ -16,6 +17,16 @@
  [column]
  [:coalesce [:cast column :integer] [:inline 0]])
+(defn equal
+  "Build a HoneySQL expression that scores 1 when `column` equals the given (non-null) `value`, and 0 otherwise.
+  The CASE expression is wrapped in a COALESCE that falls back to 0."
+  [column value]
+  (let [hit      [:inline 1]
+        miss     [:inline 0]
+        matches? [:= column value]]
+    [:coalesce [:case matches? hit :else miss] miss]))
+(defn prefix
+  "Prefer it when the given value is a completion of a specific (non-null) value.
+
+  Builds a HoneySQL expression that scores 1 when `column` starts with `value`, and 0 otherwise.
+
+  `value` is matched literally: the LIKE metacharacters `%` and `_`, and the escape character `\\` itself, are
+  backslash-escaped before the trailing `%` wildcard is appended. Doubling `%` does not escape it in LIKE, it is
+  still a wildcard. This relies on backslash being the database's default LIKE escape character.
+
+  A nil `value` can never be a prefix of anything, so it yields a constant score of 0 rather than throwing."
+  [column value]
+  (if (nil? value)
+    [:inline 0]
+    (let [;; \"\\\\\\\\$0\" is the replacement `\\$0`: a literal backslash followed by the matched character.
+          escaped (str/replace value #"[\\%_]" "\\\\$0")
+          pattern (str escaped "%")]
+      [:coalesce [:case [:like column pattern] [:inline 1] :else [:inline 0]] [:inline 0]])))
 (defn size
  "Prefer items whose value is larger, up to some saturation point. Items beyond that point are equivalent."
  [column ceiling]
@@ -145,6 +156,8 @@
 (defn base-scorers
  "The default constituents of the search ranking scores."
  [search-ctx]
+  ;; NOTE: we calculate scores even if the weight is zero, so that it's easy to consider how we could affect any
+  ;; given set of results. At some point, we should optimize away the irrelevant scores for any given context.
  {:text         [:ts_rank :search_vector :query [:inline ts-rank-normalization]]
   :view-count   (view-count-expr search.config/view-count-scaling-percentile)
   :pinned       (truthy :pinned)
@@ -152,7 +165,10 @@
   :recency      (inverse-duration [:coalesce :last_viewed_at :model_updated_at] [:now] search.config/stale-time-in-days)
   :user-recency (inverse-duration (user-recency-expr search-ctx) [:now] search.config/stale-time-in-days)
   :dashboard    (size :dashboardcard_count search.config/dashboard-count-ceiling)
-   :model        (model-rank-exp search-ctx)})
+   :model        (model-rank-exp search-ctx)
+   :mine         (equal :search_index.creator_id (:current-user-id search-ctx))
+   :exact        (equal [:lower :search_index.name] [:lower (:search-string search-ctx)])
+   :prefix       (prefix [:lower :search_index.name] (u/lower-case-en (:search-string search-ctx)))})
 (defenterprise scorers
  "Return the select-item expressions used to calculate the score for each search result."

--- a/src/metabase/search/config.clj
+++ b/src/metabase/search/config.clj
@@ -92,9 +92,13 @@
    :official-collection 1
    :verified            1
    :view-count          2
-    :text                5}
+    :text                5
+    :mine                1
+    :exact               5
+    :prefix              0}
   :command-palette
-   {:model/collection     1
+   {:prefix               5
+    :model/collection     1
    :model/dashboard      1
    :model/metric         1
    :model/dataset        0.8

--- a/test/metabase/search/appdb/scoring_test.clj
+++ b/test/metabase/search/appdb/scoring_test.clj
@@ -49,6 +49,12 @@
        "sanity check: search-no-weights should be different")
    result))
+(defn indifferent?
+  "True when searching for `search-string` gives the same results, in the same order, whether `ranker-key` is
+  weighted 1 or -1. Any extra keyword arguments are collected into the raw context passed to `search-results*`."
+  [ranker-key search-string & {:as raw-ctx}]
+  (let [boosted   (with-weights {ranker-key 1}
+                    (search-results* search-string raw-ctx))
+        penalized (with-weights {ranker-key -1}
+                    (search-results* search-string raw-ctx))]
+    (= boosted penalized)))
 ;; ---- index-only rankers ----
 ;; These are the easiest to test, as they don't depend on other appdb state.
@@ -73,6 +79,24 @@
              ["card" 3 "classified"]]
             (search-results :text "order"))))))
+(deftest ^:parallel exact-test
+  ;; The query is the full name of card 1, stop words included; card 2's name is a fragment of that query.
+  (with-index-contents
+    [{:model "card" :id 1 :name "the any most of stop words very"}
+     {:model "card" :id 2 :name "stop words"}]
+    (let [query    "the any most of stop words very"
+          expected [["card" 1 "the any most of stop words very"]
+                    ["card" 2 "stop words"]]]
+      (testing "Preferences according to exact name matches, including stop words"
+        (is (= expected
+               (search-results :exact query)))))))
+(deftest ^:parallel prefix-test
+  ;; Both names contain the words of the query, but only card 1's name starts with the query string.
+  (with-index-contents
+    [{:model "card" :id 1 :name "this is a prefix of something longer"}
+     {:model "card" :id 2 :name "a prefix this is not, unfortunately"}]
+    (let [query    "this is a prefix"
+          expected [["card" 1 "this is a prefix of something longer"]
+                    ["card" 2 "a prefix this is not, unfortunately"]]]
+      (testing "We can boost exact prefix matches"
+        (is (= expected
+               (search-results :prefix query)))))))
 (deftest ^:parallel model-test
  (with-index-contents
    [{:model "dataset" :id 1 :name "card ancient"}
@@ -159,11 +183,9 @@
  (testing "it has a ceiling, more than the ceiling is considered to be equal"
    (with-index-contents
-      [{:model "card" :id 1 :name "card popular" :dashboardcard_count 22}
+      [{:model "card" :id 1 :name "card popular" :dashboardcard_count 200}
-       {:model "card" :id 2 :name "card" :dashboardcard_count 11}]
+       {:model "card" :id 2 :name "card" :dashboardcard_count 201}]
-      (is (= [["card" 1 "card popular"]
+      (is (indifferent? :dashboard "card")))))
-              ["card" 2 "card"]]
-             (search-results* "card"))))))
 ;; ---- personalized rankers ---
 ;; These require some related appdb content
@@ -237,3 +259,15 @@
                  ["card"    c1 "card ancient"]
                  ["dataset" c3 "card unseen"]]
                 (search-results :user-recency "card" {:current-user-id user-id}))))))))
+(deftest ^:parallel mine-test
+  ;; Each user created exactly one of the two cards; the expected order puts the searcher's own card first.
+  (let [crowberto  (mt/user->id :crowberto)
+        rasta      (mt/user->id :rasta)
+        crow-card  ["card" 1 "crow's fly card"]
+        rasta-card ["card" 2 "this card is aerie mon"]]
+    (with-index-contents [{:model "card" :id 1 :name "crow's fly card" :creator_id crowberto}
+                          {:model "card" :id 2 :name "this card is aerie mon" :creator_id rasta}]
+      (is (= [crow-card rasta-card]
+             (search-results :mine "card" {:current-user-id crowberto})))
+      (is (= [rasta-card crow-card]
+             (search-results :mine "card" {:current-user-id rasta}))))))