From 09ee5aa2e8dcede18da0ddf03a905944059b79d7 Mon Sep 17 00:00:00 2001
From: Chris Truter <crisptrutski@users.noreply.github.com>
Date: Mon, 11 Nov 2024 23:35:33 +0200
Subject: [PATCH] Prefer search results with higher view counts (#49842)

---
 src/metabase/models/card.clj             |  1 +
 src/metabase/models/dashboard.clj        |  1 +
 src/metabase/models/table.clj            |  1 +
 src/metabase/search/config.clj           | 22 +++++++++++++---------
 src/metabase/search/postgres/core.clj    |  2 ++
 src/metabase/search/postgres/index.clj   |  1 +
 src/metabase/search/postgres/scoring.clj |  9 +++++++++
 src/metabase/search/spec.clj             |  1 +
 test/metabase/search/spec_test.clj       |  3 ++-
 9 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/src/metabase/models/card.clj b/src/metabase/models/card.clj
index 8d82ac9ef08..a56455cfb55 100644
--- a/src/metabase/models/card.clj
+++ b/src/metabase/models/card.clj
@@ -1024,6 +1024,7 @@
                   :last-editor-id      :r.user_id
                   :pinned              [:> [:coalesce :collection_position [:inline 0]] [:inline 0]]
                   :verified            [:= "verified" :mr.status]
+                  :view-count          true
                   :created-at          true
                   :updated-at          true}
    :search-terms [:name :description]
diff --git a/src/metabase/models/dashboard.clj b/src/metabase/models/dashboard.clj
index 368e34d6a32..0159f645a80 100644
--- a/src/metabase/models/dashboard.clj
+++ b/src/metabase/models/dashboard.clj
@@ -674,6 +674,7 @@
                   :last-edited-at :r.timestamp
                   :pinned         [:> [:coalesce :collection_position [:inline 0]] [:inline 0]]
                   :table-id       false
+                  :view-count     true
                   :created-at     true
                   :updated-at     true}
    :search-terms [:name :description]
diff --git a/src/metabase/models/table.clj b/src/metabase/models/table.clj
index cd0209783f8..c3cbab97889 100644
--- a/src/metabase/models/table.clj
+++ b/src/metabase/models/table.clj
@@ -309,6 +309,7 @@
                   ;; legacy search uses :active for this, but then has a rule to only ever show active tables
                   ;; so we moved that to the where clause
                   :archived      false
+                  :view-count    true
                   :created-at    true
                   :updated-at    true}
    :search-terms [:name :description :display_name]
diff --git a/src/metabase/search/config.clj b/src/metabase/search/config.clj
index a9ca61f33a1..a60d4bcc14f 100644
--- a/src/metabase/search/config.clj
+++ b/src/metabase/search/config.clj
@@ -39,6 +39,10 @@
   "Results in more dashboards than this are all considered to be equally popular."
   10)
 
+(def ^:const view-count-scaling
+  "Divisor N for the view-count scorer, 2/pi * atan(views/N): the score is 0.5 at N views. The larger this value, the longer it will take for the score to approach 1.0. It will never quite reach it."
+  50)
+
 (def ^:const surrounding-match-context
   "Show this many words of context before/after matches in long search results"
   2)
@@ -77,15 +81,15 @@
 
 (def weights
   "Strength of the various scorers. Copied from metabase.search.in-place.scoring, but allowing divergence."
-  {:pinned              2                                   ;; simple field
-   :bookmarked          2                                   ;; join with multi-table entity
-   :recency             1.5                                 ;; date formula
-   :dashboard           1                                   ;; simple field
-   :model               0.5                                 ;; simple field
-   :official-collection 2                                   ;; a field we can calculate
-   :verified            2                                   ;; a simple field
-   :text                10                                  ;; strength of text-scores-weight previously
-   })
+  {:pinned              2
+   :bookmarked          2
+   :recency             1.5
+   :dashboard           1
+   :model               0.5
+   :official-collection 2
+   :verified            2
+   :view-count          2 ;; atan curve over view_count, see the :view-count scorer in metabase.search.postgres.scoring
+   :text                10})
 
 (defn model->alias
   "Given a model string returns the model alias"
diff --git a/src/metabase/search/postgres/core.clj b/src/metabase/search/postgres/core.clj
index 6d143c81698..a17556eb234 100644
--- a/src/metabase/search/postgres/core.clj
+++ b/src/metabase/search/postgres/core.clj
@@ -72,6 +72,8 @@
     (OffsetDateTime/parse s)))
 
 (defn- rehydrate [index-row]
+  ;; Useful for debugging scoring
+  #_ (dissoc index-row :legacy_input :created_at :updated_at :last_edited_at)
   (-> (merge
        (json/parse-string (:legacy_input index-row) keyword)
        (select-keys index-row [:total_score :pinned]))
diff --git a/src/metabase/search/postgres/index.clj b/src/metabase/search/postgres/index.clj
index bfe35def584..2291330f641 100644
--- a/src/metabase/search/postgres/index.clj
+++ b/src/metabase/search/postgres/index.clj
@@ -56,6 +56,7 @@
              [:model_rank :int :not-null]
              [:pinned :boolean]
              [:verified :boolean]
+             [:view_count :int]
              ;; permission related entities
              [:collection_id :int]
              [:database_id :int]
diff --git a/src/metabase/search/postgres/scoring.clj b/src/metabase/search/postgres/scoring.clj
index 16d7e09af97..cf390385c47 100644
--- a/src/metabase/search/postgres/scoring.clj
+++ b/src/metabase/search/postgres/scoring.clj
@@ -15,6 +15,14 @@
   [column ceiling]
   [:least [:/ [:coalesce column [:inline 0]] [:inline (double ceiling)]] [:inline 1]])
 
+(defn- atan-size
+  "HoneySQL score expression that grows with `column` but with diminishing gains; a NULL `column` is treated as 0."
+  [column scaling]
+  ;; score = (2 / PI) * atan(value / scaling), which is 0.5 when value = scaling
+  (let [value [:cast [:coalesce column [:inline 0.0]] :float]
+        ratio [:/ value [:inline scaling]]]
+    [:* [:/ [:inline 2] [:pi]] [:atan ratio]]))
+
 (defn- inverse-duration
   "Score at item based on the duration between two dates, where less is better."
   [from-column to-column ceiling-in-days]
@@ -67,6 +75,7 @@
 
 (def ^:private scorers
   {:text       [:ts_rank :search_vector :query [:inline ts-rank-normalization]]
+   :view-count (atan-size :view_count search.config/view-count-scaling)
    :pinned     (truthy :pinned)
    :bookmarked bookmark-score-expr
    :recency    (inverse-duration :model_updated_at [:now] search.config/stale-time-in-days)
diff --git a/src/metabase/search/spec.clj b/src/metabase/search/spec.clj
index 74014434e70..969646bb915 100644
--- a/src/metabase/search/spec.clj
+++ b/src/metabase/search/spec.clj
@@ -41,6 +41,7 @@
    :last-editor-id
    :pinned
    :verified
+   :view-count
    :updated-at])
 
 (def ^:private default-attrs
diff --git a/test/metabase/search/spec_test.clj b/test/metabase/search/spec_test.clj
index 399521754e0..047852a1893 100644
--- a/test/metabase/search/spec_test.clj
+++ b/test/metabase/search/spec_test.clj
@@ -70,6 +70,7 @@
                                                       :name
                                                       :query_type
                                                       :type
+                                                      :view_count
                                                       :created_at
                                                       :updated_at},
                                       :where        [:= :updated.id :this.id]}},
@@ -99,7 +100,7 @@
                                {:search-model "table",
                                 :fields
                                 #{:active :description :schema :name :id :db_id :initial_sync_status :display_name
-                                  :visibility_type :created_at :updated_at}
+                                  :visibility_type :view_count :created_at :updated_at}
                                 :where        [:= :updated.id :this.id]}},
                  :Database   #{{:search-model "table", :fields #{:name}, :where [:= :updated.id :this.db_id]}}
                  :Segment    #{{:search-model "segment"
-- 
GitLab