From 09ee5aa2e8dcede18da0ddf03a905944059b79d7 Mon Sep 17 00:00:00 2001 From: Chris Truter <crisptrutski@users.noreply.github.com> Date: Mon, 11 Nov 2024 23:35:33 +0200 Subject: [PATCH] Prefer search results with higher view counts (#49842) --- src/metabase/models/card.clj | 1 + src/metabase/models/dashboard.clj | 1 + src/metabase/models/table.clj | 1 + src/metabase/search/config.clj | 22 +++++++++++++--------- src/metabase/search/postgres/core.clj | 2 ++ src/metabase/search/postgres/index.clj | 1 + src/metabase/search/postgres/scoring.clj | 9 +++++++++ src/metabase/search/spec.clj | 1 + test/metabase/search/spec_test.clj | 3 ++- 9 files changed, 31 insertions(+), 10 deletions(-) diff --git a/src/metabase/models/card.clj b/src/metabase/models/card.clj index 8d82ac9ef08..a56455cfb55 100644 --- a/src/metabase/models/card.clj +++ b/src/metabase/models/card.clj @@ -1024,6 +1024,7 @@ :last-editor-id :r.user_id :pinned [:> [:coalesce :collection_position [:inline 0]] [:inline 0]] :verified [:= "verified" :mr.status] + :view-count true :created-at true :updated-at true} :search-terms [:name :description] diff --git a/src/metabase/models/dashboard.clj b/src/metabase/models/dashboard.clj index 368e34d6a32..0159f645a80 100644 --- a/src/metabase/models/dashboard.clj +++ b/src/metabase/models/dashboard.clj @@ -674,6 +674,7 @@ :last-edited-at :r.timestamp :pinned [:> [:coalesce :collection_position [:inline 0]] [:inline 0]] :table-id false + :view-count true :created-at true :updated-at true} :search-terms [:name :description] diff --git a/src/metabase/models/table.clj b/src/metabase/models/table.clj index cd0209783f8..c3cbab97889 100644 --- a/src/metabase/models/table.clj +++ b/src/metabase/models/table.clj @@ -309,6 +309,7 @@ ;; legacy search uses :active for this, but then has a rule to only ever show active tables ;; so we moved that to the where clause :archived false + :view-count true :created-at true :updated-at true} :search-terms [:name :description :display_name] diff --git a/src/metabase/search/config.clj b/src/metabase/search/config.clj index a9ca61f33a1..a60d4bcc14f 100644 --- a/src/metabase/search/config.clj +++ b/src/metabase/search/config.clj @@ -39,6 +39,10 @@ "Results in more dashboards than this are all considered to be equally popular." 10) +(def ^:const view-count-scaling + "The larger this value, the longer it will take for the score to approach 1.0. It will never quite reach it." + 50) + (def ^:const surrounding-match-context "Show this many words of context before/after matches in long search results" 2) @@ -77,15 +81,15 @@ (def weights "Strength of the various scorers. Copied from metabase.search.in-place.scoring, but allowing divergence." - {:pinned 2 ;; simple field - :bookmarked 2 ;; join with multi-table entity - :recency 1.5 ;; date formula - :dashboard 1 ;; simple field - :model 0.5 ;; simple field - :official-collection 2 ;; a field we can calculate - :verified 2 ;; a simple field - :text 10 ;; strength of text-scores-weight previously - }) + {:pinned 2 + :bookmarked 2 + :recency 1.5 + :dashboard 1 + :model 0.5 + :official-collection 2 + :verified 2 + :view-count 2 + :text 10}) (defn model->alias "Given a model string returns the model alias" diff --git a/src/metabase/search/postgres/core.clj b/src/metabase/search/postgres/core.clj index 6d143c81698..a17556eb234 100644 --- a/src/metabase/search/postgres/core.clj +++ b/src/metabase/search/postgres/core.clj @@ -72,6 +72,8 @@ (OffsetDateTime/parse s))) (defn- rehydrate [index-row] + ;; Useful for debugging scoring + #_ (dissoc index-row :legacy_input :created_at :updated_at :last_edited_at) (-> (merge (json/parse-string (:legacy_input index-row) keyword) (select-keys index-row [:total_score :pinned])) diff --git a/src/metabase/search/postgres/index.clj b/src/metabase/search/postgres/index.clj index bfe35def584..2291330f641 100644 --- a/src/metabase/search/postgres/index.clj +++ b/src/metabase/search/postgres/index.clj @@ -56,6 +56,7 @@ [:model_rank :int :not-null] [:pinned :boolean] [:verified :boolean] + [:view_count :int] ;; permission related entities [:collection_id :int] [:database_id :int] diff --git a/src/metabase/search/postgres/scoring.clj b/src/metabase/search/postgres/scoring.clj index 16d7e09af97..cf390385c47 100644 --- a/src/metabase/search/postgres/scoring.clj +++ b/src/metabase/search/postgres/scoring.clj @@ -15,6 +15,14 @@ [column ceiling] [:least [:/ [:coalesce column [:inline 0]] [:inline (double ceiling)]] [:inline 1]]) +(defn- atan-size + "Prefer items whose value is larger, with diminishing gains." + [column scaling] + ;; 2/PI * tan^-1 (x/N) + [:* + [:/ [:inline 2] [:pi]] + [:atan [:/ [:cast [:coalesce column [:inline 0.0]] :float] [:inline scaling]]]]) + (defn- inverse-duration "Score at item based on the duration between two dates, where less is better." [from-column to-column ceiling-in-days] @@ -67,6 +75,7 @@ (def ^:private scorers {:text [:ts_rank :search_vector :query [:inline ts-rank-normalization]] + :view-count (atan-size :view_count search.config/view-count-scaling) :pinned (truthy :pinned) :bookmarked bookmark-score-expr :recency (inverse-duration :model_updated_at [:now] search.config/stale-time-in-days) diff --git a/src/metabase/search/spec.clj b/src/metabase/search/spec.clj index 74014434e70..969646bb915 100644 --- a/src/metabase/search/spec.clj +++ b/src/metabase/search/spec.clj @@ -41,6 +41,7 @@ :last-editor-id :pinned :verified + :view-count :updated-at]) (def ^:private default-attrs diff --git a/test/metabase/search/spec_test.clj b/test/metabase/search/spec_test.clj index 399521754e0..047852a1893 100644 --- a/test/metabase/search/spec_test.clj +++ b/test/metabase/search/spec_test.clj @@ -70,6 +70,7 @@ :name :query_type :type + :view_count :created_at :updated_at}, :where [:= :updated.id :this.id]}}, @@ -99,7 +100,7 @@ {:search-model "table", :fields #{:active :description :schema :name :id :db_id :initial_sync_status :display_name - :visibility_type :created_at :updated_at} + :visibility_type :view_count :created_at :updated_at} :where [:= :updated.id :this.id]}}, :Database #{{:search-model "table", :fields #{:name}, :where [:= :updated.id :this.db_id]}} :Segment #{{:search-model "segment" -- GitLab