From dd614a18b4b703a263c93ec86a6a03bb5443d016 Mon Sep 17 00:00:00 2001
From: Cal Herries <39073188+calherries@users.noreply.github.com>
Date: Fri, 22 Jul 2022 08:54:03 +1200
Subject: [PATCH] Fix incorrect scoring of search results (#23984)

* Correct `score-and-result` to count the weight of the text score when there is no text match

* Replace nil-punning of scores with explicit checking for zero scores at each step

* Update scoring functions that can return nil to return zero instead

* Address archived search with no search string

* Update e2e test

* Update e2e test with new ordering of results

* Fix dataset search with blank query string
---
 .../onboarding/search/search.cy.spec.js       |  1 -
 .../scenarios/question/new.cy.spec.js         |  1 +
 src/metabase/api/search.clj                   |  2 +-
 src/metabase/search/scoring.clj               | 59 +++++++++++--------
 test/metabase/search/scoring_test.clj         | 14 ++---
 5 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/frontend/test/metabase/scenarios/onboarding/search/search.cy.spec.js b/frontend/test/metabase/scenarios/onboarding/search/search.cy.spec.js
index 2b62c1ce294..439239bed15 100644
--- a/frontend/test/metabase/scenarios/onboarding/search/search.cy.spec.js
+++ b/frontend/test/metabase/scenarios/onboarding/search/search.cy.spec.js
@@ -46,7 +46,6 @@ describe("scenarios > auth > search", () => {
       cy.findByPlaceholderText("Searchâ€¦").type("ord");
       cy.wait("@search");
 
-      cy.get("body").trigger("keydown", { key: "ArrowDown" });
       cy.get("body").trigger("keydown", { key: "ArrowDown" });
       cy.get("body").trigger("keydown", { key: "Enter" });
 
diff --git a/frontend/test/metabase/scenarios/question/new.cy.spec.js b/frontend/test/metabase/scenarios/question/new.cy.spec.js
index b67084cb3dc..afb96316198 100644
--- a/frontend/test/metabase/scenarios/question/new.cy.spec.js
+++ b/frontend/test/metabase/scenarios/question/new.cy.spec.js
@@ -80,6 +80,7 @@ describe("scenarios > question > new", () => {
 
       it("should allow to search and select tables", () => {
         cy.findAllByText("Orders")
+          .eq(1)
           .closest("li")
           .findByText("Table in")
           .parent()
diff --git a/src/metabase/api/search.clj b/src/metabase/api/search.clj
index 7d9e6e84826..652d63cc522 100644
--- a/src/metabase/api/search.clj
+++ b/src/metabase/api/search.clj
@@ -420,7 +420,7 @@
                              (map #(update % :bookmark bit->boolean))
                              (map #(update % :archived bit->boolean))
                              (map (partial scoring/score-and-result (:search-string search-ctx)))
-                             (filter some?))
+                             (filter #(pos? (:score %))))
           total-results     (scoring/top-results reducible-results xf)]
       ;; We get to do this slicing and dicing with the result data because
       ;; the pagination of search is for UI improvement, not for performance.
diff --git a/src/metabase/search/scoring.clj b/src/metabase/search/scoring.clj
index 2bd3808480c..bca0363cd30 100644
--- a/src/metabase/search/scoring.clj
+++ b/src/metabase/search/scoring.clj
@@ -82,6 +82,8 @@
                  :text     (tokens->string text-tokens (not is-match))})))))
 
 (defn- text-score-with
+  "Scores a search result. Returns a map with the score and other info about the text match,
+   if there is one. If there is no match, the score is 0."
   [weighted-scorers query-tokens search-result]
   (let [total-weight (reduce + (map :weight weighted-scorers))
         scores       (for [column (search-config/searchable-columns-for-model (:model search-result))
@@ -96,14 +98,14 @@
                                                              0
                                                              (map :scorer weighted-scorers)))]
                            :when  (and matched-text
-                                       (> score 0))]
+                                       (pos? score))]
                        {:score               (/ score total-weight)
                         :match               matched-text
                         :match-context-thunk #(match-context query-tokens match-tokens)
-                        :column              column
-                        :result              search-result})]
-    (when (seq scores)
-      (apply max-key :score scores))))
+                        :column              column})]
+    (if (seq scores)
+      (apply max-key :score scores)
+      {:score 0})))
 
 (defn- consecutivity-scorer
   [query-tokens match-tokens]
@@ -169,31 +171,31 @@
                      (tokenize (normalize raw-search-string))
                      result)
     {:score  0
-     :match  ""
-     :result result}))
+     :match  ""}))
 
 (defn- pinned-score
   [{:keys [model collection_position]}]
   ;; We experimented with favoring lower collection positions, but it wasn't good
   ;; So instead, just give a bonus for items that are pinned at all
-  (when (#{"card" "dashboard" "pulse"} model)
-    (if ((fnil pos? 0) collection_position)
-      1
-      0)))
+  (if (and (#{"card" "dashboard" "pulse"} model)
+           ((fnil pos? 0) collection_position))
+    1
+    0))
 
 (defn- bookmarked-score
   [{:keys [model bookmark]}]
-  (when (#{"card" "collection" "dashboard"} model)
-    (if bookmark
-      1
-      0)))
+  (if (and (#{"card" "collection" "dashboard"} model)
+           bookmark)
+    1
+    0))
 
 (defn- dashboard-count-score
   [{:keys [model dashboardcard_count]}]
-  (when (= model "card")
+  (if (= model "card")
     (min (/ dashboardcard_count
             search-config/dashboard-count-ceiling)
-         1)))
+         1)
+    0))
 
 (defn- recency-score
   [{:keys [updated_at]}]
@@ -268,15 +270,22 @@
    (weights-and-scores result))
 
 (defn score-and-result
-  "Returns a map with the `:score` and `:result`â€”or nil. The score is a vector of comparable things in priority order."
+  "Returns a map with the `:score` and `:result`."
   ([raw-search-string result]
-   (let [text-score (text-score-with-match raw-search-string result)
-         scores     (->> (conj (score-result result)
-                               {:score (:score text-score), :weight 10 :name "text score"})
-                         (filter :score))]
-     {:score  (/ (reduce + (map (fn [{:keys [weight score]}] (* weight score)) scores))
-                 (reduce + (map :weight scores)))
-      :result (serialize result text-score scores)})))
+   (let [text-match (text-score-with-match raw-search-string result)
+         text-score {:score  (:score text-match)
+                     :weight 10
+                     :name   "text score"}
+         scores (conj (score-result result) text-score)]
+     ;; Searches with a blank search string mean "show me everything, ranked";
+     ;; see https://github.com/metabase/metabase/pull/15604 for archived search.
+     ;; If the search string is non-blank, results with no text match have a score of zero.
+     (if (or (str/blank? raw-search-string)
+             (pos? (:score text-match)))
+       {:score  (/ (reduce + (map (fn [{:keys [weight score]}] (* weight score)) scores))
+                   (reduce + (map :weight scores)))
+        :result (serialize result text-match scores)}
+       {:score 0}))))
 
 (defn top-results
   "Given a reducible collection (i.e., from `jdbc/reducible-query`) and a transforming function for it, applies the
diff --git a/test/metabase/search/scoring_test.clj b/test/metabase/search/scoring_test.clj
index 68ccb332427..99bab24a6c9 100644
--- a/test/metabase/search/scoring_test.clj
+++ b/test/metabase/search/scoring_test.clj
@@ -53,10 +53,10 @@
              (score ["rasta"]
                     (result-row "Rasta")))))
     (testing "misses"
-      (is (nil?
+      (is (zero?
            (score ["rasta"]
                   (result-row "just a straight-up imposter"))))
-      (is (nil?
+      (is (zero?
            (score ["rasta" "the" "toucan"]
                   (result-row "")))))))
 
@@ -83,10 +83,10 @@
              (score ["rasta" "the" "toucan"]
                     (result-row "Rasta may be my favorite of the toucans")))))
     (testing "misses"
-      (is (nil?
+      (is (zero?
            (score ["rasta"]
                   (result-row "just a straight-up imposter"))))
-      (is (nil?
+      (is (zero?
            (score ["rasta" "the" "toucan"]
                   (result-row "")))))))
 
@@ -101,16 +101,16 @@
              (score ["rasta" "the" "toucan"]
                     (result-row "Rasta the Toucan")))))
     (testing "misses"
-      (is (nil?
+      (is (zero?
            (score ["rasta"]
                   (result-row "just a straight-up imposter"))))
-      (is (nil?
+      (is (zero?
            (score ["rasta" "the" "toucan"]
                   (result-row "")))))))
 
 (deftest exact-match-scorer-test
   (let [score (scorer->score #'scoring/exact-match-scorer)]
-    (is (nil?
+    (is (zero?
          (score ["rasta" "the" "toucan"]
                 (result-row "Crowberto el tucan"))))
     (is (= 1/3
-- 
GitLab