From dd614a18b4b703a263c93ec86a6a03bb5443d016 Mon Sep 17 00:00:00 2001 From: Cal Herries <39073188+calherries@users.noreply.github.com> Date: Fri, 22 Jul 2022 08:54:03 +1200 Subject: [PATCH] Fix incorrect scoring of search results (#23984) * Correct `score-and-result` to count the weight of the text score when there is no text match * Replace nil-punning of scores with explicit checking for zero scores at each step * Update scoring functions that can return nil to return zero instead * Address archived search with no search string * Update e2e test * Update e2e test with new ordering of results * Fix dataset search with blank query string --- .../onboarding/search/search.cy.spec.js | 1 - .../scenarios/question/new.cy.spec.js | 1 + src/metabase/api/search.clj | 2 +- src/metabase/search/scoring.clj | 59 +++++++++++-------- test/metabase/search/scoring_test.clj | 14 ++--- 5 files changed, 43 insertions(+), 34 deletions(-) diff --git a/frontend/test/metabase/scenarios/onboarding/search/search.cy.spec.js b/frontend/test/metabase/scenarios/onboarding/search/search.cy.spec.js index 2b62c1ce294..439239bed15 100644 --- a/frontend/test/metabase/scenarios/onboarding/search/search.cy.spec.js +++ b/frontend/test/metabase/scenarios/onboarding/search/search.cy.spec.js @@ -46,7 +46,6 @@ describe("scenarios > auth > search", () => { cy.findByPlaceholderText("Search…").type("ord"); cy.wait("@search"); - cy.get("body").trigger("keydown", { key: "ArrowDown" }); cy.get("body").trigger("keydown", { key: "ArrowDown" }); cy.get("body").trigger("keydown", { key: "Enter" }); diff --git a/frontend/test/metabase/scenarios/question/new.cy.spec.js b/frontend/test/metabase/scenarios/question/new.cy.spec.js index b67084cb3dc..afb96316198 100644 --- a/frontend/test/metabase/scenarios/question/new.cy.spec.js +++ b/frontend/test/metabase/scenarios/question/new.cy.spec.js @@ -80,6 +80,7 @@ describe("scenarios > question > new", () => { it("should allow to search and select tables", () => { cy.findAllByText("Orders") + .eq(1) .closest("li") .findByText("Table in") .parent() diff --git a/src/metabase/api/search.clj b/src/metabase/api/search.clj index 7d9e6e84826..652d63cc522 100644 --- a/src/metabase/api/search.clj +++ b/src/metabase/api/search.clj @@ -420,7 +420,7 @@ (map #(update % :bookmark bit->boolean)) (map #(update % :archived bit->boolean)) (map (partial scoring/score-and-result (:search-string search-ctx))) - (filter some?)) + (filter #(pos? (:score %)))) total-results (scoring/top-results reducible-results xf)] ;; We get to do this slicing and dicing with the result data because ;; the pagination of search is for UI improvement, not for performance. diff --git a/src/metabase/search/scoring.clj b/src/metabase/search/scoring.clj index 2bd3808480c..bca0363cd30 100644 --- a/src/metabase/search/scoring.clj +++ b/src/metabase/search/scoring.clj @@ -82,6 +82,8 @@ :text (tokens->string text-tokens (not is-match))}))))) (defn- text-score-with + "Scores a search result. Returns a map with the score and other info about the text match, + if there is one. If there is no match, the score is 0." [weighted-scorers query-tokens search-result] (let [total-weight (reduce + (map :weight weighted-scorers)) scores (for [column (search-config/searchable-columns-for-model (:model search-result)) @@ -96,14 +98,14 @@ 0 (map :scorer weighted-scorers)))] :when (and matched-text - (> score 0))] + (pos? score))] {:score (/ score total-weight) :match matched-text :match-context-thunk #(match-context query-tokens match-tokens) - :column column - :result search-result})] - (when (seq scores) - (apply max-key :score scores)))) + :column column})] + (if (seq scores) + (apply max-key :score scores) + {:score 0}))) (defn- consecutivity-scorer [query-tokens match-tokens] @@ -169,31 +171,31 @@ (tokenize (normalize raw-search-string)) result) {:score 0 - :match "" - :result result})) + :match ""})) (defn- pinned-score [{:keys [model collection_position]}] ;; We experimented with favoring lower collection positions, but it wasn't good ;; So instead, just give a bonus for items that are pinned at all - (when (#{"card" "dashboard" "pulse"} model) - (if ((fnil pos? 0) collection_position) - 1 - 0))) + (if (and (#{"card" "dashboard" "pulse"} model) + ((fnil pos? 0) collection_position)) + 1 + 0)) (defn- bookmarked-score [{:keys [model bookmark]}] - (when (#{"card" "collection" "dashboard"} model) - (if bookmark - 1 - 0))) + (if (and (#{"card" "collection" "dashboard"} model) + bookmark) + 1 + 0)) (defn- dashboard-count-score [{:keys [model dashboardcard_count]}] - (when (= model "card") + (if (= model "card") (min (/ dashboardcard_count search-config/dashboard-count-ceiling) - 1))) + 1) + 0)) (defn- recency-score [{:keys [updated_at]}] @@ -268,15 +270,22 @@ (weights-and-scores result)) (defn score-and-result - "Returns a map with the `:score` and `:result`—or nil. The score is a vector of comparable things in priority order." + "Returns a map with the `:score` and `:result`." ([raw-search-string result] - (let [text-score (text-score-with-match raw-search-string result) - scores (->> (conj (score-result result) - {:score (:score text-score), :weight 10 :name "text score"}) - (filter :score))] - {:score (/ (reduce + (map (fn [{:keys [weight score]}] (* weight score)) scores)) - (reduce + (map :weight scores))) - :result (serialize result text-score scores)}))) + (let [text-match (text-score-with-match raw-search-string result) + text-score {:score (:score text-match) + :weight 10 + :name "text score"} + scores (conj (score-result result) text-score)] + ;; Searches with a blank search string mean "show me everything, ranked"; + ;; see https://github.com/metabase/metabase/pull/15604 for archived search. + ;; If the search string is non-blank, results with no text match have a score of zero. + (if (or (str/blank? raw-search-string) + (pos? (:score text-match))) + {:score (/ (reduce + (map (fn [{:keys [weight score]}] (* weight score)) scores)) + (reduce + (map :weight scores))) + :result (serialize result text-match scores)} + {:score 0})))) (defn top-results "Given a reducible collection (i.e., from `jdbc/reducible-query`) and a transforming function for it, applies the diff --git a/test/metabase/search/scoring_test.clj b/test/metabase/search/scoring_test.clj index 68ccb332427..99bab24a6c9 100644 --- a/test/metabase/search/scoring_test.clj +++ b/test/metabase/search/scoring_test.clj @@ -53,10 +53,10 @@ (score ["rasta"] (result-row "Rasta"))))) (testing "misses" - (is (nil? + (is (zero? (score ["rasta"] (result-row "just a straight-up imposter")))) - (is (nil? + (is (zero? (score ["rasta" "the" "toucan"] (result-row ""))))))) @@ -83,10 +83,10 @@ (score ["rasta" "the" "toucan"] (result-row "Rasta may be my favorite of the toucans"))))) (testing "misses" - (is (nil? + (is (zero? (score ["rasta"] (result-row "just a straight-up imposter")))) - (is (nil? + (is (zero? (score ["rasta" "the" "toucan"] (result-row ""))))))) @@ -101,16 +101,16 @@ (score ["rasta" "the" "toucan"] (result-row "Rasta the Toucan"))))) (testing "misses" - (is (nil? + (is (zero? (score ["rasta"] (result-row "just a straight-up imposter")))) - (is (nil? + (is (zero? (score ["rasta" "the" "toucan"] (result-row ""))))))) (deftest exact-match-scorer-test (let [score (scorer->score #'scoring/exact-match-scorer)] - (is (nil? + (is (zero? (score ["rasta" "the" "toucan"] (result-row "Crowberto el tucan")))) (is (= 1/3 -- GitLab