Skip to content
Snippets Groups Projects
Unverified Commit dd614a18 authored by Cal Herries's avatar Cal Herries Committed by GitHub
Browse files

Fix incorrect scoring of search results (#23984)

* Correct `score-and-result` to count the weight of the text score when there is no text match

* Replace nil-punning of scores with explicit checking for zero scores at each step

* Update scoring functions that can return nil to return zero instead

* Address archived search with no search string

* Update e2e test

* Update e2e test with new ordering of results

* Fix dataset search with blank query string
parent 2ed3d0fe
No related branches found
No related tags found
No related merge requests found
...@@ -46,7 +46,6 @@ describe("scenarios > auth > search", () => { ...@@ -46,7 +46,6 @@ describe("scenarios > auth > search", () => {
cy.findByPlaceholderText("Search…").type("ord"); cy.findByPlaceholderText("Search…").type("ord");
cy.wait("@search"); cy.wait("@search");
cy.get("body").trigger("keydown", { key: "ArrowDown" });
cy.get("body").trigger("keydown", { key: "ArrowDown" }); cy.get("body").trigger("keydown", { key: "ArrowDown" });
cy.get("body").trigger("keydown", { key: "Enter" }); cy.get("body").trigger("keydown", { key: "Enter" });
......
...@@ -80,6 +80,7 @@ describe("scenarios > question > new", () => { ...@@ -80,6 +80,7 @@ describe("scenarios > question > new", () => {
it("should allow to search and select tables", () => { it("should allow to search and select tables", () => {
cy.findAllByText("Orders") cy.findAllByText("Orders")
.eq(1)
.closest("li") .closest("li")
.findByText("Table in") .findByText("Table in")
.parent() .parent()
......
...@@ -420,7 +420,7 @@ ...@@ -420,7 +420,7 @@
(map #(update % :bookmark bit->boolean)) (map #(update % :bookmark bit->boolean))
(map #(update % :archived bit->boolean)) (map #(update % :archived bit->boolean))
(map (partial scoring/score-and-result (:search-string search-ctx))) (map (partial scoring/score-and-result (:search-string search-ctx)))
(filter some?)) (filter #(pos? (:score %))))
total-results (scoring/top-results reducible-results xf)] total-results (scoring/top-results reducible-results xf)]
;; We get to do this slicing and dicing with the result data because ;; We get to do this slicing and dicing with the result data because
;; the pagination of search is for UI improvement, not for performance. ;; the pagination of search is for UI improvement, not for performance.
......
...@@ -82,6 +82,8 @@ ...@@ -82,6 +82,8 @@
:text (tokens->string text-tokens (not is-match))}))))) :text (tokens->string text-tokens (not is-match))})))))
(defn- text-score-with (defn- text-score-with
"Scores a search result. Returns a map with the score and other info about the text match,
if there is one. If there is no match, the score is 0."
[weighted-scorers query-tokens search-result] [weighted-scorers query-tokens search-result]
(let [total-weight (reduce + (map :weight weighted-scorers)) (let [total-weight (reduce + (map :weight weighted-scorers))
scores (for [column (search-config/searchable-columns-for-model (:model search-result)) scores (for [column (search-config/searchable-columns-for-model (:model search-result))
...@@ -96,14 +98,14 @@ ...@@ -96,14 +98,14 @@
0 0
(map :scorer weighted-scorers)))] (map :scorer weighted-scorers)))]
:when (and matched-text :when (and matched-text
(> score 0))] (pos? score))]
{:score (/ score total-weight) {:score (/ score total-weight)
:match matched-text :match matched-text
:match-context-thunk #(match-context query-tokens match-tokens) :match-context-thunk #(match-context query-tokens match-tokens)
:column column :column column})]
:result search-result})] (if (seq scores)
(when (seq scores) (apply max-key :score scores)
(apply max-key :score scores)))) {:score 0})))
(defn- consecutivity-scorer (defn- consecutivity-scorer
[query-tokens match-tokens] [query-tokens match-tokens]
...@@ -169,31 +171,31 @@ ...@@ -169,31 +171,31 @@
(tokenize (normalize raw-search-string)) (tokenize (normalize raw-search-string))
result) result)
{:score 0 {:score 0
:match "" :match ""}))
:result result}))
(defn- pinned-score (defn- pinned-score
[{:keys [model collection_position]}] [{:keys [model collection_position]}]
;; We experimented with favoring lower collection positions, but it wasn't good ;; We experimented with favoring lower collection positions, but it wasn't good
;; So instead, just give a bonus for items that are pinned at all ;; So instead, just give a bonus for items that are pinned at all
(when (#{"card" "dashboard" "pulse"} model) (if (and (#{"card" "dashboard" "pulse"} model)
(if ((fnil pos? 0) collection_position) ((fnil pos? 0) collection_position))
1 1
0))) 0))
(defn- bookmarked-score (defn- bookmarked-score
[{:keys [model bookmark]}] [{:keys [model bookmark]}]
(when (#{"card" "collection" "dashboard"} model) (if (and (#{"card" "collection" "dashboard"} model)
(if bookmark bookmark)
1 1
0))) 0))
(defn- dashboard-count-score (defn- dashboard-count-score
[{:keys [model dashboardcard_count]}] [{:keys [model dashboardcard_count]}]
(when (= model "card") (if (= model "card")
(min (/ dashboardcard_count (min (/ dashboardcard_count
search-config/dashboard-count-ceiling) search-config/dashboard-count-ceiling)
1))) 1)
0))
(defn- recency-score (defn- recency-score
[{:keys [updated_at]}] [{:keys [updated_at]}]
...@@ -268,15 +270,22 @@ ...@@ -268,15 +270,22 @@
(weights-and-scores result)) (weights-and-scores result))
(defn score-and-result (defn score-and-result
"Returns a map with the `:score` and `:result`—or nil. The score is a vector of comparable things in priority order." "Returns a map with the `:score` and `:result`."
([raw-search-string result] ([raw-search-string result]
(let [text-score (text-score-with-match raw-search-string result) (let [text-match (text-score-with-match raw-search-string result)
scores (->> (conj (score-result result) text-score {:score (:score text-match)
{:score (:score text-score), :weight 10 :name "text score"}) :weight 10
(filter :score))] :name "text score"}
{:score (/ (reduce + (map (fn [{:keys [weight score]}] (* weight score)) scores)) scores (conj (score-result result) text-score)]
(reduce + (map :weight scores))) ;; Searches with a blank search string mean "show me everything, ranked";
:result (serialize result text-score scores)}))) ;; see https://github.com/metabase/metabase/pull/15604 for archived search.
;; If the search string is non-blank, results with no text match have a score of zero.
(if (or (str/blank? raw-search-string)
(pos? (:score text-match)))
{:score (/ (reduce + (map (fn [{:keys [weight score]}] (* weight score)) scores))
(reduce + (map :weight scores)))
:result (serialize result text-match scores)}
{:score 0}))))
(defn top-results (defn top-results
"Given a reducible collection (i.e., from `jdbc/reducible-query`) and a transforming function for it, applies the "Given a reducible collection (i.e., from `jdbc/reducible-query`) and a transforming function for it, applies the
......
...@@ -53,10 +53,10 @@ ...@@ -53,10 +53,10 @@
(score ["rasta"] (score ["rasta"]
(result-row "Rasta"))))) (result-row "Rasta")))))
(testing "misses" (testing "misses"
(is (nil? (is (zero?
(score ["rasta"] (score ["rasta"]
(result-row "just a straight-up imposter")))) (result-row "just a straight-up imposter"))))
(is (nil? (is (zero?
(score ["rasta" "the" "toucan"] (score ["rasta" "the" "toucan"]
(result-row ""))))))) (result-row "")))))))
...@@ -83,10 +83,10 @@ ...@@ -83,10 +83,10 @@
(score ["rasta" "the" "toucan"] (score ["rasta" "the" "toucan"]
(result-row "Rasta may be my favorite of the toucans"))))) (result-row "Rasta may be my favorite of the toucans")))))
(testing "misses" (testing "misses"
(is (nil? (is (zero?
(score ["rasta"] (score ["rasta"]
(result-row "just a straight-up imposter")))) (result-row "just a straight-up imposter"))))
(is (nil? (is (zero?
(score ["rasta" "the" "toucan"] (score ["rasta" "the" "toucan"]
(result-row ""))))))) (result-row "")))))))
...@@ -101,16 +101,16 @@ ...@@ -101,16 +101,16 @@
(score ["rasta" "the" "toucan"] (score ["rasta" "the" "toucan"]
(result-row "Rasta the Toucan"))))) (result-row "Rasta the Toucan")))))
(testing "misses" (testing "misses"
(is (nil? (is (zero?
(score ["rasta"] (score ["rasta"]
(result-row "just a straight-up imposter")))) (result-row "just a straight-up imposter"))))
(is (nil? (is (zero?
(score ["rasta" "the" "toucan"] (score ["rasta" "the" "toucan"]
(result-row ""))))))) (result-row "")))))))
(deftest exact-match-scorer-test (deftest exact-match-scorer-test
(let [score (scorer->score #'scoring/exact-match-scorer)] (let [score (scorer->score #'scoring/exact-match-scorer)]
(is (nil? (is (zero?
(score ["rasta" "the" "toucan"] (score ["rasta" "the" "toucan"]
(result-row "Crowberto el tucan")))) (result-row "Crowberto el tucan"))))
(is (= 1/3 (is (= 1/3
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment