Skip to content
Snippets Groups Projects
Unverified Commit 1c6e8109 authored by Bryan Maass's avatar Bryan Maass Committed by GitHub
Browse files

Use all text scorers in the final result to increase scoring signal (#26026)

* Uses all text scorers in the final result

- instead of just the maximum one
- add tests

* add prefix scorer test + fix text-score-with

* linter fixes

* pass in number of results to find

* refactor test function

* fix linter by removing unused namespace: metabase.util

* limit arity of serialize to 3

* make oss-score and ee-score different things

- They were defined to be exactly the same, but should be different!
- Update some tests that broke when a test function was fixed

* remove extra let

* move rseq back out of sorted-take

* improve test feedback

* force the combined weight of text-based scorers to always be 10

* handle 0 score/weights when normalizing scores

* add nil check

* fix more subtle test differences

* more test fiddling

- still test that :offset and :limit respect limits

* reuse bit->boolean from api collection

* clean up some tests

- filter -> remove
- replace some magic numbers
- revert to testing entire maps instead of names of sorted items

* add test, docstring, and weight

* sort ns requires

* responding to most of the review comments

* start our zero-score sum check with 0

* do not tokenize / normalize nil raw-search-string

* force equality in basic search test

* modify test to work in dev and test environments

* use display_name in results when appropriate

- This was looking for the _first_ column that had a non-zero score, but
actually we need to consider all relevant columns.
- Uses them to figure out if there is a display name, and if there is,
to use it.
- Copied over the logic about showing :context from the prior approach
parent deeaed9c
No related branches found
No related tags found
No related merge requests found
(ns metabase-enterprise.search.scoring-test
(:require [clojure.test :refer :all]
(:require [cheshire.core :as json]
[clojure.math.combinatorics :as math.combo]
[clojure.string :as str]
[clojure.test :refer :all]
[java-time :as t]
[metabase-enterprise.search.scoring :as ee-scoring]
[metabase.public-settings.premium-features :as premium-features]
......@@ -18,67 +21,108 @@
(is (= [1 3 2] (score [(item 1 "verified") (item 2 nil) (item 3 nil)]))))))
(defn- ee-score
[search-string]
(fn [item]
(with-redefs [#_{:clj-kondo/ignore [:deprecated-var]} premium-features/enable-enhancements? (constantly true)]
(-> (scoring/score-and-result search-string item) :score))))
[search-string item]
(with-redefs [#_{:clj-kondo/ignore [:deprecated-var]}
premium-features/enable-enhancements? (constantly true)]
(-> (scoring/score-and-result search-string item) :score)))
(defn- oss-score
[search-string]
(fn [item]
(with-redefs [#_{:clj-kondo/ignore [:deprecated-var]} premium-features/enable-enhancements? (constantly false)]
(-> (scoring/score-and-result search-string item) :score))))
[search-string item]
(with-redefs [#_{:clj-kondo/ignore [:deprecated-var]}
premium-features/enable-enhancements? (constantly false)]
(-> (scoring/score-and-result search-string item) :score)))
(deftest official-collection-tests
(testing "it should bump up the value of items in official collections"
;; using the ee implementation that isn't wrapped by enable-enhancements? check
(let [search-string "custom expression examples"
ee-score (ee-score search-string)
oss-score (oss-score search-string)
labeled-results {:a {:name "custom expression examples" :model "dashboard"}
:b {:name "examples of custom expressions" :model "dashboard"}
:c {:name "customer success stories"
:dashboardcard_count 50
:updated_at (t/offset-date-time)
:collection_position 1
:model "dashboard"}
:d {:name "customer examples of bad sorting" :model "dashboard"}}
{:keys [a b c d]} labeled-results]
(let [search-string "custom expression examples"
a {:id "a" :name "custom expression examples" :model "dashboard"}
b {:id "b" :name "examples of custom expressions" :model "dashboard"}
c {:id "c"
:name "customer success stories"
:dashboardcard_count 50
:updated_at (t/offset-date-time)
:collection_position 1
:model "dashboard"}
d {:id "d" :name "customer examples of bad sorting" :model "dashboard"}]
(doseq [item [a b c d]]
(is (> (ee-score (assoc item :collection_authority_level "official")) (ee-score item))
(str "Item not greater for model: " (:model item))))
(let [items (shuffle [a b c d])]
(is (= (sort-by oss-score items)
;; assert that the ordering remains the same even if scores are slightly different
(sort-by ee-score items)))
(is (= ["customer examples of bad sorting"
"customer success stories"
"examples of custom expressions"
"custom expression examples"]
(map :name (sort-by oss-score [a b c d]))))
(is (= ["customer success stories"
"customer examples of bad sorting" ;; bumped up slightly in results
"examples of custom expressions"
"custom expression examples"]
(map :name (sort-by ee-score [a b c
(assoc d :collection_authority_level "official")])))))))
(is (> (ee-score search-string (assoc item :collection_authority_level "official"))
(ee-score search-string item))
(str "Score should be greater for item: " item " vs " (assoc item :collection_authority_level "official"))))
(is (= ["customer examples of bad sorting"
"customer success stories"
"examples of custom expressions"
"custom expression examples"]
(mapv :name (sort-by #(oss-score search-string %)
(shuffle [a b c d])))))
(is (= ["customer examples of bad sorting"
"customer success stories"
"examples of custom expressions"
"custom expression examples"]
(mapv :name (sort-by #(ee-score search-string %)
(shuffle [a b c (assoc d :collection_authority_level "official")])))))))
(testing "It should bump up the value of verified items"
(let [search-string "foo"
dashboard-count #(assoc % :dashboardcard_count 0)
ee-score (comp (ee-score search-string) dashboard-count)
oss-score (comp (oss-score search-string) dashboard-count)
labeled-results {:a {:name "foobar" :model "card" :id :a}
:b {:name "foo foo" :model "card" :id :b}
:c {:name "foo foo foo" :model "card" :id :c}}
{:keys [a b c]} labeled-results]
(let [ss "foo"
a {:name "foobar"
:model "card"
:id :a
:dashboardcard_count 0}
b {:name "foo foo"
:model "card"
:id :b
:dashboardcard_count 0}
c {:name "foo foo foo"
:model "card"
:id :c
:dashboardcard_count 0}]
(doseq [item [a b c]]
(is (> (ee-score (assoc item :moderated_status "verified")) (ee-score item))
(is (> (ee-score ss (assoc item :moderated_status "verified"))
(ee-score ss item))
(str "Item not greater for model: " (:model item))))
(let [items (shuffle [a b c])]
(is (= (sort-by oss-score items) (sort-by ee-score items))))
;; a is sorted lowest here (sort-by is ascending)
(is (= [:a :c :b] (map :id (sort-by ee-score [a b c]))))
;; a is verified and is now last or highest score
(is (= [:c :b :a]
(is (= (sort-by #(oss-score ss %) items)
(sort-by #(ee-score ss %) items))))
(is (= [:c :b :a] (map :id (sort-by #(ee-score ss %) [a b c]))))
;; c is verified and is now last or highest score
(is (= [:b :a :c]
(map :id
(sort-by ee-score [(assoc a :moderated_status "verified") b c])))))))
(sort-by #(ee-score ss %)
[a
b
(assoc c :moderated_status "verified")])))))))
(defn- all-permutations-all-orders
  "Every ordering of every subset of `values`, returned as a vector of vectors.

  (all-permutations-all-orders [1]) ;; => [[] [1]]
  (all-permutations-all-orders [1 2])
  ;; => [[] [1] [2] [1 2] [2 1]]
  (all-permutations-all-orders [1 2 3])
  ;; => [[]                                            ;; size 0
  ;;     [1] [2] [3]                                   ;; size 1
  ;;     [1 2] [2 1] [1 3] [3 1] [2 3] [3 2]           ;; size 2
  ;;     [1 2 3] [1 3 2] [2 1 3] [2 3 1] [3 1 2] [3 2 1]] ;; size 3

  Only accepts fewer than 10 values."
  [values]
  {:pre [(> 10 (count values))]}
  (into []
        (comp (mapcat math.combo/permutations)
              (map vec))
        (math.combo/subsets values)))
(defn test-corups
  "Build a corpus of names from every non-empty ordering of every subset of `words` (joined with single spaces),
  wrap each name in a fake search result, and assert that searching for any name in the corpus returns that same
  name as the single top result."
  [words]
  (let [names     (->> (all-permutations-all-orders words)
                       (map #(str/join " " %))
                       ;; the empty subset joins to "", which is not a usable search string
                       (remove #(= "" %)))
        the-query (json/generate-string {:type :query :query {:source-table 1}})
        results   (for [n names]
                    {:name n :dataset_query the-query})]
    (doseq [search-string names]
      (let [score-xf   (map #(scoring/score-and-result search-string %))
            top-result (first (scoring/top-results results 1 score-xf))]
        (is (= search-string
               (:name top-result)))))))
(deftest identical-results-result-in-identical-hits
  ;; Run the same corpus check against progressively larger word lists.
  (doseq [words [["foo" "bar"]
                 ["foo" "bar" "baz"]
                 ["foo" "bar" "baz" "quux"]]]
    (test-corups words)))
......@@ -297,13 +297,6 @@
[_ collection options]
(card-query false collection options))
(defn- bit->boolean
  "Coerce a bit returned by some MySQL/MariaDB versions in some situations to Boolean.
  A number becomes `false` when it is zero and `true` otherwise; anything that is not a number is returned as-is."
  [v]
  (cond
    (not (number? v)) v
    (zero? v)         false
    :else             true))
(defn- fully-parametrized-text?
"Decide if `text`, usually (a part of) a query, is fully parametrized given the parameter types
described by `template-tags` (usually the template tags of a native query).
......@@ -347,7 +340,7 @@
(defn- post-process-card-row [row]
(-> row
(dissoc :authority_level :icon :personal_owner_id :dataset_query)
(update :collection_preview bit->boolean)
(update :collection_preview api/bit->boolean)
(assoc :fully_parametrized (fully-parametrized-query? row))))
(defmethod post-process-collection-children :card
......
......@@ -538,3 +538,10 @@
~@more
(catch Throwable e#
(~raise e#))))
(defn bit->boolean
  "Coerce a bit returned by some MySQL/MariaDB versions in some situations to Boolean.
  Numeric input yields `true` unless it is zero; non-numeric input (including existing booleans and nil) passes
  through untouched."
  [v]
  (if-not (number? v)
    v
    (not (zero? v))))
......@@ -443,33 +443,29 @@
(s/defn ^:private search
"Builds a search query that includes all of the searchable entities and runs it"
[search-ctx :- SearchContext]
(letfn [(bit->boolean [v]
(if (number? v)
(not (zero? v))
v))]
(let [search-query (full-search-query search-ctx)
_ (log/tracef "Searching with query:\n%s" (u/pprint-to-str search-query))
reducible-results (db/reducible-query search-query :max-rows search-config/*db-max-results*)
xf (comp
(filter check-permissions-for-model)
;; MySQL returns `:bookmark` and `:archived` as `1` or `0` so convert those to boolean as needed
(map #(update % :bookmark bit->boolean))
(map #(update % :archived bit->boolean))
(map (partial scoring/score-and-result (:search-string search-ctx)))
(filter #(pos? (:score %))))
total-results (scoring/top-results reducible-results xf)]
;; We get to do this slicing and dicing with the result data because
;; the pagination of search is for UI improvement, not for performance.
;; We intend for the cardinality of the search results to be below the default max before this slicing occurs
{:total (count total-results)
:data (cond->> total-results
(some? (:offset-int search-ctx)) (drop (:offset-int search-ctx))
(some? (:limit-int search-ctx)) (take (:limit-int search-ctx)))
:available_models (query-model-set search-ctx)
:limit (:limit-int search-ctx)
:offset (:offset-int search-ctx)
:table_db_id (:table-db-id search-ctx)
:models (:models search-ctx)})))
(let [search-query (full-search-query search-ctx)
_ (log/tracef "Searching with query:\n%s" (u/pprint-to-str search-query))
reducible-results (db/reducible-query search-query :max-rows search-config/*db-max-results*)
xf (comp
(filter check-permissions-for-model)
;; MySQL returns `:bookmark` and `:archived` as `1` or `0` so convert those to boolean as needed
(map #(update % :bookmark api/bit->boolean))
(map #(update % :archived api/bit->boolean))
(map (partial scoring/score-and-result (:search-string search-ctx)))
(filter #(pos? (:score %))))
total-results (scoring/top-results reducible-results search-config/max-filtered-results xf)]
;; We get to do this slicing and dicing with the result data because
;; the pagination of search is for UI improvement, not for performance.
;; We intend for the cardinality of the search results to be below the default max before this slicing occurs
{:total (count total-results)
:data (cond->> total-results
(some? (:offset-int search-ctx)) (drop (:offset-int search-ctx))
(some? (:limit-int search-ctx)) (take (:limit-int search-ctx)))
:available_models (query-model-set search-ctx)
:limit (:limit-int search-ctx)
:offset (:offset-int search-ctx)
:table_db_id (:table-db-id search-ctx)
:models (:models search-ctx)}))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | Endpoint |
......
......@@ -81,31 +81,29 @@
{:is_match is-match
:text (tokens->string text-tokens (not is-match))})))))
(defn- text-score-with
"Scores a search result. Returns a map with the score and other info about the text match,
if there is one. If there is no match, the score is 0."
(defn- text-scores-with
"Scores a search result. Returns a vector of score maps, each containing `:weight`, `:score`, and other info about
the text match, if there is one. If there is no match, the score is 0."
[weighted-scorers query-tokens search-result]
(let [total-weight (reduce + (map :weight weighted-scorers))
scores (for [column (search-config/searchable-columns-for-model (:model search-result))
:let [matched-text (-> search-result
(get column)
(search-config/column->string (:model search-result) column))
match-tokens (some-> matched-text normalize tokenize)
score (and matched-text
(reduce (fn [tally f]
(+ tally
(f query-tokens match-tokens)))
0
(map :scorer weighted-scorers)))]
:when (and matched-text
(pos? score))]
{:score (/ score total-weight)
:match matched-text
:match-context-thunk #(match-context query-tokens match-tokens)
:column column})]
;; TODO is pmap over search-result worth it?
(let [scores (for [column (search-config/searchable-columns-for-model (:model search-result))
{:keys [scorer name weight]
:as _ws} weighted-scorers
:let [matched-text (-> search-result
(get column)
(search-config/column->string (:model search-result) column))
match-tokens (some-> matched-text normalize tokenize)
raw-score (scorer query-tokens match-tokens)]
:when (and matched-text (pos? raw-score))]
{:score raw-score
:name (str "text-" name)
:weight weight
:match matched-text
:match-context-thunk #(match-context query-tokens match-tokens)
:column column})]
(if (seq scores)
(apply max-key :score scores)
{:score 0})))
(vec scores)
[{:score 0 :weight 0}])))
(defn- consecutivity-scorer
[query-tokens match-tokens]
......@@ -138,7 +136,7 @@
(count query-tokens)))
(defn fullness-scorer
"How much of the *result* is covered by the search query?"
"How much of the result is covered by the search query?"
[query-tokens match-tokens]
(let [match-token-count (count match-tokens)]
(if (zero? match-token-count)
......@@ -146,15 +144,36 @@
(/ (occurrences query-tokens match-tokens matches-in?)
match-token-count))))
(defn- prefix-counter
  "Length of the common prefix of `query-string` and `item-string`: the number of leading positions at which both
  hold equal elements, stopping at the first mismatch or at the end of the shorter one."
  [query-string item-string]
  (->> (map = query-string item-string)
       (take-while true?)
       count))
(defn- count-token-chars
  "Total number of characters across `tokens`, a seq of strings like [\"abc\" \"def\"].
  The tokens are counted individually, so no separator characters are included."
  [tokens]
  (transduce (map count) + 0 tokens))
(defn prefix-scorer
  "How much does the search query match the beginning of the result?

  Both token seqs are joined with single spaces and lower-cased, then compared from the start. The score is the
  length of the shared prefix (joining spaces included) divided by the number of characters in `query-tokens`
  (joining spaces excluded). Returns 0 when `query-tokens` contains no characters at all, instead of dividing by
  zero."
  [query-tokens match-tokens]
  (let [query-char-count (count-token-chars query-tokens)]
    (if (zero? query-char-count)
      ;; an empty query (no tokens, or only empty tokens) cannot match any prefix
      0
      (let [query (str/lower-case (str/join " " query-tokens))
            match (str/lower-case (str/join " " match-tokens))]
        (/ (prefix-counter query match)
           query-char-count)))))
(def ^:private match-based-scorers
[{:scorer consecutivity-scorer
:weight 1}
{:scorer total-occurrences-scorer
:weight 1}
{:scorer fullness-scorer
:weight 1/2}
{:scorer exact-match-scorer
:weight 2}])
[{:scorer exact-match-scorer :name "exact-match" :weight 4}
{:scorer consecutivity-scorer :name "consecutivity" :weight 2}
{:scorer total-occurrences-scorer :name "total-occurrences" :weight 2}
{:scorer fullness-scorer :name "fullness" :weight 1}
{:scorer prefix-scorer :name "prefix" :weight 1}])
(def ^:private model->sort-position
(zipmap (reverse search-config/all-models) (range)))
......@@ -164,14 +183,13 @@
(/ (or (model->sort-position model) 0)
(count model->sort-position)))
(defn- text-score-with-match
(defn- text-scores-with-match
[raw-search-string result]
(if (seq raw-search-string)
(text-score-with match-based-scorers
(tokenize (normalize raw-search-string))
result)
{:score 0
:match ""}))
(text-scores-with match-based-scorers
(tokenize (normalize raw-search-string))
result)
[{:score 0 :weight 0}]))
(defn- pinned-score
[{:keys [model collection_position]}]
......@@ -209,31 +227,27 @@
(max (- stale-time days-ago) 0)
stale-time)))
(defn- compare-score-and-result
"Compare maps of scores and results. Must return -1, 0, or 1. The score is assumed to be a vector, and will be
compared in order."
[{score-1 :score} {score-2 :score}]
(compare score-1 score-2))
(defn- serialize
"Massage the raw result from the DB and match data into something more useful for the client"
[result {:keys [column match-context-thunk]} scores]
(let [{:keys [name display_name
collection_id collection_name collection_authority_level collection_app_id]} result]
[result all-scores relevant-scores]
(let [{:keys [name display_name collection_id collection_name collection_authority_level
collection_app_id]} result
matching-columns (into #{} (remove nil? (map :column relevant-scores)))
match-context-thunk (first (keep :match-context-thunk relevant-scores))]
(-> result
(assoc
:name (if (or (= column :name)
(nil? display_name))
name
display_name)
:context (when (and (not (search-config/displayed-columns column))
match-context-thunk)
:name (if (and (contains? matching-columns :display_name) display_name)
display_name
name)
:context (when (and match-context-thunk
(empty?
(remove matching-columns search-config/displayed-columns)))
(match-context-thunk))
:collection {:id collection_id
:name collection_name
:authority_level collection_authority_level
:app_id collection_app_id}
:scores scores)
:scores all-scores)
(update :dataset_query #(some-> % json/parse-string mbql.normalize/normalize))
(dissoc
:collection_id
......@@ -244,21 +258,11 @@
(defn weights-and-scores
"Default weights and scores for a given result."
[result]
[{:weight 2
:score (pinned-score result)
:name "pinned"}
{:weight 2
:score (bookmarked-score result)
:name "bookmarked"}
{:weight 3/2
:score (recency-score result)
:name "recency"}
{:weight 1
:score (dashboard-count-score result)
:name "dashboard"}
{:weight 1/2
:score (model-score result)
:name "model"}])
[{:weight 2 :score (pinned-score result) :name "pinned"}
{:weight 2 :score (bookmarked-score result) :name "bookmarked"}
{:weight 3/2 :score (recency-score result) :name "recency"}
{:weight 1 :score (dashboard-count-score result) :name "dashboard"}
{:weight 1/2 :score (model-score result) :name "model"}])
(defenterprise score-result
"Score a result, returning a collection of maps with score and weight. Should not include the text scoring, done
......@@ -271,31 +275,70 @@
[result]
(weights-and-scores result))
(defn- sum-weights
  "Add up the `:weight` of every map in `weights`. A map that has no `:weight` key contributes 0."
  [weights]
  (transduce (map #(get % :weight 0)) + 0 weights))
(defn- compute-normalized-score
  "Weighted average of the `:score` values in `scores`: the sum of `score * weight` divided by the sum of the
  weights. A missing `:score` or `:weight` key counts as 0. Returns 0 when the weights sum to zero."
  [scores]
  (let [weight-sum (sum-weights scores)]
    (if-not (zero? weight-sum)
      (/ (transduce (map (fn [{:keys [weight score]
                               :or   {weight 0 score 0}}]
                           (* score weight)))
                    +
                    0
                    scores)
         weight-sum)
      0)))
(defn force-weight
  "Reweight `scores` such that the sum of their weights equals `total`, and their proportions do not change.

  Returns a vector of the same maps with `:weight` replaced. When the existing weights sum to zero (including a
  floating-point 0.0) there are no proportions to preserve, so every weight becomes 0. A map without a `:weight`
  key is treated as having weight 0, matching how the weights are summed."
  [scores total]
  (let [total-weight (sum-weights scores)
        reweigh      (if (zero? total-weight)
                       (constantly 0)
                       (fn [weight]
                         (* total (/ (or weight 0) total-weight))))]
    (mapv #(update % :weight reweigh) scores)))
(def ^:const text-scores-weight
  "This is used to control the total weight of text-based scorers in [[score-and-result]]: the text scores are
  rescaled with [[force-weight]] so that their weights add up to this value."
  10)
(defn score-and-result
"Returns a map with the `:score` and `:result`."
([raw-search-string result]
(let [text-match (text-score-with-match raw-search-string result)
text-score {:score (:score text-match)
:weight 10
:name "text score"}
scores (conj (score-result result) text-score)]
;; Searches with a blank search string mean "show me everything, ranked";
;; see https://github.com/metabase/metabase/pull/15604 for archived search.
;; If the search string is non-blank, results with no text match have a score of zero.
(if (or (str/blank? raw-search-string)
(pos? (:score text-match)))
{:score (/ (reduce + (map (fn [{:keys [weight score]}] (* weight score)) scores))
(reduce + (map :weight scores)))
:result (serialize result text-match scores)}
{:score 0}))))
"Returns a map with the normalized, combined score from relevant-scores as `:score` and `:result`."
[raw-search-string result]
(let [text-matches (-> raw-search-string
(text-scores-with-match result)
(force-weight text-scores-weight))
all-scores (into (vec (score-result result)) text-matches)
relevant-scores (remove #(= 0 (:score %)) all-scores)
total-score (compute-normalized-score all-scores)]
;; Searches with a blank search string mean "show me everything, ranked";
;; see https://github.com/metabase/metabase/pull/15604 for archived search.
;; If the search string is non-blank, results with no text match have a score of zero.
(if (or (str/blank? raw-search-string)
(pos? (reduce (fn [acc {:keys [score] :or {score 0}}] (+ acc score))
0
text-matches)))
{:score total-score
:result (serialize result all-scores relevant-scores)}
{:score 0})))
(defn compare-score
  "Comparator for maps that carry a `:score`. Delegates to `compare` on the two scores, so the result is negative,
  zero, or positive depending on whether the first score sorts before, the same as, or after the second."
  [result-1 result-2]
  (compare (:score result-1) (:score result-2)))
(defn top-results
"Given a reducible collection (i.e., from `jdbc/reducible-query`) and a transforming function for it, applies the
transformation and returns a seq of the results sorted by score. The transforming function is expected to output
maps with `:score` and `:result` keys."
[reducible-results xf]
[reducible-results max-results xf]
(->> reducible-results
(transduce xf (u/sorted-take search-config/max-filtered-results compare-score-and-result))
;; Make it descending: high scores first
(transduce xf (u/sorted-take max-results compare-score))
rseq
(map :result)))
......@@ -134,7 +134,7 @@
(def ^:private remove-databases
"Remove DBs from the results, which is useful since test databases unrelated to this suite can pollute the results"
(partial filter #(not= (:model %) "database")))
(partial remove #(= (:model %) "database")))
(defn- process-raw-data [raw-data keep-database-id]
(for [result raw-data
......@@ -215,16 +215,16 @@
(search-request-data :crowberto :q "test collection"))))))
(testing "It limits matches properly"
(with-search-items-in-root-collection "test"
(is (= 2 (count (search-request-data :crowberto :q "test" :limit "2" :offset "0"))))))
(is (>= 2 (count (search-request-data :crowberto :q "test" :limit "2" :offset "0"))))))
(testing "It offsets matches properly"
(with-search-items-in-root-collection "test"
(is (<= 4 (count (search-request-data :crowberto :q "test" :limit "100" :offset "2"))))))
(testing "It offsets without limit properly"
(with-search-items-in-root-collection "test"
(is (= 5 (count (search-request-data :crowberto :q "test" :offset "2"))))))
(is (<= 5 (count (search-request-data :crowberto :q "test" :offset "2"))))))
(testing "It limits without offset properly"
(with-search-items-in-root-collection "test"
(is (= 2 (count (search-request-data :crowberto :q "test" :limit "2"))))))
(is (>= 2 (count (search-request-data :crowberto :q "test" :limit "2"))))))
(testing "It subsets matches for model"
(with-search-items-in-root-collection "test"
(is (= 0 (count (search-request-data :crowberto :q "test" :models "database"))))
......
......@@ -28,7 +28,8 @@
(defn- scorer->score
[scorer]
(comp :score
(partial #'scoring/text-score-with [{:weight 1 :scorer scorer}])))
first
(partial #'scoring/text-scores-with [{:weight 1 :scorer scorer}])))
(deftest ^:parallel consecutivity-scorer-test
(let [score (scorer->score #'scoring/consecutivity-scorer)]
......@@ -123,27 +124,41 @@
(score ["rasta" "the" "toucan"]
(result-row "Rasta the toucan"))))))
(deftest ^:parallel prefix-match-scorer-test
  (let [score (scorer->score #'scoring/prefix-scorer)]
    ;; each row: [expected score, query tokens, name of the result row being scored]
    (doseq [[expected query-tokens row-name]
            [[5/9 ["Crowberto" "the" "toucan"] "Crowberto el tucan"]
             [3/7 ["rasta" "the" "toucan"]     "Rasta el tucan"]
             ;; nothing in common at the very start => no prefix score
             [0   ["rasta" "the" "toucan"]     "Crowberto the toucan"]]]
      (is (= expected
             (score query-tokens (result-row row-name)))))))
(deftest ^:parallel top-results-test
(let [xf (map identity)]
(let [xf (map identity)
small 10
medium 20
large 200]
(testing "a non-full queue behaves normally"
(let [items (->> (range 10)
(let [items (->> (range small)
reverse ;; descending order
(map (fn [i]
{:score [2 2 i]
:result (str "item " i)})))]
(is (= (map :result items)
(scoring/top-results items xf)))))
(scoring/top-results items large xf)))))
(testing "a full queue only saves the top items"
(let [sorted-items (->> (+ 10 search-config/max-filtered-results)
(let [sorted-items (->> (+ small search-config/max-filtered-results)
range
reverse ;; descending order
(map (fn [i]
{:score [1 2 3 i]
:result (str "item " i)})))]
(is (= (->> sorted-items
(take search-config/max-filtered-results)
(take medium)
(map :result))
(scoring/top-results (shuffle sorted-items) xf)))))))
(scoring/top-results (shuffle sorted-items) 20 xf)))))))
(deftest ^:parallel match-context-test
(let [context #'scoring/match-context
......@@ -302,3 +317,19 @@
(is (nil? (-> {:name "dash" :model "dashboard"}
(#'scoring/serialize {} {})
:dataset_query)))))
(deftest force-weight-test
  ;; a single scorer receives the whole total
  (is (= [{:weight 10}]
         (scoring/force-weight [{:weight 1}] 10)))
  ;; equal weights split the total evenly
  (is (= [{:weight 5} {:weight 5}]
         (scoring/force-weight [{:weight 1} {:weight 1}] 10)))
  ;; a zero weight stays zero; the rest of the total goes to the other scorer
  (is (= [{:weight 0} {:weight 10}]
         (scoring/force-weight [{:weight 0} {:weight 1}] 10)))
  ;; the number of scores is preserved and each gets its proportional share
  (is (= 10
         (count (scoring/force-weight (repeat 10 {:weight 1}) 10))))
  (is (= #{[:weight 1]}
         (set (first (scoring/force-weight (repeat 10 {:weight 1}) 10)))))
  (is (= 100
         (count (scoring/force-weight (repeat 100 {:weight 10}) 10))))
  (is (= #{{:weight 1/10}}
         (set (scoring/force-weight (repeat 100 {:weight 10}) 10)))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment