Skip to content
Snippets Groups Projects
Unverified Commit 937c542b authored by Tim Macdonald's avatar Tim Macdonald Committed by GitHub
Browse files

Blended score for sorting (#15009)

* Combine search scores into one weighted number

* Use richer representation of scores

Should make the FE debugging bits more robust
parent 28ce196d
No related branches found
No related tags found
No related merge requests found
......@@ -109,17 +109,9 @@ const Title = styled("h3")`
margin-bottom: 4px;
`;
// Debug readout of a search result's scoring data, rendered into a <pre> that
// carries the "hide" and "search-score" classes.
// NOTE(review): this span is diff residue. Both the earlier form (a `score`
// array read by index) and the later form (a `scores` value dumped with
// JSON.stringify) appear below; confirm which lines are current before editing.
function Score({ score }) {
function Score({ scores }) {
return (
<pre className="hide search-score">
{`\n\n
Pinned: ${score[0]}
Dashboard: ${score[1]}
Recency: ${score[2]}
Text: ${score[3]}
Model: ${score[4]}
Raw: ${score && score.join(", ")}`}
</pre>
<pre className="hide search-score">{JSON.stringify(scores, null, 2)}</pre>
);
}
......@@ -130,7 +122,7 @@ function CollectionResult({ collection }) {
<Flex align="center">
<ItemIcon item={collection} />
<Title>{collection.name}</Title>
<Score score={collection.score} />
<Score scores={collection.scores} />
</Flex>
</ResultLink>
);
......@@ -169,7 +161,7 @@ function DashboardResult({ dashboard, options }) {
<Box>
<Title>{dashboard.name}</Title>
{formatCollection(dashboard.getCollection())}
<Score score={dashboard.score} />
<Score scores={dashboard.scores} />
</Box>
</Flex>
{formatContext(dashboard.context, options.compact)}
......@@ -185,7 +177,7 @@ function QuestionResult({ question, options }) {
<Box>
<Title>{question.name}</Title>
{formatCollection(question.getCollection())}
<Score score={question.score} />
<Score scores={question.scores} />
</Box>
{question.description && (
<Box ml="auto">
......@@ -208,7 +200,7 @@ function DefaultResult({ result, options }) {
<Box>
<Title>{result.name}</Title>
{formatCollection(result.getCollection())}
<Score score={result.score} />
<Score scores={result.scores} />
</Box>
</Flex>
{formatContext(result.context, options.compact)}
......
......@@ -19,6 +19,10 @@
bonus for results newer than this (scaled to just how recent they are). c.f. `search.scoring/recency-score`"
180)
;; Normalization cap for the dashboard-count signal: `dashboard-count-score` divides a result's
;; `dashboardcard_count` by this value and clamps the quotient at 1, so a count equal to or above
;; the ceiling earns the full score.
(def ^:const dashboard-count-ceiling
"Results in more dashboards than this are all considered to be equally popular."
50)
(def searchable-models
"Models that can be searched. The order of this list also influences the order of the results: items earlier in the
list will be ranked higher."
......
......@@ -76,6 +76,10 @@
:text (str/join " "
(map :text matches-or-misses-map))}))))
;; Divisor applied to every raw text score in `text-score-with`.
;; NOTE(review): 4 appears to be the sum of the weights in `match-based-scorers` (1 + 1 + 2),
;; which presumes each unweighted scorer tops out at 1 -- confirm against the scorers, and keep
;; this value in sync whenever a scorer or weight changes.
(def ^:const text-score-max
"The maximum text score that could be achieved without normalization. This value is then used to normalize it down to the interval [0, 1]"
4)
(defn- text-score-with
[scoring-fns query-tokens search-result]
(let [scores (for [column (search-config/searchable-columns-for-model (search-config/model-name->class (:model search-result)))
......@@ -87,7 +91,7 @@
match-tokens
scoring-fns))]
:when (> score 0)]
{:text-score score
{:text-score (/ score text-score-max)
:match matched-text
:match-context-thunk #(match-context query-tokens match-tokens)
:column column
......@@ -120,9 +124,10 @@
(comp (partial * factor) scorer))
;; The scorers that `text-score-with-match` hands to `text-score-with` to compute a result's
;; text score.
;; NOTE(review): diff residue -- the two `weigh-by` lines below are the earlier (1.5) and later (2)
;; weight for the exact-match scorer; only one of them belongs in the real vector.
(def ^:private match-based-scorers
;; If the below is modified, be sure to update `text-score-max`!
[consecutivity-scorer
total-occurrences-scorer
(weigh-by 1.5 exact-match-scorer)])
(weigh-by 2 exact-match-scorer)])
(def ^:private model->sort-position
(into {} (map-indexed (fn [i model]
......@@ -130,11 +135,16 @@
;; Reverse so that they're in descending order
(reverse search-config/searchable-models))))
(defn- model-score
  "Score derived from the result's `:model`: its entry in `model->sort-position` (0 when the model
  has no entry) divided by the number of entries in that map."
  [{:keys [model]}]
  (let [position    (or (model->sort-position model) 0)
        model-count (count model->sort-position)]
    (/ position model-count)))
;; Text-scores `result` against the search string using `match-based-scorers`. The search string is
;; normalized and tokenized first; an empty or nil search string short-circuits to nil via the
;; `(when (seq ...))` guard.
;; NOTE(review): diff residue -- the `query-string` and `raw-search-string` lines are the earlier and
;; later spelling of the same parameter.
(defn- text-score-with-match
[query-string result]
(when (seq query-string)
[raw-search-string result]
(when (seq raw-search-string)
(text-score-with match-based-scorers
(tokenize (normalize query-string))
(tokenize (normalize raw-search-string))
result)))
(defn- pinned-score
......@@ -147,8 +157,9 @@
;; Popularity signal computed from the result's `dashboardcard_count`.
;; NOTE(review): diff residue -- the bare `(or dashboardcard_count 0)` line is the earlier,
;; unnormalized body; the `(min ...)` form is the later one.
(defn- dashboard-count-score
[{:keys [dashboardcard_count]}]
;; higher is better; nil should count as 0
(or dashboardcard_count 0))
;; scale the count by the configured ceiling and cap the quotient at 1
(min (/ (or dashboardcard_count 0)
search-config/dashboard-count-ceiling)
1))
(defn- recency-score
[{:keys [updated_at]}]
......@@ -170,7 +181,7 @@
(defn- serialize
"Massage the raw result from the DB and match data into something more useful for the client"
[{:keys [result column match-context-thunk]} score]
[{:keys [result column match-context-thunk]} scores]
(let [{:keys [name display_name
collection_id collection_name]} result]
(-> result
......@@ -183,19 +194,36 @@
(match-context-thunk))
:collection {:id collection_id
:name collection_name}
:score score)
:scores scores)
(dissoc
:collection_id
:collection_name
:display_name))))
;; Builds the score components for a hit. In the map-based form each entry pairs a :name with the
;; raw :score for that signal and the :weight that `weighted-scores` multiplies it by.
;; NOTE(review): diff residue -- the `combined-score` header and the bare five-element vector are the
;; earlier form; the `weights-and-scores` header and the vector of maps are the later one.
(defn- combined-score
(defn- weights-and-scores
[{:keys [text-score result]}]
[(pinned-score result)
(dashboard-count-score result)
(recency-score result)
text-score
(model->sort-position (:model result))])
[{:weight 10
:score text-score
:name "text"}
{:weight 2
:score (pinned-score result)
:name "pinned"}
{:weight 3/2
:score (recency-score result)
:name "recency"}
{:weight 1
:score (dashboard-count-score result)
:name "dashboard"}
{:weight 1/2
:score (model-score result)
:name "model"}])
(defn- weighted-scores
  "Takes the components that `weights-and-scores` produces for `hit` and returns them with an extra
  `:weighted-score` key on each, holding the product of that component's `:weight` and `:score`."
  [hit]
  (for [{:keys [weight score] :as component} (weights-and-scores hit)]
    (assoc component :weighted-score (* weight score))))
(defn- accumulate-top-results
"Accumulator that saves the top n (defined by `search-config/max-filtered-results`) items sent to it"
......@@ -218,11 +246,11 @@
;; NOTE(review): diff residue -- the `query-string` lines (score taken from `combined-score`) are the
;; earlier body; the `raw-search-string` lines (score summed from `weighted-scores`) are the later one.
(defn score-and-result
"Returns a map with the `:score` and `:result`—or nil when the result does not match the search string. The score
is a single number: the sum of the weighted component scores. The per-component breakdown is attached to the
serialized result."
[query-string result]
(when-let [hit (text-score-with-match query-string result)]
(let [score (combined-score hit)]
{:score score
:result (serialize hit score)})))
[raw-search-string result]
(when-let [hit (text-score-with-match raw-search-string result)]
(let [scores (weighted-scores hit)]
{:score (reduce + (map :weighted-score scores))
:result (serialize hit scores)})))
(defn top-results
"Given a reducible collection (i.e., from `jdbc/reducible-query`) and a transforming function for it, applies the
......
......@@ -129,8 +129,8 @@
(-> result
mt/boolean-ids-and-timestamps
(update-in [:collection :name] #(some-> % string?))
;; `:score` is just used for debugging and would be a pain to match against.
(dissoc :score))))))))
;; `:scores` is just used for debugging and would be a pain to match against.
(dissoc :scores))))))))
(defn- search-request
[& args]
......
......@@ -26,7 +26,8 @@
;; Test helper: returns a function that runs the private `search/text-score-with` with `scorer` as
;; the only scoring fn and extracts the resulting :text-score.
;; NOTE(review): diff residue -- the bare `(comp :text-score` line is the earlier form. The later
;; form also multiplies by `search/text-score-max`, undoing the division done in `text-score-with`,
;; and lets a nil score pass through unchanged.
(defn scorer->score
[scorer]
(comp :text-score
(comp (fn [s] (when s (* s search/text-score-max)))
:text-score
(partial #'search/text-score-with [scorer])))
(deftest consecutivity-scorer-test
......@@ -204,3 +205,26 @@
(item 4 (days-ago stale))]
(sort-by score)
(map :id))))))))
(deftest combined-test
  ;; Ranks four dashboards against one search string by their blended `:score` (highest first) and
  ;; checks that they come back in the order they are listed in `candidates`.
  (let [search-string "custom expression examples"
        ;; exact text match
        exact-match   {:name "custom expression examples" :model "dashboard"}
        ;; good text match
        good-match    {:name "examples of custom expressions" :model "dashboard"}
        ;; weak text match, but strong on the non-text fields
        boosted-match {:name                "customer success stories"
                       :dashboardcard_count 50
                       :updated_at          (t/offset-date-time)
                       :collection_position 1
                       :model               "dashboard"}
        ;; middling text match, no other signal
        plain-match   {:name "customer examples of bad sorting" :model "dashboard"}
        candidates    [exact-match good-match boosted-match plain-match]
        ranked-names  (->> candidates
                           (map (partial search/score-and-result search-string))
                           (sort-by :score)
                           reverse
                           (map (comp :name :result)))]
    (is (= (map :name candidates)
           ranked-names))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment