Skip to content
Snippets Groups Projects
Unverified Commit 65e3ba19 authored by Tim Macdonald's avatar Tim Macdonald Committed by GitHub
Browse files

Search: Use number of occurrences in a dashboard (#14946)

* Use dashboard count in search scoring

[Fixes #14945]

* Fix sorting priority bug

(thanks, Dan!)

* Show (hidden) scores in search results for debugging purposes

* Bump memory limit for SQL Server on CI

c.f. https://metaboat.slack.com/archives/C5XHN8GLW/p1614272426000400
parent ca429576
Branches
Tags
No related merge requests found
......@@ -174,6 +174,7 @@ executors:
environment:
ACCEPT_EULA: Y
SA_PASSWORD: 'P@ssw0rd'
MSSQL_MEMORY_LIMIT_MB: 1024
fe-mongo-4:
working_directory: /home/circleci/metabase/metabase/
......
......@@ -103,6 +103,19 @@ const Title = styled("h3")`
margin-bottom: 4px;
`;
function Score({ score }) {
return (
<pre className="hide search-score">
{`\n\n
Pinned: ${score[0]}
Dashboard: ${score[1]}
Text: ${score[2]}
Model: ${score[3]}
Raw: ${score && score.join(", ")}`}
</pre>
);
}
// Generally these should be at the bottom of the list I'd hope
function CollectionResult({ collection }) {
return (
......@@ -110,6 +123,7 @@ function CollectionResult({ collection }) {
<Flex align="center">
<ItemIcon item={collection} />
<Title>{collection.name}</Title>
<Score score={collection.score} />
</Flex>
</ResultLink>
);
......@@ -148,6 +162,7 @@ function DashboardResult({ dashboard, options }) {
<Box>
<Title>{dashboard.name}</Title>
{formatCollection(dashboard.getCollection())}
<Score score={dashboard.score} />
</Box>
</Flex>
{formatContext(dashboard.context, options.compact)}
......@@ -163,6 +178,7 @@ function QuestionResult({ question, options }) {
<Box>
<Title>{question.name}</Title>
{formatCollection(question.getCollection())}
<Score score={question.score} />
</Box>
{question.description && (
<Box ml="auto">
......@@ -184,6 +200,7 @@ function DefaultResult({ result, options }) {
<ItemIcon item={result} />
<Title>{result.name}</Title>
{formatCollection(result.getCollection())}
<Score score={result.score} />
</Flex>
{formatContext(result.context, options.compact)}
</ResultLink>
......
......@@ -73,6 +73,7 @@
:collection_position :integer
:favorite :boolean
;; returned for Card only
:dashboardcard_count :integer
:dataset_query :text
;; returned for Metric and Segment
:table_id :integer
......@@ -302,22 +303,20 @@
(if (number? v)
(not (zero? v))
v))]
(let [search-query {:union-all (for [model search-config/searchable-models
:let [query (search-query-for-model model search-ctx)]
:when (seq query)]
query)}
_ (log/tracef "Searching with query:\n%s" (u/pprint-to-str search-query))
results (db/reducible-query search-query :max-rows search-config/db-max-results)
xf (comp
(filter check-permissions-for-model)
;; MySQL returns `:favorite` and `:archived` as `1` or `0` so convert those to boolean as needed
(map #(update % :favorite bit->boolean))
(map #(update % :archived bit->boolean))
(map (partial scoring/score-and-result (:search-string search-ctx)))
(filter some?))]
(->> results
(transduce xf scoring/accumulate-top-results)
(map :result)))))
(let [search-query {:union-all (for [model search-config/searchable-models
:let [query (search-query-for-model model search-ctx)]
:when (seq query)]
query)}
_ (log/tracef "Searching with query:\n%s" (u/pprint-to-str search-query))
reducible-results (db/reducible-query search-query :max-rows search-config/db-max-results)
xf (comp
(filter check-permissions-for-model)
;; MySQL returns `:favorite` and `:archived` as `1` or `0` so convert those to boolean as needed
(map #(update % :favorite bit->boolean))
(map #(update % :archived bit->boolean))
(map (partial scoring/score-and-result (:search-string search-ctx)))
(filter some?))]
(scoring/top-results reducible-results xf))))
;;; +----------------------------------------------------------------------------------------------------------------+
......
......@@ -61,6 +61,13 @@
"Case statement to return boolean values of `:favorite` for Card and Dashboard."
[(hsql/call :case [:not= :fave.id nil] true :else false) :favorite])
(def ^:private dashboardcard-count-col
"Subselect to get the count of associated DashboardCards"
[{:select [:%count.*]
:from [:report_dashboardcard]
:where [:= :report_dashboardcard.card_id :card.id]}
:dashboardcard_count])
(def ^:private table-columns
"Columns containing information about the Table this model references. Returned for Metrics and Segments."
[:table_id
......@@ -77,7 +84,7 @@
(defmethod columns-for-model (class Card)
[_]
(conj default-columns :collection_id :collection_position [:collection.name :collection_name] :dataset_query
favorite-col))
favorite-col dashboardcard-count-col))
(defmethod columns-for-model (class Dashboard)
[_]
......
......@@ -126,7 +126,8 @@
(def ^:private model->sort-position
(into {} (map-indexed (fn [i model]
[(str/lower-case (name model)) i])
search-config/searchable-models)))
;; Reverse so that they're in descending order
(reverse search-config/searchable-models))))
(defn- text-score-with-match
[query-string result]
......@@ -137,10 +138,16 @@
(defn- pinned-score
[{pos :collection_position}]
;; low is better (top of the list), but nil or 0 should be at the bottom
(if (or (nil? pos)
(zero? pos))
0
(/ -1 pos)))
(/ 1 pos)))
(defn- dashboard-count-score
[{:keys [dashboardcard_count]}]
;; higher is better; nil should count as 0
(or dashboardcard_count 0))
(defn- compare-score-and-result
"Compare maps of scores and results. Must return -1, 0, or 1. The score is assumed to be a vector, and will be
......@@ -150,7 +157,7 @@
(defn- serialize
"Massage the raw result from the DB and match data into something more useful for the client"
[{:keys [result column match-context-thunk]}]
[{:keys [result column match-context-thunk]} score]
(let [{:keys [name display_name
collection_id collection_name]} result]
(-> result
......@@ -162,7 +169,8 @@
:context (when-not (search-config/displayed-columns column)
(match-context-thunk))
:collection {:id collection_id
:name collection_name})
:name collection_name}
:score score)
(dissoc
:collection_id
:collection_name
......@@ -171,12 +179,12 @@
(defn- combined-score
[{:keys [text-score result]}]
[(pinned-score result)
(- text-score)
(model->sort-position (:model result))
(:name result)])
(dashboard-count-score result)
text-score
(model->sort-position (:model result))])
(defn accumulate-top-results
"Accumulator that saves the top n (defined by `search-config/max-filtered-results`) sent to it"
(defn- accumulate-top-results
"Accumulator that saves the top n (defined by `search-config/max-filtered-results`) items sent to it"
([] (PriorityQueue. search-config/max-filtered-results compare-score-and-result))
([^PriorityQueue q]
(loop [acc []]
......@@ -198,5 +206,17 @@
"Returns a map with the `:score` and `:result`—or nil. The score is a vector of comparable things in priority order."
[query-string result]
(when-let [hit (text-score-with-match query-string result)]
{:score (combined-score hit)
:result (serialize hit)}))
(let [score (combined-score hit)]
{:score score
:result (serialize hit score)})))
(defn top-results
"Given a reducible collection (i.e., from `jdbc/reducible-query`) and a transforming function for it, applies the
transformation and returns a seq of the results sorted by score. The transforming function is expected to output
maps with `:score` and `:result` keys."
[reducible-results xf]
(->> reducible-results
(transduce xf accumulate-top-results)
;; Make it descending: high scores first
reverse
(map :result)))
......@@ -19,6 +19,7 @@
:collection {:id false :name nil}
:collection_position nil
:context nil
:dashboardcard_count nil
:favorite nil
:table_id false
:database_id false
......@@ -37,25 +38,29 @@
:id (mt/id :checkins))))
(defn- sorted-results [results]
(sort-by (juxt (comp (var-get #'metabase.search.scoring/model->sort-position) :model) :name) results))
(->> results
(sort-by (juxt (comp (var-get #'metabase.search.scoring/model->sort-position) :model)))
reverse))
(defn- make-result
[name & kvs]
(merge
default-search-row
{:name name}
(apply array-map kvs)))
(defn- default-search-results []
(letfn [(make-result [name & kvs]
(merge
default-search-row
{:name name}
(apply array-map kvs)))]
(sorted-results
[(make-result "dashboard test dashboard", :model "dashboard", :favorite false)
(make-result "collection test collection", :model "collection", :collection {:id true, :name true})
(make-result "card test card", :model "card", :favorite false, :dataset_query "{}")
(make-result "pulse test pulse", :model "pulse", :archived nil)
(merge
(make-result "metric test metric", :model "metric", :description "Lookin' for a blueberry")
(table-search-results))
(merge
(make-result "segment test segment", :model "segment", :description "Lookin' for a blueberry")
(table-search-results))])))
(sorted-results
[(make-result "dashboard test dashboard", :model "dashboard", :favorite false)
(make-result "collection test collection", :model "collection", :collection {:id true, :name true})
(make-result "card test card", :model "card", :favorite false, :dataset_query "{}", :dashboardcard_count 0)
(make-result "pulse test pulse", :model "pulse", :archived nil)
(merge
(make-result "metric test metric", :model "metric", :description "Lookin' for a blueberry")
(table-search-results))
(merge
(make-result "segment test segment", :model "segment", :description "Lookin' for a blueberry")
(table-search-results))]))
(defn- default-metric-segment-results []
(filter #(contains? #{"metric" "segment"} (:model %)) (default-search-results)))
......@@ -78,7 +83,7 @@
(defn- do-with-search-items [search-string in-root-collection? f]
(let [data-map (fn [instance-name]
{:name (format instance-name search-string),})
{:name (format instance-name search-string)})
coll-data-map (fn [instance-name collection]
(merge (data-map instance-name)
(when-not in-root-collection?
......@@ -108,7 +113,7 @@
Databases causing the tests to fail."
mt/id)
(defn- search-request [user-kwd & params]
(defn- search-request* [xf user-kwd & params]
(let [raw-results (apply (partial mt/user-http-request user-kwd) :get 200 "search" params)
keep-database-id (if (fn? *search-request-results-database-id*)
(*search-request-results-database-id*)
......@@ -116,13 +121,23 @@
(if (:error raw-results)
raw-results
(vec
(sorted-results
(xf
(for [result raw-results
;; filter out any results not from the usual test data DB (e.g. results from other drivers)
:when (contains? #{keep-database-id nil} (:database_id result))]
(-> result
mt/boolean-ids-and-timestamps
(update-in [:collection :name] #(some-> % string?)))))))))
(update-in [:collection :name] #(some-> % string?))
;; `:score` is just used for debugging and would be a pain to match against
(dissoc :score))))))))
(defn- search-request
[& args]
(apply search-request* sorted-results args))
(defn- unsorted-search-request
[& args]
(apply search-request* identity args))
(deftest basic-test
(testing "Basic search, should find 1 of each entity type, all items in the root collection"
......@@ -140,6 +155,31 @@
(is (= (default-search-results)
(search-request :crowberto :q "test")))))))
(def ^:private dashboard-count-results
(letfn [(make-card [dashboard-count]
(make-result (str "dashboard-count " dashboard-count) :dashboardcard_count dashboard-count,
:model "card", :favorite false, :dataset_query "{}"))]
[(make-card 5)
(make-card 3)
(make-card 0)]))
(deftest dashboard-count-test
(testing "It sorts by dashboard count"
(mt/with-temp* [Card [{card-id-3 :id} {:name "dashboard-count 3"}]
Card [{card-id-5 :id} {:name "dashboard-count 5"}]
Card [{card-id-0 :id} {:name "dashboard-count 0"}]
Dashboard [{dashboard-id :id}]
DashboardCard [_ {:card_id card-id-3, :dashboard_id dashboard-id}]
DashboardCard [_ {:card_id card-id-3, :dashboard_id dashboard-id}]
DashboardCard [_ {:card_id card-id-3, :dashboard_id dashboard-id}]
DashboardCard [_ {:card_id card-id-5, :dashboard_id dashboard-id}]
DashboardCard [_ {:card_id card-id-5, :dashboard_id dashboard-id}]
DashboardCard [_ {:card_id card-id-5, :dashboard_id dashboard-id}]
DashboardCard [_ {:card_id card-id-5, :dashboard_id dashboard-id}]
DashboardCard [_ {:card_id card-id-5, :dashboard_id dashboard-id}]]
(is (= dashboard-count-results
(unsorted-search-request :rasta :q "dashboard-count" ))))))
(deftest permissions-test
(testing (str "Ensure that users without perms for the root collection don't get results NOTE: Metrics and segments "
"don't have collections, so they'll be returned")
......@@ -165,11 +205,12 @@
PermissionsGroupMembership [_ {:user_id (mt/user->id :rasta), :group_id (u/the-id group)}]]
(perms/grant-collection-read-permissions! group (u/the-id collection))
(is (= (sorted-results
(into
(default-results-with-collection)
(map #(merge default-search-row % (table-search-results))
[{:name "metric test2 metric", :description "Lookin' for a blueberry", :model "metric"}
{:name "segment test2 segment", :description "Lookin' for a blueberry", :model "segment"}])))
(reverse ;; This reverse is hokey; it's because the test2 results happen to come first in the API response
(into
(default-results-with-collection)
(map #(merge default-search-row % (table-search-results))
[{:name "metric test2 metric", :description "Lookin' for a blueberry", :model "metric"}
{:name "segment test2 segment", :description "Lookin' for a blueberry", :model "segment"}]))))
(search-request :rasta :q "test"))))))))
(testing (str "Users with root collection permissions should be able to search root collection data long with "
......@@ -182,11 +223,12 @@
(perms/grant-permissions! group (perms/collection-read-path {:metabase.models.collection.root/is-root? true}))
(perms/grant-collection-read-permissions! group collection)
(is (= (sorted-results
(into
(default-results-with-collection)
(for [row (default-search-results)
:when (not= "collection" (:model row))]
(update row :name #(str/replace % "test" "test2")))))
(reverse
(into
(default-results-with-collection)
(for [row (default-search-results)
:when (not= "collection" (:model row))]
(update row :name #(str/replace % "test" "test2"))))))
(search-request :rasta :q "test"))))))))
(testing "Users with access to multiple collections should see results from all collections they have access to"
......@@ -197,10 +239,11 @@
(perms/grant-collection-read-permissions! group (u/the-id coll-1))
(perms/grant-collection-read-permissions! group (u/the-id coll-2))
(is (= (sorted-results
(into
(default-results-with-collection)
(map (fn [row] (update row :name #(str/replace % "test" "test2")))
(default-results-with-collection))))
(reverse
(into
(default-results-with-collection)
(map (fn [row] (update row :name #(str/replace % "test" "test2")))
(default-results-with-collection)))))
(search-request :rasta :q "test")))))))
(testing "User should only see results in the collection they have access to"
......@@ -211,11 +254,12 @@
PermissionsGroupMembership [_ {:user_id (mt/user->id :rasta), :group_id (u/the-id group)}]]
(perms/grant-collection-read-permissions! group (u/the-id coll-1))
(is (= (sorted-results
(into
(default-results-with-collection)
(map #(merge default-search-row % (table-search-results))
[{:name "metric test2 metric", :description "Lookin' for a blueberry", :model "metric"}
{:name "segment test2 segment", :description "Lookin' for a blueberry", :model "segment"}])))
(reverse
(into
(default-results-with-collection)
(map #(merge default-search-row % (table-search-results))
[{:name "metric test2 metric", :description "Lookin' for a blueberry", :model "metric"}
{:name "segment test2 segment", :description "Lookin' for a blueberry", :model "segment"}]))))
(search-request :rasta :q "test"))))))))
(testing "Metrics on tables for which the user does not have access to should not show up in results"
......
......@@ -103,22 +103,27 @@
(score ["rasta" "the" "toucan"]
(result-row "Rasta the toucan"))))))
(deftest accumulate-top-results-test
(deftest top-results-test
(let [xf (map identity)]
(testing "a non-full queue behaves normally"
(let [items (map (fn [i]
{:score [2 2 i]
:result (str "item " i)})
(range 10))]
(is (= items
(transduce xf search/accumulate-top-results items)))))
(let [items (->> (range 10)
reverse ;; descending order
(map (fn [i]
{:score [2 2 i]
:result (str "item " i)})))]
(is (= (map :result items)
(search/top-results items xf)))))
(testing "a full queue only saves the top items"
(let [sorted-items (map (fn [i]
{:score [1 2 3 i]
:result (str "item " i)})
(range (+ 10 search-config/max-filtered-results)))]
(is (= (drop 10 sorted-items)
(transduce xf search/accumulate-top-results (shuffle sorted-items))))))))
(let [sorted-items (->> (+ 10 search-config/max-filtered-results)
range
reverse ;; descending order
(map (fn [i]
{:score [1 2 3 i]
:result (str "item " i)})))]
(is (= (->> sorted-items
(take search-config/max-filtered-results)
(map :result))
(search/top-results (shuffle sorted-items) xf)))))))
(deftest match-context-test
(let [context #'search/match-context
......@@ -163,11 +168,13 @@
(let [score #'search/pinned-score
item (fn [collection-position] {:collection_position collection-position})]
(testing "it provides a sortable score"
(is (= [1 2 3 0 nil]
(is (= [1 2 3 nil 0]
(->> [(item 0)
;; nil and 0 could theoretically be in either order, but it's a stable sort, so this is fine
(item nil)
(item 3)
(item 1)
(item 2)]
(sort-by score)
reverse
(map :collection_position)))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment