From a536c2d7bf24d111cace04abeedff5f33ac751d5 Mon Sep 17 00:00:00 2001 From: John Swanson <john.swanson@metabase.com> Date: Fri, 26 Apr 2024 15:47:05 -0700 Subject: [PATCH] Add `collection.effective_ancestors` to model search results (#41746) * Add `collection.effective_ancestors` to search Fixes #41909 The ultimate goal here is to be able to display a UI that showed the entire (effective) path to each collection. This was a bit more painful than I'd hoped. The primary issue is that we tied two things together: - scoring results - "serializing" results (really, partially serializing results, because we do more work to shape the data later) Why is this a problem? The `top-scorers` function took a transducer and returned the *serialized* top scorers. Later, when we need to operate on *all* the top results (in this case, to collect all the collection IDs and names that we know of), we're looking at the serialized results. In our case, we needed some data (collection location) that wasn't included after serialization. We could add it, of course, but it's frustrating to need to change the API of what data is *returned* from the endpoint for purely internal purposes (changing the data available during *processing*). I ended up pulling serialization out of the scoring function. At some point down the road, we should go ahead and move the `serialize` function out of the `search.scoring` namespace entirely - there's really no reason for it to belong there as far as I can tell. --- src/metabase/api/search.clj | 59 ++++++++++++++++++++++++--- src/metabase/search/config.clj | 3 ++ src/metabase/search/scoring.clj | 26 +++++++----- test/metabase/api/search_test.clj | 46 ++++++++++++++++++++- test/metabase/search/filter_test.clj | 1 + test/metabase/search/scoring_test.clj | 10 +++-- 6 files changed, 125 insertions(+), 20 deletions(-) diff --git a/src/metabase/api/search.clj b/src/metabase/api/search.clj index 9bbe1211745..7d9b87bd001 100644 --- a/src/metabase/api/search.clj +++ b/src/metabase/api/search.clj @@ -332,7 +332,8 @@ (filter (fn [[_k v]] (= v :text))) (map first) (remove #{:collection_authority_level :moderated_status - :initial_sync_status :pk_ref :location})) + :initial_sync_status :pk_ref :location + :collection_location})) case-clauses (as-> columns-to-search <> (map (fn [col] [:like [:lower col] match]) <>) (interleave <> (repeat [:inline 0])) @@ -442,6 +443,47 @@ :last_editor_common_name (get user-id->common-name last_editor_id))) results))) +(defn add-dataset-collection-hierarchy + "Adds `collection_effective_ancestors` to *datasets* in the search results." + [search-results] + (let [;; this helper function takes a search result (with `collection_id` and `collection_location`) and returns the + ;; effective location of the result. + result->loc (fn [{:keys [collection_id collection_location]}] + (:effective_location + (t2/hydrate + (if (nil? collection_id) + collection/root-collection + {:location collection_location}) + :effective_location))) + ;; a map of collection-ids to collection info + col-id->name&loc (into {} + (for [item search-results + :when (= (:model item) "dataset")] + [(:collection_id item) + {:name (:collection_name item) + :effective_location (result->loc item)}])) + ;; the set of all collection IDs where we *don't* know the collection name. For example, if `col-id->name&loc` + ;; contained `{1 {:effective_location "/2/" :name "Foo"}}`, we need to look up the name of collection `2`. + to-fetch (into #{} (comp (keep :effective_location) + (mapcat collection/location-path->ids) + ;; already have these names + (remove col-id->name&loc)) + (vals col-id->name&loc)) + ;; the complete map of collection IDs to names + id->name (merge (if (seq to-fetch) + (t2/select-pk->fn :name :model/Collection :id [:in to-fetch]) + {}) + (update-vals col-id->name&loc :name)) + annotate (fn [x] + (cond-> x + (= (:model x) "dataset") + (assoc :collection_effective_ancestors + (if-let [loc (result->loc x)] + (->> (collection/location-path->ids loc) + (map (fn [id] {:id id :name (id->name id)}))) + []))))] + (map annotate search-results))) + (mu/defn ^:private search "Builds a search query that includes all the searchable entities and runs it" [search-ctx :- SearchContext] @@ -467,8 +509,10 @@ (map #(update % :pk_ref json/parse-string)) (map (partial scoring/score-and-result (:search-string search-ctx))) (filter #(pos? (:score %)))) - total-results (hydrate-user-metadata - (scoring/top-results reducible-results search.config/max-filtered-results xf)) + total-results (cond->> (scoring/top-results reducible-results search.config/max-filtered-results xf) + true hydrate-user-metadata + (:model-ancestors? search-ctx) (add-dataset-collection-hierarchy) + true (map scoring/serialize)) add-perms-for-col (fn [item] (cond-> item (mi/instance-of? :model/Collection item) @@ -502,6 +546,7 @@ filter-items-in-personal-collection offset search-string + model-ancestors? table-db-id search-native-query verified]} :- [:map {:closed true} @@ -517,6 +562,7 @@ [:offset {:optional true} [:maybe ms/Int]] [:table-db-id {:optional true} [:maybe ms/PositiveInt]] [:search-native-query {:optional true} [:maybe boolean?]] + [:model-ancestors? {:optional true} [:maybe boolean?]] [:verified {:optional true} [:maybe true?]]]] :- SearchContext (when (some? verified) (premium-features/assert-has-any-features @@ -526,7 +572,8 @@ ctx (cond-> {:search-string search-string :current-user-perms @api/*current-user-permissions-set* :archived? (boolean archived) - :models models} + :models models + :model-ancestors? (boolean model-ancestors?)} (some? created-at) (assoc :created-at created-at) (seq created-by) (assoc :created-by created-by) (some? filter-items-in-personal-collection) (assoc :filter-items-in-personal-collection filter-items-in-personal-collection) @@ -587,7 +634,7 @@ A search query that has both filters applied will only return models and cards." [q archived context created_at created_by table_db_id models last_edited_at last_edited_by - filter_items_in_personal_collection search_native_query verified] + filter_items_in_personal_collection model_ancestors search_native_query verified] {q [:maybe ms/NonBlankString] archived [:maybe :boolean] table_db_id [:maybe ms/PositiveInt] @@ -598,6 +645,7 @@ created_by [:maybe (ms/QueryVectorOf ms/PositiveInt)] last_edited_at [:maybe ms/NonBlankString] last_edited_by [:maybe (ms/QueryVectorOf ms/PositiveInt)] + model_ancestors [:maybe :boolean] search_native_query [:maybe true?] verified [:maybe true?]} (api/check-valid-page-params mw.offset-paging/*limit* mw.offset-paging/*offset*) @@ -617,6 +665,7 @@ :models models-set :limit mw.offset-paging/*limit* :offset mw.offset-paging/*offset* + :model-ancestors? model_ancestors :search-native-query search_native_query :verified verified})) duration (- (System/currentTimeMillis) start-time) diff --git a/src/metabase/search/config.clj b/src/metabase/search/config.clj index 2f3a741f3d5..d7f60bc8b54 100644 --- a/src/metabase/search/config.clj +++ b/src/metabase/search/config.clj @@ -99,6 +99,7 @@ [:map {:closed true} [:search-string [:maybe ms/NonBlankString]] [:archived? :boolean] + [:model-ancestors? :boolean] [:current-user-perms [:set perms.u/PathSchema]] [:models [:set SearchableModel]] [:filter-items-in-personal-collection {:optional true} [:enum "only" "exclude"]] @@ -138,6 +139,7 @@ :collection_id :integer :collection_name :text :collection_type :text + :collection_location :text :collection_authority_level :text ;; returned for Card and Dashboard :collection_position :integer @@ -270,6 +272,7 @@ [_] (conj default-columns :collection_id :collection_position :dataset_query :display :creator_id [:collection.name :collection_name] + [:collection.location :collection_location] [:collection.authority_level :collection_authority_level] bookmark-col dashboardcard-count-col)) diff --git a/src/metabase/search/scoring.clj b/src/metabase/search/scoring.clj index e8e2a798923..111316c9a57 100644 --- a/src/metabase/search/scoring.clj +++ b/src/metabase/search/scoring.clj @@ -301,12 +301,12 @@ (max (- stale-time days-ago) 0) stale-time))) -(defn- serialize +(defn serialize "Massage the raw result from the DB and match data into something more useful for the client" - [result all-scores relevant-scores] - (let [{:keys [name display_name collection_id collection_name collection_authority_level collection_type]} result - matching-columns (into #{} (remove nil? (map :column relevant-scores))) - match-context-thunk (first (keep :match-context-thunk relevant-scores))] + [{:as result :keys [all-scores relevant-scores name display_name collection_id collection_name + collection_authority_level collection_type collection_effective_ancestors]}] + (let [matching-columns (into #{} (remove nil? (map :column relevant-scores))) + match-context-thunk (first (keep :match-context-thunk relevant-scores))] (-> result (assoc :name (if (and (contains? matching-columns :display_name) display_name) @@ -316,14 +316,20 @@ (empty? (remove matching-columns search.config/displayed-columns))) (match-context-thunk)) - :collection {:id collection_id - :name collection_name - :authority_level collection_authority_level - :type collection_type} + :collection (merge {:id collection_id + :name collection_name + :authority_level collection_authority_level + :type collection_type} + (when collection_effective_ancestors + {:effective_ancestors collection_effective_ancestors})) :scores all-scores) (update :dataset_query #(some-> % json/parse-string mbql.normalize/normalize)) (dissoc + :all-scores + :relevant-scores + :collection_effective_ancestors :collection_id + :collection_location :collection_name :collection_type :display_name)))) @@ -397,7 +403,7 @@ 0 text-matches))) {:score total-score - :result (serialize result all-scores relevant-scores)} + :result (assoc result :all-scores all-scores :relevant-scores relevant-scores)} {:score 0}))) (defn compare-score diff --git a/test/metabase/api/search_test.clj b/test/metabase/api/search_test.clj index 32f5bc6a624..a2b84d4540a 100644 --- a/test/metabase/api/search_test.clj +++ b/test/metabase/api/search_test.clj @@ -141,7 +141,10 @@ (defn- default-results-with-collection [] (on-search-types #{"dashboard" "pulse" "card" "dataset" "action"} - #(assoc % :collection {:id true, :name (if (= (:model %) "action") nil true) :authority_level nil :type nil}) + #(assoc % :collection {:id true, + :name (if (= (:model %) "action") nil true) + :authority_level nil + :type nil}) (default-search-results))) (defn- do-with-search-items [search-string in-root-collection? f] @@ -919,6 +922,7 @@ :archived? false :models search.config/all-models :current-user-perms #{"/"} + :model-ancestors? false :limit-int 100}))] ;; warm it up, in case the DB call depends on the order of test execution and it needs to ;; do some initialization @@ -1658,3 +1662,43 @@ (->> (mt/user-http-request :lucky :get 200 "search" :archived true :q "test") :data (map :name))))))) + +(deftest model-ancestors-gets-ancestor-collections + (testing "Collection names are correct" + (mt/with-temp [Collection {top-col-id :id} {:name "top level col" :location "/"} + Collection {mid-col-id :id} {:name "middle level col" :location (str "/" top-col-id "/")} + Card {leaf-card-id :id} {:type :model :collection_id mid-col-id :name "leaf model"} + Card {top-card-id :id} {:type :model :collection_id top-col-id :name "top model"}] + (is (= #{[leaf-card-id [{:name "top level col" :id top-col-id}]] + [top-card-id []]} + (->> (mt/user-http-request :rasta :get 200 "search" :model_ancestors true :q "model" :models ["dataset"]) + :data + (map (juxt :id #(get-in % [:collection :effective_ancestors]))) + (into #{})))))) + (testing "Models not in a collection work correctly" + (mt/with-temp [Card {card-id :id} {:type :model + :name "model" + :collection_id nil}] + (is (= #{[card-id []]} + (->> (mt/user-http-request :rasta :get 200 "search" :model_ancestors true :q "model" :models ["dataset"]) + :data + (map (juxt :id #(get-in % [:collection :effective_ancestors]))) + (into #{})))))) + (testing "Non-models don't get collection_ancestors" + (mt/with-temp [Card _ {:name "question" + :collection_id nil}] + (is (not (contains? (->> (mt/user-http-request :rasta :get 200 "search" :model_ancestors true :q "question" :models ["dataset" "card"]) + :data + (filter #(= (:name %) "question")) + first + :collection) + :effective_ancestors))))) + (testing "If `model_parents` is not passed, it doesn't get populated" + (mt/with-temp [Collection {top-col-id :id} {:name "top level col" :location "/"} + Collection {mid-col-id :id} {:name "middle level col" :location (str "/" top-col-id "/")} + Card _ {:type :model :collection_id mid-col-id :name "leaf model"} + Card _ {:type :model :collection_id top-col-id :name "top model"}] + (is (= [nil nil] + (->> (mt/user-http-request :rasta :get 200 "search" :model_ancestors false :q "model" :models ["dataset"]) + :data + (map #(get-in % [:collection :effective_ancestors])))))))) diff --git a/test/metabase/search/filter_test.clj b/test/metabase/search/filter_test.clj index ac449840ae4..70bbb3ba3be 100644 --- a/test/metabase/search/filter_test.clj +++ b/test/metabase/search/filter_test.clj @@ -11,6 +11,7 @@ {:search-string nil :archived? false :models search.config/all-models + :model-ancestors? false :current-user-perms #{"/"}}) (deftest ^:parallel ->applicable-models-test diff --git a/test/metabase/search/scoring_test.clj b/test/metabase/search/scoring_test.clj index aaf168aef4d..af80593f174 100644 --- a/test/metabase/search/scoring_test.clj +++ b/test/metabase/search/scoring_test.clj @@ -280,11 +280,13 @@ :database 1} result {:name "card" :model "card" - :dataset_query (json/generate-string query)}] - (is (= query (-> result (#'scoring/serialize {} {}) :dataset_query))))) + :dataset_query (json/generate-string query) + :all-scores {} + :relevant-scores {}}] + (is (= query (-> result scoring/serialize :dataset_query))))) (testing "Doesn't error on other models without a query" - (is (nil? (-> {:name "dash" :model "dashboard"} - (#'scoring/serialize {} {}) + (is (nil? (-> {:name "dash" :model "dashboard" :all-scores {} :relevant-scores {}} + (#'scoring/serialize) :dataset_query))))) (deftest ^:parallel force-weight-test -- GitLab