From defd5cee28ca32212b614157737ddac477526c9c Mon Sep 17 00:00:00 2001
From: John Swanson <john.swanson@metabase.com>
Date: Thu, 21 Mar 2024 09:54:14 -0700
Subject: [PATCH] /api/collection/:id/items gets `here` and `below` (#40203)

* /api/collection/:id/items gets `here` and `below`

During postprocessing of `collection` children retrieved by
`/api/collection/:id/items`, annotate each of the children with `below`
and `here` keys representing the presence of questions, models, or
collections at this level of the hierarchy or below it.

Co-authored-by: Noah Moss <32746338+noahmoss@users.noreply.github.com>
---
 src/metabase/api/collection.clj       | 85 ++++++++++++++++++++-----
 src/metabase/models/collection.clj    | 90 ++++++++++++++++-----------
 test/metabase/api/collection_test.clj | 31 +++++++++
 3 files changed, 153 insertions(+), 53 deletions(-)

diff --git a/src/metabase/api/collection.clj b/src/metabase/api/collection.clj
index 045eb7ce5f2..2625f47c2d5 100644
--- a/src/metabase/api/collection.clj
+++ b/src/metabase/api/collection.clj
@@ -299,13 +299,14 @@
     always-false-hsql-expr
     always-true-hsql-expr))
 
+
 (defmulti ^:private post-process-collection-children
-  {:arglists '([model rows])}
-  (fn [model _]
+  {:arglists '([model collection rows])}
+  (fn [model _ _]
     (keyword model)))
 
 (defmethod ^:private post-process-collection-children :default
-  [_ rows]
+  [_ _ rows]
   rows)
 
 (defmethod collection-children-query :pulse
@@ -327,7 +328,7 @@
       (sql.helpers/where (pinned-state->clause pinned-state :p.collection_position))))
 
 (defmethod post-process-collection-children :pulse
-  [_ rows]
+  [_ _ rows]
   (for [row rows]
     (dissoc row
             :description :display :authority_level :moderated_status :icon :personal_owner_id
@@ -356,14 +357,14 @@
             [:= :archived (boolean archived?)]]})
 
 (defmethod post-process-collection-children :timeline
-  [_ rows]
+  [_ _collection rows]
   (for [row rows]
     (dissoc row
             :description :display :collection_position :authority_level :moderated_status
             :collection_preview :dataset_query :table_id :query_type :is_upload)))
 
 (defmethod post-process-collection-children :snippet
-  [_ rows]
+  [_ _collection rows]
   (for [row rows]
     (dissoc row
             :description :collection_position :display :authority_level
@@ -413,14 +414,14 @@
   (card-query true collection options))
 
 (defmethod post-process-collection-children :dataset
-  [_ rows]
+  [_ collection rows]
   (let [queries-before (map :dataset_query rows)
         queries-parsed (map (comp mbql.normalize/normalize json/parse-string) queries-before)]
     ;; We need to normalize the dataset queries for hydration, but reset the field to avoid leaking that transform.
     (->> (map #(assoc %2 :dataset_query %1) queries-parsed rows)
          upload/model-hydrate-based-on-upload
          (map #(assoc %2 :dataset_query %1) queries-before)
-         (post-process-collection-children :card))))
+         (post-process-collection-children collection :card))))
 
 (defmethod collection-children-query :card
   [_ collection options]
@@ -473,7 +474,7 @@
       (assoc :fully_parameterized (fully-parameterized-query? row))))
 
 (defmethod post-process-collection-children :card
-  [_ rows]
+  [_ _ rows]
   (map post-process-card-row rows))
 
 (defn- dashboard-query [collection {:keys [archived? pinned-state]}]
@@ -500,7 +501,7 @@
   (dashboard-query collection options))
 
 (defmethod post-process-collection-children :dashboard
-  [_ rows]
+  [_ _ rows]
   (map #(dissoc %
                 :display :authority_level :moderated_status :icon :personal_owner_id :collection_preview
                 :dataset_query :table_id :query_type :is_upload)
@@ -540,14 +541,68 @@
   [_ collection options]
   (collection-query collection options))
 
+(defn- annotate-collections
+  [parent-coll colls]
+  (let [visible-collection-ids (collection/permissions-set->visible-collection-ids
+                                @api/*current-user-permissions-set*)
+
+        descendant-collections (collection/descendants-flat parent-coll (collection/visible-collection-ids->honeysql-filter-clause
+                                                                         :id
+                                                                         visible-collection-ids))
+
+        descendant-collection-ids (map u/the-id descendant-collections)
+
+        child-type->coll-id-set
+        (reduce (fn [acc {collection-id :collection_id, card-type :type, :as _card}]
+                  (update acc (if (= (keyword card-type) :model) :dataset :card) conj collection-id))
+                {:dataset #{}
+                 :card    #{}}
+                (mdb.query/reducible-query {:select-distinct [:collection_id :type]
+                                            :from            [:report_card]
+                                            :where           [:and
+                                                              [:= :archived false]
+                                                              [:in :collection_id descendant-collection-ids]]}))
+
+        collections-containing-dashboards
+        (->> (t2/query {:select-distinct [:collection_id]
+                        :from :report_dashboard
+                        :where [:and
+                                [:= :archived false]
+                                [:in :collection_id descendant-collection-ids]]})
+             (map :collection_id)
+             (into #{}))
+
+        ;; the set of collections that contain collections (in terms of *effective* location)
+        collections-containing-collections
+        (->> descendant-collections
+             (reduce (fn [accu {:keys [location] :as _coll}]
+                       (let [effective-location (collection/effective-location-path location visible-collection-ids)
+                             parent-id (collection/location-path->parent-id effective-location)]
+                         (conj accu parent-id)))
+                     #{}))
+
+        child-type->coll-id-set
+        (merge child-type->coll-id-set
+               {:collection collections-containing-collections
+                :dashboard collections-containing-dashboards})
+
+        ;; why are we calling `annotate-collections` on all descendants, when we only need the collections in `colls`
+        ;; to be annotated? Because `annotate-collections` works by looping through the collections it's passed and
+        ;; using them to figure out the ancestors of a given collection. This could use a refactor - probably the
+        ;; caller of `annotate-collections` could be generating both `child-type->parent-ids` and
+        ;; `child-type->ancestor-ids`.
+        coll-id->annotated (m/index-by :id (collection/annotate-collections child-type->coll-id-set descendant-collections))]
+    (for [coll colls]
+      (merge coll (select-keys (coll-id->annotated (:id coll)) [:here :below])))))
+
 (defmethod post-process-collection-children :collection
-  [_ rows]
+  [_ parent-collection rows]
   (letfn [(update-personal-collection [{:keys [personal_owner_id] :as row}]
             (if personal_owner_id
               ;; when fetching root collection, we might have personal collection
               (assoc row :name (collection/user->personal-collection-name (:personal_owner_id row) :user))
               (dissoc row :personal_owner_id)))]
-    (for [row rows]
+    (for [row (annotate-collections parent-collection rows)]
       (-> (t2/hydrate (t2/instance :model/Collection row) :can_write :effective_location)
           (dissoc :collection_position :display :moderated_status :icon
                   :collection_preview :dataset_query :table_id :query_type :is_upload)
@@ -576,13 +631,13 @@
 (defn- post-process-rows
   "Post process any data. Have a chance to process all of the same type at once using
   `post-process-collection-children`. Must respect the order passed in."
-  [rows]
+  [collection rows]
   (->> (map-indexed (fn [i row] (vary-meta row assoc ::index i)) rows) ;; keep db sort order
        (map remove-unwanted-keys)
        (group-by :model)
        (into []
              (comp (map (fn [[model rows]]
-                          (post-process-collection-children (keyword model) rows)))
+                          (post-process-collection-children (keyword model) collection rows)))
                    cat
                    (map coalesce-edit-info)))
        (sort-by (comp ::index meta))))
@@ -728,7 +783,7 @@
                              :limit  mw.offset-paging/*limit*
                              :offset mw.offset-paging/*offset*))
         res         {:total  (->> (mdb.query/query total-query) first :count)
-                     :data   (->> (mdb.query/query limit-query) post-process-rows)
+                     :data   (->> (mdb.query/query limit-query) (post-process-rows collection))
                      :models models}
         limit-res   (assoc res
                            :limit  mw.offset-paging/*limit*
diff --git a/src/metabase/models/collection.clj b/src/metabase/models/collection.clj
index 9d37738b4f6..ca1c452b7f4 100644
--- a/src/metabase/models/collection.clj
+++ b/src/metabase/models/collection.clj
@@ -446,7 +446,27 @@
                             [:children [:set [:ref ::children]]]]]}}
    [:ref ::children]])
 
-(mu/defn ^:private descendants :- [:set Children]
+(mu/defn descendants-flat :- [:sequential CollectionWithLocationAndIDOrRoot]
+  "Return all descendant collections of a `collection`, including children, grandchildren, and so forth."
+  [collection :- CollectionWithLocationAndIDOrRoot, & additional-honeysql-where-clauses]
+  (or
+   (t2/select [:model/Collection :name :id :location :description]
+              {:where (apply
+                       vector
+                       :and
+                       [:like :location (str (children-location collection) "%")]
+                       ;; Only return the Personal Collection belonging to the Current
+                       ;; User, regardless of whether we should actually be allowed to see
+                       ;; it (e.g., admins have perms for all Collections). This is done
+                       ;; to keep the Root Collection View for admins from getting crazily
+                       ;; cluttered with Personal Collections belonging to other users
+                       [:or
+                        [:= :personal_owner_id nil]
+                        [:= :personal_owner_id *current-user-id*]]
+                       additional-honeysql-where-clauses)})
+   []))
+
+(mu/defn descendants :- [:set Children]
   "Return all descendant Collections of a `collection`, including children, grandchildren, and so forth. This is done
   primarily to power the `effective-children` feature below, and thus the descendants are returned in a hierarchy,
   rather than as a flat set. e.g. results will be something like:
@@ -462,21 +482,7 @@
   [collection :- CollectionWithLocationAndIDOrRoot, & additional-honeysql-where-clauses]
   ;; first, fetch all the descendants of the `collection`, and build a map of location -> children. This will be used
   ;; so we can fetch the immediate children of each Collection
-  (let [location->children (group-by :location (t2/select [Collection :name :id :location :description]
-                                                 {:where
-                                                  (apply
-                                                   vector
-                                                   :and
-                                                   [:like :location (str (children-location collection) "%")]
-                                                   ;; Only return the Personal Collection belonging to the Current
-                                                   ;; User, regardless of whether we should actually be allowed to see
-                                                   ;; it (e.g., admins have perms for all Collections). This is done
-                                                   ;; to keep the Root Collection View for admins from getting crazily
-                                                   ;; cluttered with Personal Collections belonging to randos
-                                                   [:or
-                                                    [:= :personal_owner_id nil]
-                                                    [:= :personal_owner_id *current-user-id*]]
-                                                   additional-honeysql-where-clauses)}))
+  (let [location->children (group-by :location (apply descendants-flat collection additional-honeysql-where-clauses))
         ;; Next, build a function to add children to a given `coll`. This function will recursively call itself to add
         ;; children to each child
         add-children       (fn add-children [coll]
@@ -1223,29 +1229,37 @@
                                :allowed-namespaces   allowed-namespaces
                                :collection-namespace collection-namespace})))))))
 
-(defn- annotate-collections
+(defn annotate-collections
   "Annotate collections with `:below` and `:here` keys to indicate which types are in their subtree and which types are
-  in the collection at that level."
-  [{:keys [dataset card] :as _coll-type-ids} collections]
-  (let [parent-info (reduce (fn [m {:keys [location id] :as _collection}]
-                              (let [parent-ids (set (location-path->ids location))]
-                                (cond-> m
-                                  (contains? dataset id)
-                                  (update :dataset set/union parent-ids)
-                                  (contains? card id)
-                                  (update :card set/union parent-ids))))
-                            {:dataset #{} :card #{}}
-                            collections)]
+  in the collection at that level.
+
+  The second argument is the list of collections to annotate.
+
+  The first argument to this function could use a bit of explanation: `child-type->parent-ids` is a map. Keys are
+  object types (e.g. `:collection`), values are sets of collection IDs that are the (direct) parents of one or more
+  objects of that type."
+  [child-type->parent-ids collections]
+  (let [child-type->ancestor-ids
+        (reduce (fn [m {:keys [location id] :as _collection}]
+                  (let [parent-ids (set (location-path->ids location))]
+                    (reduce (fn [m [t id-set]]
+                              (cond-> m
+                                (contains? id-set id) (update t set/union parent-ids)))
+                            m
+                            child-type->parent-ids)))
+                (zipmap (keys child-type->parent-ids) (repeat #{}))
+                collections)]
     (map (fn [{:keys [id] :as collection}]
-           (let [types (cond-> #{}
-                         (contains? (:dataset parent-info) id)
-                         (conj :dataset)
-                         (contains? (:card parent-info) id)
-                         (conj :card))]
+           (let [below (apply set/union
+                              (for [[child-type coll-id-set] child-type->ancestor-ids]
+                                (when (contains? coll-id-set id)
+                                  #{child-type})))
+                 here (into #{} (for [[child-type coll-id-set] child-type->parent-ids
+                                      :when (contains? coll-id-set id)]
+                                  child-type))]
              (cond-> collection
-               (seq types) (assoc :below types)
-               (contains? dataset id) (update :here (fnil conj #{}) :dataset)
-               (contains? card id) (update :here (fnil conj #{}) :card))))
+               (seq below) (assoc :below below)
+               (seq here) (assoc :here here))))
          collections)))
 
 (defn collections->tree
@@ -1266,7 +1280,7 @@
                               :here     #{:card}
                               :children [{:name \"G\"}]}]}]}
      {:name \"H\"}]"
-  [coll-type-ids collections]
+  [child-type->parent-ids collections]
   (let [;; instead of attempting to re-sort like the database does, keep things consistent by just keeping things in
         ;; the same order they're already in.
         original-position (into {} (map-indexed (fn [i {id :id}]
@@ -1308,4 +1322,4 @@
                         ;; coll-type is `nil` or "instance-analytics"
                         ;; nil sorts first, so we get instance-analytics at the end, which is what we want
                         (original-position coll-id))))))
-     (annotate-collections coll-type-ids collections))))
+     (annotate-collections child-type->parent-ids collections))))
diff --git a/test/metabase/api/collection_test.clj b/test/metabase/api/collection_test.clj
index e24518ca322..bc446800a25 100644
--- a/test/metabase/api/collection_test.clj
+++ b/test/metabase/api/collection_test.clj
@@ -966,6 +966,37 @@
           (is (= #{"card" "dash" "subcollection" "dataset"}
                  (into #{} (map :name) items))))))))
 
+(deftest collection-items-include-here-and-below-test
+  (testing "GET /api/collection/:id/items"
+    (t2.with-temp/with-temp [:model/Collection {id1 :id} {:name "Collection with Items"}
+                             :model/Collection {id2 :id} {:name "subcollection"
+                                                                       :location (format "/%d/" id1)}]
+      (let [item #(first (:data (mt/user-http-request :rasta :get 200 (format "collection/%d/items" id1))))]
+        (testing "the item has nothing in or below it"
+          (is (nil? (:here (item))))
+          (is (nil? (:below (item)))))
+        (t2.with-temp/with-temp [:model/Collection {id3 :id} {:location (format "/%d/%d/" id1 id2)}]
+          (testing "now the item has a collection in it"
+            (is (= ["collection"] (:here (item)))))
+          (testing "but nothing :below"
+            (is (nil? (:below (item)))))
+          (t2.with-temp/with-temp [:model/Collection _ {:location (format "/%d/%d/%d/" id1 id2 id3)}]
+            (testing "the item still has a collection in it"
+              (is (= ["collection"] (:here (item)))))
+            (testing "the item now has a collection below it"
+              (is (= ["collection"] (:below (item))))))
+          (t2.with-temp/with-temp [:model/Card _ {:name "card" :collection_id id2}
+                                   :model/Card _ {:name "dataset" :type :model :collection_id id2}]
+            (testing "when the item has a card/dataset, that's reflected in `here` too"
+              (is (= #{"collection" "card" "dataset"} (set (:here (item)))))
+              (is (nil? (:below (item)))))
+            (t2.with-temp/with-temp [:model/Card _ {:name "card" :collection_id id3}]
+              (testing "when the item contains a collection that contains a card, that's `below`"
+                (is (= #{"card"} (set (:below (item))))))))
+          (t2.with-temp/with-temp [:model/Dashboard _ {:collection_id id2}]
+            (testing "when the item has a dashboard, that's reflected in `here` too"
+              (is (= #{"collection" "dashboard"} (set (:here (item))))))))))))
+
 (deftest children-sort-clause-test
   ;; we always place "special" collection types (i.e. "Metabase Analytics") last
   (testing "Default sort"
-- 
GitLab