From 9c61561d42cebed31608e38fc9f8023ab548327d Mon Sep 17 00:00:00 2001 From: Braden Shepherdson <Braden.Shepherdson@gmail.com> Date: Wed, 31 Aug 2022 20:45:00 -0400 Subject: [PATCH] Add `serdes-descendants` for "containment" to serialize a subtree (#25017) * Add `serdes-descendants` for "containment" to serialize a subtree This allows naming eg. a Collection and will recursively serialize: all dashboards, cards and dashcards it contains directly, plus all child collections and everything they contain. Currently this words on Collection, Dashboard, DashboardCard, and Card. * lint * Switch to plural `extract-subtrees` with a list of `targets` --- .../serialization/v2/extract.clj | 32 ++- .../serialization/v2/extract_test.clj | 189 ++++++++++++++++++ src/metabase/models/card.clj | 7 + src/metabase/models/collection.clj | 10 + src/metabase/models/dashboard.clj | 5 + src/metabase/models/dashboard_card.clj | 5 + src/metabase/models/serialization/base.clj | 45 ++++- test/metabase/models/card_test.clj | 14 ++ 8 files changed, 305 insertions(+), 2 deletions(-) diff --git a/enterprise/backend/src/metabase_enterprise/serialization/v2/extract.clj b/enterprise/backend/src/metabase_enterprise/serialization/v2/extract.clj index 0e9cf2239c4..f5b62e26fa6 100644 --- a/enterprise/backend/src/metabase_enterprise/serialization/v2/extract.clj +++ b/enterprise/backend/src/metabase_enterprise/serialization/v2/extract.clj @@ -2,7 +2,9 @@ "Extraction is the first step in serializing a Metabase appdb so it can be eg. written to disk. See the detailed descriptions of the (de)serialization processes in [[metabase.models.serialization.base]]." - (:require [metabase-enterprise.serialization.v2.models :as serdes.models] + (:require [clojure.set :as set] + [medley.core :as m] + [metabase-enterprise.serialization.v2.models :as serdes.models] [metabase.models.serialization.base :as serdes.base])) (defn extract-metabase @@ -20,3 +22,31 @@ (eduction cat (for [model serdes.models/exported-models :when (model-pred model)] (serdes.base/extract-all model opts))))) + +(defn- descendants-closure [targets] + (loop [to-chase (set targets) + chased #{}] + (let [[m i :as item] (first to-chase) + desc (serdes.base/serdes-descendants m i) + chased (conj chased item) + to-chase (set/union (disj to-chase item) (set/difference desc chased))] + (if (empty? to-chase) + chased + (recur to-chase chased))))) + +(defn extract-subtrees + "Extracts the targeted entities and all their descendants into a reducible stream of extracted maps. + + The targeted entities are specified as a list of `[\"SomeModel\" database-id]` pairs. + + [[serdes.base/serdes-descendants]] is recursively called on these entities and all their descendants, until the + complete transitive closure of all descendants is found. This produces a set of `[\"ModelName\" id]` pairs, which + entities are then extracted the same way as [[extract-metabase]]." + [{:keys [targets] :as opts}] + (let [closure (descendants-closure targets) + by-model (->> closure + (group-by first) + (m/map-vals #(set (map second %))))] + (eduction cat (for [[model ids] by-model] + (eduction (map #(serdes.base/extract-one model opts %)) + (serdes.base/raw-reducible-query model {:where [:in :id ids]})))))) diff --git a/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj b/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj index 8046f0b7154..21ccbebf208 100644 --- a/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj +++ b/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj @@ -1,5 +1,6 @@ (ns metabase-enterprise.serialization.v2.extract-test (:require [cheshire.core :as json] + [clojure.set :as set] [clojure.test :refer :all] [metabase-enterprise.serialization.test-util :as ts] [metabase-enterprise.serialization.v2.extract :as extract] @@ -863,3 +864,191 @@ [{:model "Card" :id card1-eid}] [{:model "DashboardCard" :id dashcard-eid}]} (set (serdes.base/serdes-dependencies ser)))))))))) + +(deftest selective-serialization-basic-test + (ts/with-empty-h2-app-db + (ts/with-temp-dpc [User [{mark-id :id} {:first_name "Mark" + :last_name "Knopfler" + :email "mark@direstrai.ts"}] + Collection [{coll1-id :id + coll1-eid :entity_id} {:name "Some Collection"}] + Collection [{coll2-id :id + coll2-eid :entity_id} {:name "Nested Collection" + :location (str "/" coll1-id "/")}] + Collection [{coll3-id :id + coll3-eid :entity_id} {:name "Grandchild Collection" + :location (str "/" coll1-id "/" coll2-id "/")}] + + Database [{db-id :id} {:name "My Database"}] + Table [{no-schema-id :id} {:name "Schemaless Table" :db_id db-id}] + Field [_ {:name "Some Field" :table_id no-schema-id}] + Table [{schema-id :id} {:name "Schema'd Table" + :db_id db-id + :schema "PUBLIC"}] + Field [_ {:name "Other Field" :table_id schema-id}] + + ;; One dashboard and three cards in each of the three collections: + ;; Two cards contained in the dashboard and one freestanding. + Dashboard [{dash1-id :id + dash1-eid :entity_id} {:name "Dashboard 1" + :collection_id coll1-id + :creator_id mark-id}] + Card [{c1-1-id :id + c1-1-eid :entity_id} {:name "Question 1-1" + :database_id db-id + :table_id no-schema-id + :collection_id coll1-id + :creator_id mark-id}] + Card [{c1-2-id :id + c1-2-eid :entity_id} {:name "Question 1-2" + :database_id db-id + :table_id schema-id + :collection_id coll1-id + :creator_id mark-id}] + Card [{c1-3-eid :entity_id} {:name "Question 1-3" + :database_id db-id + :table_id schema-id + :collection_id coll1-id + :creator_id mark-id}] + + DashboardCard [{dc1-1-eid :entity_id} {:card_id c1-1-id + :dashboard_id dash1-id}] + DashboardCard [{dc1-2-eid :entity_id} {:card_id c1-2-id + :dashboard_id dash1-id}] + + ;; Second dashboard, in the middle collection. + Dashboard [{dash2-id :id + dash2-eid :entity_id} {:name "Dashboard 2" + :collection_id coll2-id + :creator_id mark-id}] + Card [{c2-1-id :id + c2-1-eid :entity_id} {:name "Question 2-1" + :database_id db-id + :table_id no-schema-id + :collection_id coll2-id + :creator_id mark-id}] + Card [{c2-2-id :id + c2-2-eid :entity_id} {:name "Question 2-2" + :database_id db-id + :table_id schema-id + :collection_id coll2-id + :creator_id mark-id}] + Card [{c2-3-eid :entity_id} {:name "Question 2-3" + :database_id db-id + :table_id schema-id + :collection_id coll2-id + :creator_id mark-id}] + + DashboardCard [{dc2-1-eid :entity_id} {:card_id c2-1-id + :dashboard_id dash2-id}] + DashboardCard [{dc2-2-eid :entity_id} {:card_id c2-2-id + :dashboard_id dash2-id}] + + ;; Third dashboard, in the grandchild collection. + Dashboard [{dash3-id :id + dash3-eid :entity_id} {:name "Dashboard 3" + :collection_id coll3-id + :creator_id mark-id}] + Card [{c3-1-id :id + c3-1-eid :entity_id} {:name "Question 3-1" + :database_id db-id + :table_id no-schema-id + :collection_id coll3-id + :creator_id mark-id}] + Card [{c3-2-id :id + c3-2-eid :entity_id} {:name "Question 3-2" + :database_id db-id + :table_id schema-id + :collection_id coll3-id + :creator_id mark-id}] + Card [{c3-3-eid :entity_id} {:name "Question 3-3" + :database_id db-id + :table_id schema-id + :collection_id coll3-id + :creator_id mark-id}] + + DashboardCard [{dc3-1-eid :entity_id} {:card_id c3-1-id + :dashboard_id dash3-id}] + DashboardCard [{dc3-2-eid :entity_id} {:card_id c3-2-id + :dashboard_id dash3-id}]] + + (testing "selecting a dashboard gets its dashcards and cards as well" + (testing "grandparent dashboard" + (is (= #{[{:model "Dashboard" :id dash1-eid}] + [{:model "Dashboard" :id dash1-eid} + {:model "DashboardCard" :id dc1-1-eid}] + [{:model "Dashboard" :id dash1-eid} + {:model "DashboardCard" :id dc1-2-eid}] + [{:model "Card" :id c1-1-eid}] + [{:model "Card" :id c1-2-eid}]} + (->> (extract/extract-subtrees {:targets [["Dashboard" dash1-id]]}) + (map serdes.base/serdes-path) + set)))) + + (testing "middle dashboard" + (is (= #{[{:model "Dashboard" :id dash2-eid}] + [{:model "Dashboard" :id dash2-eid} + {:model "DashboardCard" :id dc2-1-eid}] + [{:model "Dashboard" :id dash2-eid} + {:model "DashboardCard" :id dc2-2-eid}] + [{:model "Card" :id c2-1-eid}] + [{:model "Card" :id c2-2-eid}]} + (->> (extract/extract-subtrees {:targets [["Dashboard" dash2-id]]}) + (map serdes.base/serdes-path) + set)))) + + (testing "grandchild dashboard" + (is (= #{[{:model "Dashboard" :id dash3-eid}] + [{:model "Dashboard" :id dash3-eid} + {:model "DashboardCard" :id dc3-1-eid}] + [{:model "Dashboard" :id dash3-eid} + {:model "DashboardCard" :id dc3-2-eid}] + [{:model "Card" :id c3-1-eid}] + [{:model "Card" :id c3-2-eid}]} + (->> (extract/extract-subtrees {:targets [["Dashboard" dash3-id]]}) + (map serdes.base/serdes-path) + set))))) + + (testing "selecting a collection gets all its contents" + (let [grandchild-paths #{[{:model "Collection" :id coll3-eid :label "grandchild_collection"}] + [{:model "Dashboard" :id dash3-eid}] + [{:model "Dashboard" :id dash3-eid} + {:model "DashboardCard" :id dc3-1-eid}] + [{:model "Dashboard" :id dash3-eid} + {:model "DashboardCard" :id dc3-2-eid}] + [{:model "Card" :id c3-1-eid}] + [{:model "Card" :id c3-2-eid}] + [{:model "Card" :id c3-3-eid}]} + middle-paths #{[{:model "Collection" :id coll2-eid :label "nested_collection"}] + [{:model "Dashboard" :id dash2-eid}] + [{:model "Dashboard" :id dash2-eid} + {:model "DashboardCard" :id dc2-1-eid}] + [{:model "Dashboard" :id dash2-eid} + {:model "DashboardCard" :id dc2-2-eid}] + [{:model "Card" :id c2-1-eid}] + [{:model "Card" :id c2-2-eid}] + [{:model "Card" :id c2-3-eid}]} + grandparent-paths #{[{:model "Collection" :id coll1-eid :label "some_collection"}] + [{:model "Dashboard" :id dash1-eid}] + [{:model "Dashboard" :id dash1-eid} + {:model "DashboardCard" :id dc1-1-eid}] + [{:model "Dashboard" :id dash1-eid} + {:model "DashboardCard" :id dc1-2-eid}] + [{:model "Card" :id c1-1-eid}] + [{:model "Card" :id c1-2-eid}] + [{:model "Card" :id c1-3-eid}]}] + (testing "grandchild collection has all its own contents" + (is (= grandchild-paths ; Includes the third card not found in the collection + (->> (extract/extract-subtrees {:targets [["Collection" coll3-id]]}) + (map serdes.base/serdes-path) + set)))) + (testing "middle collection has all its own plus the grandchild and its contents" + (is (= (set/union middle-paths grandchild-paths) + (->> (extract/extract-subtrees {:targets [["Collection" coll2-id]]}) + (map serdes.base/serdes-path) + set)))) + (testing "grandparent collection has all its own plus the grandchild and middle collections with contents" + (is (= (set/union grandparent-paths middle-paths grandchild-paths) + (->> (extract/extract-subtrees {:targets [["Collection" coll1-id]]}) + (map serdes.base/serdes-path) + set))))))))) diff --git a/src/metabase/models/card.clj b/src/metabase/models/card.clj index 6660ec5998c..ee67251c195 100644 --- a/src/metabase/models/card.clj +++ b/src/metabase/models/card.clj @@ -383,3 +383,10 @@ (set/union (serdes.util/mbql-deps dataset_query)) (set/union (serdes.util/visualization-settings-deps visualization_settings)) vec)) + +(defmethod serdes.base/serdes-descendants "Card" [_model-name id] + (let [card (db/select-one Card :id id) + source-table (some-> card :dataset_query :query :source-table)] + (when (and (string? source-table) + (.startsWith ^String source-table "card__")) + #{["Card" (Integer/parseInt (.substring ^String source-table 6))]}))) diff --git a/src/metabase/models/collection.clj b/src/metabase/models/collection.clj index 38507db7c81..52cc250ad6a 100644 --- a/src/metabase/models/collection.clj +++ b/src/metabase/models/collection.clj @@ -964,6 +964,16 @@ [(cond-> (serdes.base/infer-self-path "Collection" coll) slug (assoc :label slug))]) +(defmethod serdes.base/serdes-descendants "Collection" [_model-name id] + (let [location (db/select-one-field :location Collection :id id) + child-colls (set (for [child-id (db/select-ids Collection {:where [:like :location (str location id "/%")]})] + ["Collection" child-id])) + dashboards (set (for [dash-id (db/select-ids 'Dashboard :collection_id id)] + ["Dashboard" dash-id])) + cards (set (for [card-id (db/select-ids 'Card :collection_id id)] + ["Card" card-id]))] + (set/union child-colls dashboards cards))) + ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | Perms Checking Helper Fns | ;;; +----------------------------------------------------------------------------------------------------------------+ diff --git a/src/metabase/models/dashboard.clj b/src/metabase/models/dashboard.clj index b262a651041..b26249181a5 100644 --- a/src/metabase/models/dashboard.clj +++ b/src/metabase/models/dashboard.clj @@ -446,3 +446,8 @@ (defmethod serdes.base/serdes-dependencies "Dashboard" [{:keys [collection_id]}] [[{:model "Collection" :id collection_id}]]) + +(defmethod serdes.base/serdes-descendants "Dashboard" [_model-name id] + ;; Return the set of DashboardCards that belong to this dashboard. + (set (for [dc-id (db/select-ids 'DashboardCard :dashboard_id id)] + ["DashboardCard" dc-id]))) diff --git a/src/metabase/models/dashboard_card.clj b/src/metabase/models/dashboard_card.clj index fdf6274c78e..6528412d8a3 100644 --- a/src/metabase/models/dashboard_card.clj +++ b/src/metabase/models/dashboard_card.clj @@ -255,3 +255,8 @@ (update :dashboard_id serdes.util/import-fk 'Dashboard) (update :parameter_mappings serdes.util/import-parameter-mappings) (update :visualization_settings serdes.util/import-visualization-settings))) + +(defmethod serdes.base/serdes-descendants "DashboardCard" [_model-name id] + (let [{:keys [card_id dashboard_id]} (db/select-one DashboardCard :id id)] + #{["Card" card_id] + ["Dashboard" dashboard_id]})) diff --git a/src/metabase/models/serialization/base.clj b/src/metabase/models/serialization/base.clj index aff416a17fe..0c95b2e1083 100644 --- a/src/metabase/models/serialization/base.clj +++ b/src/metabase/models/serialization/base.clj @@ -135,7 +135,17 @@ ;;; Storage: ;;; The storage system might transform that stream in some arbitrary way. Storage is a dead end - it should perform side ;;; effects like writing to the disk or network, and return nothing. - +;;; +;;; Selective Serialization: +;;; Sometimes we want to export a "subtree" instead of the complete appdb. At the simplest, we might serialize a single +;;; question. Moving up, it might be a Dashboard and all its questions, or a Collection and all its content Cards and +;;; Dashboards. +;;; There's a relation to be captured here: the *descendants* of an entity are the ones it semantically "contains" (or +;;; those it needs in order to be executed, such as when questions depend on each other, or NativeQuerySnippets are +;;; referenced by a SQL question. +;;; +;;; (serdes-descendants entity) returns a set of such descendants for the given entity (in its exported form); see that +;;; multimethod for more details. (defmulti extract-all "Entry point for extracting all entities of a particular model: `(extract-all \"ModelName\" {opts...})` @@ -418,6 +428,39 @@ (load-insert! model adjusted) (load-update! model adjusted (db/select-one (symbol model) pkey maybe-local-id))))) +(defmulti serdes-descendants + "Captures the notion that eg. a dashboard \"contains\" its cards. + Returns a set, possibly empty or nil, of `[model-name database-id]` pairs for all entities that this entity contains + or requires to be executed. + Dispatches on the model name. + + For example: + - a `Collection` contains 0 or more other `Collection`s plus many `Card`s and `Dashboard`s; + - a `Dashboard` contains its `DashboardCard`s; + - each `DashboardCard` contains its `Card`; + - a `Card` might stand alone, or it might require `NativeQuerySnippet`s or other `Card`s as inputs; and + - a `NativeQuerySnippet` similarly might derive from others; + + A transitive closure over [[serdes-descendants]] should thus give a complete \"subtree\", such as a complete + `Collection` and all its contents. + + A typical implementation will run a query or two to collect eg. all `DashboardCard`s that are part of this + `Dashboard`, and return them as pairs like `[\"DashboardCard\" 17]`. + + What about [[serdes-dependencies]]? + Despite the similar-sounding names, this differs crucially from [[serdes-dependencies]]. [[serdes-descendants]] finds + all entities that are \"part\" of the given entity. + + [[serdes-dependencies]] finds all entities that need to be loaded into appdb before this one can be, generally because + this has a foreign key to them. The arrow \"points the other way\": [[serdes-dependencies]] points *up* -- from a + `Dashboard` to its containing `Collection`, `Collection` to its parent, from a `DashboardCard` to its `Dashboard` and + `Card`. [[serdes-descendants]] points *down* to contents, children, and components." + {:arglists '([model-name db-id])} + (fn [model-name _] model-name)) + +(defmethod serdes-descendants :default [_ _] + nil) + (defn entity-id? "Checks if the given string is a 21-character NanoID. Useful for telling entity IDs apart from identity hashes." [id-str] diff --git a/test/metabase/models/card_test.clj b/test/metabase/models/card_test.clj index 72b87f06a30..4e8735971a8 100644 --- a/test/metabase/models/card_test.clj +++ b/test/metabase/models/card_test.clj @@ -3,6 +3,7 @@ [clojure.test :refer :all] [metabase.models :refer [Action Card Collection Dashboard DashboardCard QueryAction]] [metabase.models.card :as card] + [metabase.models.serialization.base :as serdes.base] [metabase.models.serialization.hash :as serdes.hash] [metabase.query-processor :as qp] [metabase.test :as mt] @@ -361,3 +362,16 @@ (is (= "ead6cc05" (serdes.hash/raw-hash ["the card" (serdes.hash/identity-hash coll)]) (serdes.hash/identity-hash card)))))) + +(deftest serdes-descendants-test + (testing "regular cards don't depend on anything" + (mt/with-temp* [Card [card {:name "some card"}]] + (is (empty? (serdes.base/serdes-descendants "Card" (:id card)))))) + + (testing "cards which have another card as the source depend on that card" + (mt/with-temp* [Card [card1 {:name "base card"}] + Card [card2 {:name "derived card" + :dataset_query {:query {:source-table (str "card__" (:id card1))}}}]] + (is (empty? (serdes.base/serdes-descendants "Card" (:id card1)))) + (is (= #{["Card" (:id card1)]} + (serdes.base/serdes-descendants "Card" (:id card2))))))) -- GitLab