Skip to content
Snippets Groups Projects
Unverified Commit 9c61561d authored by Braden Shepherdson's avatar Braden Shepherdson Committed by GitHub
Browse files

Add `serdes-descendants` for "containment" to serialize a subtree (#25017)

* Add `serdes-descendants` for "containment" to serialize a subtree

This allows naming eg. a Collection and will recursively serialize: all
dashboards, cards and dashcards it contains directly, plus all child
collections and everything they contain.

Currently this words on Collection, Dashboard, DashboardCard, and Card.

* lint

* Switch to plural `extract-subtrees` with a list of `targets`
parent 94f43d5b
Branches
Tags
No related merge requests found
......@@ -2,7 +2,9 @@
"Extraction is the first step in serializing a Metabase appdb so it can be eg. written to disk.
See the detailed descriptions of the (de)serialization processes in [[metabase.models.serialization.base]]."
(:require [metabase-enterprise.serialization.v2.models :as serdes.models]
(:require [clojure.set :as set]
[medley.core :as m]
[metabase-enterprise.serialization.v2.models :as serdes.models]
[metabase.models.serialization.base :as serdes.base]))
(defn extract-metabase
......@@ -20,3 +22,31 @@
(eduction cat (for [model serdes.models/exported-models
:when (model-pred model)]
(serdes.base/extract-all model opts)))))
(defn- descendants-closure [targets]
(loop [to-chase (set targets)
chased #{}]
(let [[m i :as item] (first to-chase)
desc (serdes.base/serdes-descendants m i)
chased (conj chased item)
to-chase (set/union (disj to-chase item) (set/difference desc chased))]
(if (empty? to-chase)
chased
(recur to-chase chased)))))
(defn extract-subtrees
"Extracts the targeted entities and all their descendants into a reducible stream of extracted maps.
The targeted entities are specified as a list of `[\"SomeModel\" database-id]` pairs.
[[serdes.base/serdes-descendants]] is recursively called on these entities and all their descendants, until the
complete transitive closure of all descendants is found. This produces a set of `[\"ModelName\" id]` pairs, which
entities are then extracted the same way as [[extract-metabase]]."
[{:keys [targets] :as opts}]
(let [closure (descendants-closure targets)
by-model (->> closure
(group-by first)
(m/map-vals #(set (map second %))))]
(eduction cat (for [[model ids] by-model]
(eduction (map #(serdes.base/extract-one model opts %))
(serdes.base/raw-reducible-query model {:where [:in :id ids]}))))))
(ns metabase-enterprise.serialization.v2.extract-test
(:require [cheshire.core :as json]
[clojure.set :as set]
[clojure.test :refer :all]
[metabase-enterprise.serialization.test-util :as ts]
[metabase-enterprise.serialization.v2.extract :as extract]
......@@ -863,3 +864,191 @@
[{:model "Card" :id card1-eid}]
[{:model "DashboardCard" :id dashcard-eid}]}
(set (serdes.base/serdes-dependencies ser))))))))))
(deftest selective-serialization-basic-test
(ts/with-empty-h2-app-db
(ts/with-temp-dpc [User [{mark-id :id} {:first_name "Mark"
:last_name "Knopfler"
:email "mark@direstrai.ts"}]
Collection [{coll1-id :id
coll1-eid :entity_id} {:name "Some Collection"}]
Collection [{coll2-id :id
coll2-eid :entity_id} {:name "Nested Collection"
:location (str "/" coll1-id "/")}]
Collection [{coll3-id :id
coll3-eid :entity_id} {:name "Grandchild Collection"
:location (str "/" coll1-id "/" coll2-id "/")}]
Database [{db-id :id} {:name "My Database"}]
Table [{no-schema-id :id} {:name "Schemaless Table" :db_id db-id}]
Field [_ {:name "Some Field" :table_id no-schema-id}]
Table [{schema-id :id} {:name "Schema'd Table"
:db_id db-id
:schema "PUBLIC"}]
Field [_ {:name "Other Field" :table_id schema-id}]
;; One dashboard and three cards in each of the three collections:
;; Two cards contained in the dashboard and one freestanding.
Dashboard [{dash1-id :id
dash1-eid :entity_id} {:name "Dashboard 1"
:collection_id coll1-id
:creator_id mark-id}]
Card [{c1-1-id :id
c1-1-eid :entity_id} {:name "Question 1-1"
:database_id db-id
:table_id no-schema-id
:collection_id coll1-id
:creator_id mark-id}]
Card [{c1-2-id :id
c1-2-eid :entity_id} {:name "Question 1-2"
:database_id db-id
:table_id schema-id
:collection_id coll1-id
:creator_id mark-id}]
Card [{c1-3-eid :entity_id} {:name "Question 1-3"
:database_id db-id
:table_id schema-id
:collection_id coll1-id
:creator_id mark-id}]
DashboardCard [{dc1-1-eid :entity_id} {:card_id c1-1-id
:dashboard_id dash1-id}]
DashboardCard [{dc1-2-eid :entity_id} {:card_id c1-2-id
:dashboard_id dash1-id}]
;; Second dashboard, in the middle collection.
Dashboard [{dash2-id :id
dash2-eid :entity_id} {:name "Dashboard 2"
:collection_id coll2-id
:creator_id mark-id}]
Card [{c2-1-id :id
c2-1-eid :entity_id} {:name "Question 2-1"
:database_id db-id
:table_id no-schema-id
:collection_id coll2-id
:creator_id mark-id}]
Card [{c2-2-id :id
c2-2-eid :entity_id} {:name "Question 2-2"
:database_id db-id
:table_id schema-id
:collection_id coll2-id
:creator_id mark-id}]
Card [{c2-3-eid :entity_id} {:name "Question 2-3"
:database_id db-id
:table_id schema-id
:collection_id coll2-id
:creator_id mark-id}]
DashboardCard [{dc2-1-eid :entity_id} {:card_id c2-1-id
:dashboard_id dash2-id}]
DashboardCard [{dc2-2-eid :entity_id} {:card_id c2-2-id
:dashboard_id dash2-id}]
;; Third dashboard, in the grandchild collection.
Dashboard [{dash3-id :id
dash3-eid :entity_id} {:name "Dashboard 3"
:collection_id coll3-id
:creator_id mark-id}]
Card [{c3-1-id :id
c3-1-eid :entity_id} {:name "Question 3-1"
:database_id db-id
:table_id no-schema-id
:collection_id coll3-id
:creator_id mark-id}]
Card [{c3-2-id :id
c3-2-eid :entity_id} {:name "Question 3-2"
:database_id db-id
:table_id schema-id
:collection_id coll3-id
:creator_id mark-id}]
Card [{c3-3-eid :entity_id} {:name "Question 3-3"
:database_id db-id
:table_id schema-id
:collection_id coll3-id
:creator_id mark-id}]
DashboardCard [{dc3-1-eid :entity_id} {:card_id c3-1-id
:dashboard_id dash3-id}]
DashboardCard [{dc3-2-eid :entity_id} {:card_id c3-2-id
:dashboard_id dash3-id}]]
(testing "selecting a dashboard gets its dashcards and cards as well"
(testing "grandparent dashboard"
(is (= #{[{:model "Dashboard" :id dash1-eid}]
[{:model "Dashboard" :id dash1-eid}
{:model "DashboardCard" :id dc1-1-eid}]
[{:model "Dashboard" :id dash1-eid}
{:model "DashboardCard" :id dc1-2-eid}]
[{:model "Card" :id c1-1-eid}]
[{:model "Card" :id c1-2-eid}]}
(->> (extract/extract-subtrees {:targets [["Dashboard" dash1-id]]})
(map serdes.base/serdes-path)
set))))
(testing "middle dashboard"
(is (= #{[{:model "Dashboard" :id dash2-eid}]
[{:model "Dashboard" :id dash2-eid}
{:model "DashboardCard" :id dc2-1-eid}]
[{:model "Dashboard" :id dash2-eid}
{:model "DashboardCard" :id dc2-2-eid}]
[{:model "Card" :id c2-1-eid}]
[{:model "Card" :id c2-2-eid}]}
(->> (extract/extract-subtrees {:targets [["Dashboard" dash2-id]]})
(map serdes.base/serdes-path)
set))))
(testing "grandchild dashboard"
(is (= #{[{:model "Dashboard" :id dash3-eid}]
[{:model "Dashboard" :id dash3-eid}
{:model "DashboardCard" :id dc3-1-eid}]
[{:model "Dashboard" :id dash3-eid}
{:model "DashboardCard" :id dc3-2-eid}]
[{:model "Card" :id c3-1-eid}]
[{:model "Card" :id c3-2-eid}]}
(->> (extract/extract-subtrees {:targets [["Dashboard" dash3-id]]})
(map serdes.base/serdes-path)
set)))))
(testing "selecting a collection gets all its contents"
(let [grandchild-paths #{[{:model "Collection" :id coll3-eid :label "grandchild_collection"}]
[{:model "Dashboard" :id dash3-eid}]
[{:model "Dashboard" :id dash3-eid}
{:model "DashboardCard" :id dc3-1-eid}]
[{:model "Dashboard" :id dash3-eid}
{:model "DashboardCard" :id dc3-2-eid}]
[{:model "Card" :id c3-1-eid}]
[{:model "Card" :id c3-2-eid}]
[{:model "Card" :id c3-3-eid}]}
middle-paths #{[{:model "Collection" :id coll2-eid :label "nested_collection"}]
[{:model "Dashboard" :id dash2-eid}]
[{:model "Dashboard" :id dash2-eid}
{:model "DashboardCard" :id dc2-1-eid}]
[{:model "Dashboard" :id dash2-eid}
{:model "DashboardCard" :id dc2-2-eid}]
[{:model "Card" :id c2-1-eid}]
[{:model "Card" :id c2-2-eid}]
[{:model "Card" :id c2-3-eid}]}
grandparent-paths #{[{:model "Collection" :id coll1-eid :label "some_collection"}]
[{:model "Dashboard" :id dash1-eid}]
[{:model "Dashboard" :id dash1-eid}
{:model "DashboardCard" :id dc1-1-eid}]
[{:model "Dashboard" :id dash1-eid}
{:model "DashboardCard" :id dc1-2-eid}]
[{:model "Card" :id c1-1-eid}]
[{:model "Card" :id c1-2-eid}]
[{:model "Card" :id c1-3-eid}]}]
(testing "grandchild collection has all its own contents"
(is (= grandchild-paths ; Includes the third card not found in the collection
(->> (extract/extract-subtrees {:targets [["Collection" coll3-id]]})
(map serdes.base/serdes-path)
set))))
(testing "middle collection has all its own plus the grandchild and its contents"
(is (= (set/union middle-paths grandchild-paths)
(->> (extract/extract-subtrees {:targets [["Collection" coll2-id]]})
(map serdes.base/serdes-path)
set))))
(testing "grandparent collection has all its own plus the grandchild and middle collections with contents"
(is (= (set/union grandparent-paths middle-paths grandchild-paths)
(->> (extract/extract-subtrees {:targets [["Collection" coll1-id]]})
(map serdes.base/serdes-path)
set)))))))))
......@@ -383,3 +383,10 @@
(set/union (serdes.util/mbql-deps dataset_query))
(set/union (serdes.util/visualization-settings-deps visualization_settings))
vec))
(defmethod serdes.base/serdes-descendants "Card" [_model-name id]
(let [card (db/select-one Card :id id)
source-table (some-> card :dataset_query :query :source-table)]
(when (and (string? source-table)
(.startsWith ^String source-table "card__"))
#{["Card" (Integer/parseInt (.substring ^String source-table 6))]})))
......@@ -964,6 +964,16 @@
[(cond-> (serdes.base/infer-self-path "Collection" coll)
slug (assoc :label slug))])
(defmethod serdes.base/serdes-descendants "Collection" [_model-name id]
(let [location (db/select-one-field :location Collection :id id)
child-colls (set (for [child-id (db/select-ids Collection {:where [:like :location (str location id "/%")]})]
["Collection" child-id]))
dashboards (set (for [dash-id (db/select-ids 'Dashboard :collection_id id)]
["Dashboard" dash-id]))
cards (set (for [card-id (db/select-ids 'Card :collection_id id)]
["Card" card-id]))]
(set/union child-colls dashboards cards)))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | Perms Checking Helper Fns |
;;; +----------------------------------------------------------------------------------------------------------------+
......
......@@ -446,3 +446,8 @@
(defmethod serdes.base/serdes-dependencies "Dashboard"
[{:keys [collection_id]}]
[[{:model "Collection" :id collection_id}]])
(defmethod serdes.base/serdes-descendants "Dashboard" [_model-name id]
;; Return the set of DashboardCards that belong to this dashboard.
(set (for [dc-id (db/select-ids 'DashboardCard :dashboard_id id)]
["DashboardCard" dc-id])))
......@@ -255,3 +255,8 @@
(update :dashboard_id serdes.util/import-fk 'Dashboard)
(update :parameter_mappings serdes.util/import-parameter-mappings)
(update :visualization_settings serdes.util/import-visualization-settings)))
(defmethod serdes.base/serdes-descendants "DashboardCard" [_model-name id]
(let [{:keys [card_id dashboard_id]} (db/select-one DashboardCard :id id)]
#{["Card" card_id]
["Dashboard" dashboard_id]}))
......@@ -135,7 +135,17 @@
;;; Storage:
;;; The storage system might transform that stream in some arbitrary way. Storage is a dead end - it should perform side
;;; effects like writing to the disk or network, and return nothing.
;;;
;;; Selective Serialization:
;;; Sometimes we want to export a "subtree" instead of the complete appdb. At the simplest, we might serialize a single
;;; question. Moving up, it might be a Dashboard and all its questions, or a Collection and all its content Cards and
;;; Dashboards.
;;; There's a relation to be captured here: the *descendants* of an entity are the ones it semantically "contains" (or
;;; those it needs in order to be executed, such as when questions depend on each other, or NativeQuerySnippets are
;;; referenced by a SQL question.
;;;
;;; (serdes-descendants entity) returns a set of such descendants for the given entity (in its exported form); see that
;;; multimethod for more details.
(defmulti extract-all
"Entry point for extracting all entities of a particular model:
`(extract-all \"ModelName\" {opts...})`
......@@ -418,6 +428,39 @@
(load-insert! model adjusted)
(load-update! model adjusted (db/select-one (symbol model) pkey maybe-local-id)))))
(defmulti serdes-descendants
"Captures the notion that eg. a dashboard \"contains\" its cards.
Returns a set, possibly empty or nil, of `[model-name database-id]` pairs for all entities that this entity contains
or requires to be executed.
Dispatches on the model name.
For example:
- a `Collection` contains 0 or more other `Collection`s plus many `Card`s and `Dashboard`s;
- a `Dashboard` contains its `DashboardCard`s;
- each `DashboardCard` contains its `Card`;
- a `Card` might stand alone, or it might require `NativeQuerySnippet`s or other `Card`s as inputs; and
- a `NativeQuerySnippet` similarly might derive from others;
A transitive closure over [[serdes-descendants]] should thus give a complete \"subtree\", such as a complete
`Collection` and all its contents.
A typical implementation will run a query or two to collect eg. all `DashboardCard`s that are part of this
`Dashboard`, and return them as pairs like `[\"DashboardCard\" 17]`.
What about [[serdes-dependencies]]?
Despite the similar-sounding names, this differs crucially from [[serdes-dependencies]]. [[serdes-descendants]] finds
all entities that are \"part\" of the given entity.
[[serdes-dependencies]] finds all entities that need to be loaded into appdb before this one can be, generally because
this has a foreign key to them. The arrow \"points the other way\": [[serdes-dependencies]] points *up* -- from a
`Dashboard` to its containing `Collection`, `Collection` to its parent, from a `DashboardCard` to its `Dashboard` and
`Card`. [[serdes-descendants]] points *down* to contents, children, and components."
{:arglists '([model-name db-id])}
(fn [model-name _] model-name))
(defmethod serdes-descendants :default [_ _]
nil)
(defn entity-id?
"Checks if the given string is a 21-character NanoID. Useful for telling entity IDs apart from identity hashes."
[id-str]
......
......@@ -3,6 +3,7 @@
[clojure.test :refer :all]
[metabase.models :refer [Action Card Collection Dashboard DashboardCard QueryAction]]
[metabase.models.card :as card]
[metabase.models.serialization.base :as serdes.base]
[metabase.models.serialization.hash :as serdes.hash]
[metabase.query-processor :as qp]
[metabase.test :as mt]
......@@ -361,3 +362,16 @@
(is (= "ead6cc05"
(serdes.hash/raw-hash ["the card" (serdes.hash/identity-hash coll)])
(serdes.hash/identity-hash card))))))
(deftest serdes-descendants-test
(testing "regular cards don't depend on anything"
(mt/with-temp* [Card [card {:name "some card"}]]
(is (empty? (serdes.base/serdes-descendants "Card" (:id card))))))
(testing "cards which have another card as the source depend on that card"
(mt/with-temp* [Card [card1 {:name "base card"}]
Card [card2 {:name "derived card"
:dataset_query {:query {:source-table (str "card__" (:id card1))}}}]]
(is (empty? (serdes.base/serdes-descendants "Card" (:id card1))))
(is (= #{["Card" (:id card1)]}
(serdes.base/serdes-descendants "Card" (:id card2)))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment