From 2eb89b4d47a31ae5eb5725d4cca263dcd2555773 Mon Sep 17 00:00:00 2001 From: Braden Shepherdson <Braden.Shepherdson@gmail.com> Date: Thu, 23 Jun 2022 12:30:44 -0400 Subject: [PATCH] Foundation for v2 serialization and deserialization (#23204) This supports serialization of only Collections and Settings so far, but it demonstrates the design of the new serialization system. `metabase.models.serialization.base` defines the multimethods, which are to be implemented by all the exported models eventually. The actual serialization code that drives the larger process is in `metabase_enterprise.serialization.v2.extract` and `.load`, since serialization is an enterprise feature. The design calls for two matching phases on each side: - Serialization is extract + store; - Deserialization is ingest + load. Extract and load deal with vanilla Clojure maps with a `:serdes/meta` key giving common details; they deliberately know nothing about files. Store and ingest deal with the storage medium and the process of listing and reading a stored export. Laziness is retained: the `load` process ingests full details on demand, so only the metadata of the importing database needs to fit in memory. 
--- .../serialization/v2/extract.clj | 18 + .../serialization/v2/ingest.clj | 20 + .../serialization/v2/load.clj | 92 +++++ .../serialization/v2/models.clj | 6 + .../serialization/v2/extract_test.clj | 71 ++++ .../serialization/v2/load_test.clj | 133 +++++++ src/metabase/models/collection.clj | 53 +++ src/metabase/models/interface.clj | 4 +- src/metabase/models/serialization/base.clj | 369 ++++++++++++++++++ src/metabase/models/setting.clj | 13 +- test/metabase/api/dashboard_test.clj | 4 +- test/metabase/test/util.clj | 2 +- 12 files changed, 780 insertions(+), 5 deletions(-) create mode 100644 enterprise/backend/src/metabase_enterprise/serialization/v2/extract.clj create mode 100644 enterprise/backend/src/metabase_enterprise/serialization/v2/ingest.clj create mode 100644 enterprise/backend/src/metabase_enterprise/serialization/v2/load.clj create mode 100644 enterprise/backend/src/metabase_enterprise/serialization/v2/models.clj create mode 100644 enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj create mode 100644 enterprise/backend/test/metabase_enterprise/serialization/v2/load_test.clj create mode 100644 src/metabase/models/serialization/base.clj diff --git a/enterprise/backend/src/metabase_enterprise/serialization/v2/extract.clj b/enterprise/backend/src/metabase_enterprise/serialization/v2/extract.clj new file mode 100644 index 00000000000..06f7e4a310f --- /dev/null +++ b/enterprise/backend/src/metabase_enterprise/serialization/v2/extract.clj @@ -0,0 +1,18 @@ +(ns metabase-enterprise.serialization.v2.extract + "Extraction is the first step in serializing a Metabase appdb so it can be eg. written to disk. + + See the detailed descriptions of the (de)serialization processes in [[metabase.models.serialization.base]]." 
+ (:require [metabase-enterprise.serialization.v2.models :as serdes.models] + [metabase.models.serialization.base :as serdes.base])) + +(defn extract-metabase + "Extracts the appdb into a reducible stream of serializable maps, with `:serdes/meta` keys. + + This is the first step in serialization; see [[metabase-enterprise.serialization.v2.storage]] for actually writing to + files. Only the models listed in [[serdes.models/exported-models]] get exported. + + Takes an options map which is passed on to [[serdes.base/extract-all]] for each model. The options are documented + there." + [opts] + (eduction cat (for [model serdes.models/exported-models] + (serdes.base/extract-all model opts)))) diff --git a/enterprise/backend/src/metabase_enterprise/serialization/v2/ingest.clj b/enterprise/backend/src/metabase_enterprise/serialization/v2/ingest.clj new file mode 100644 index 00000000000..d3dd8f33425 --- /dev/null +++ b/enterprise/backend/src/metabase_enterprise/serialization/v2/ingest.clj @@ -0,0 +1,20 @@ +(ns metabase-enterprise.serialization.v2.ingest + "Ingestion is the first step in deserialization - reading from the export format (eg. a tree of YAML files) and + producing Clojure maps with `:serdes/meta` keys. + + See the detailed description of the (de)serialization processes in [[metabase.models.serialization.base]]." + (:require [potemkin.types :as p])) + +(p/defprotocol+ Ingestable + ;; Represents a data source for deserializing previously-exported appdb content into this Metabase instance. + ;; This is written as a protocol since overriding it with [[reify]] is useful for testing. + (ingest-list + [this] + "Return a reducible stream of meta-maps, one for each entity in the dump. + See the description of the `:serdes/meta` maps in [[metabase.models.serialization.base]]. 
+ + The order is not specified and should not be relied upon!") + + (ingest-one + [this meta-map] + "Given one of the meta-maps returned by [[ingest-list]], read in and return the entire corresponding entity.")) diff --git a/enterprise/backend/src/metabase_enterprise/serialization/v2/load.clj b/enterprise/backend/src/metabase_enterprise/serialization/v2/load.clj new file mode 100644 index 00000000000..e70254ea905 --- /dev/null +++ b/enterprise/backend/src/metabase_enterprise/serialization/v2/load.clj @@ -0,0 +1,92 @@ +(ns metabase-enterprise.serialization.v2.load + "Loading is the interesting part of deserialization: integrating the maps \"ingested\" from files into the appdb. + See the detailed breakdown of the (de)serialization processes in [[metabase.models.serialization.base]]." + (:require [medley.core :as m] + [metabase-enterprise.serialization.v2.ingest :as serdes.ingest] + [metabase-enterprise.serialization.v2.models :as serdes.models] + [metabase.models.serialization.base :as serdes.base] + [toucan.db :as db])) + +(defn- load-prescan-model [model] + (transduce (map (fn [[eid ih pk]] + {:by-entity-id [eid pk] + :by-identity-hash [ih pk]})) + (partial merge-with conj) + {:by-entity-id {} :by-identity-hash {}} + (serdes.base/load-prescan-all model))) + +(defn- load-prescan + "For all the exported models in the list, run the prescan process." + [] + (into {} (for [model serdes.models/exported-models] + [model (load-prescan-model model)]))) + +;; These are on ice for now; they'll be dusted off as the YAML storage/ingestion code is added in a later PR. 
+;; (defn- path-parts [path] +;; (->> (java.nio.file.Paths/get path (into-array String [])) +;; (.iterator) +;; (iterator-seq) +;; (map str))) +;; +;; (defn- id-from-path [path] +;; (let [^String file (last (path-parts path)) +;; base (.substring file 0 (.lastIndexOf file ".")) +;; ; Things with human-readable names use the form identity_hash+human_name.yaml +;; plus (.indexOf base "+")] +;; (if (< plus 0) +;; base +;; (.substring base 0 plus)))) + +(declare load-one) + +(defn- load-deps + "Given a list of `deps` (raw IDs), convert it to a list of meta-maps and `load-one` them all." + [ctx deps] + (if (empty? deps) + ctx + (reduce load-one ctx (map (:from-ids ctx) deps)))) + +(defn- load-one + "Loads a single meta-map into the appdb, doing the necessary bookkeeping. + + If the incoming entity has any dependencies, they are processed first (postorder) so that any foreign key references + in this entity can be resolved properly. + + This is mostly bookkeeping for the overall deserialization process - the actual load of any given entity is done by + [[metabase.models.serialization.base/load-one!]] and its various overridable parts, which see. + + Circular dependencies are not allowed, and are detected and thrown as an error." + [{:keys [expanding ingestion seen] :as ctx} {:keys [id type] :as meta-map}] + (cond + (expanding id) (throw (ex-info (format "Circular dependency on %s %s" type id) {})) + (seen id) ctx ; Already been done, just skip it. + :else (let [ingested (serdes.ingest/ingest-one ingestion meta-map) + model (db/resolve-model (symbol type)) + deps (serdes.base/serdes-dependencies ingested) + ctx (-> ctx + (update :expanding conj id) + (load-deps deps) + (update :seen conj id) + (update :expanding disj id)) + pk (serdes.base/load-one! + ingested + (or (get-in ctx [:local (name model) :by-entity-id id]) + (get-in ctx [:local (name model) :by-identity-hash id])))] + (assoc-in ctx [:local + (name model) + (if (serdes.base/entity-id? 
id) :by-entity-id :by-identity-hash) + id] + pk)))) + +(defn load-metabase + "Loads in a database export from an ingestion source, which is any Ingestable instance." + [ingestion] + ;; We proceed in the arbitrary order of ingest-list, deserializing all the files. Their declared dependencies guide + ;; the import, and make sure all containers are imported before contents, etc. + (let [contents (serdes.ingest/ingest-list ingestion)] + (reduce load-one {:local (load-prescan) + :expanding #{} + :seen #{} + :ingestion ingestion + :from-ids (m/index-by :id contents)} + contents))) diff --git a/enterprise/backend/src/metabase_enterprise/serialization/v2/models.clj b/enterprise/backend/src/metabase_enterprise/serialization/v2/models.clj new file mode 100644 index 00000000000..8b004ac0db8 --- /dev/null +++ b/enterprise/backend/src/metabase_enterprise/serialization/v2/models.clj @@ -0,0 +1,6 @@ +(ns metabase-enterprise.serialization.v2.models) + +(def exported-models + "The list of models which are exported by serialization. Used for production code and by tests." 
+ ["Collection" + "Setting"]) diff --git a/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj b/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj new file mode 100644 index 00000000000..90574451886 --- /dev/null +++ b/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj @@ -0,0 +1,71 @@ +(ns metabase-enterprise.serialization.v2.extract-test + (:require [clojure.test :refer :all] + [metabase-enterprise.serialization.test-util :as ts] + [metabase-enterprise.serialization.v2.extract :as extract] + [metabase.models :refer [Collection User]] + [metabase.models.serialization.base :as serdes.base])) + +(defn- select-one [model-name where] + (first (into [] (serdes.base/raw-reducible-query model-name {:where where})))) + +(deftest fundamentals-test + (ts/with-empty-h2-app-db + (ts/with-temp-dpc [Collection [{coll-id :id + coll-eid :entity_id + coll-slug :slug} {:name "Some Collection"}] + Collection [{child-id :id + child-eid :entity_id + child-slug :slug} {:name "Nested Collection" + :location (format "/%s/" coll-id)}] + + User [{mark-id :id} {:first_name "Mark" + :last_name "Knopfler" + :email "mark@direstrai.ts"}] + Collection [{pc-id :id + pc-eid :entity_id + pc-slug :slug} {:name "Mark's Personal Collection" + :personal_owner_id mark-id}]] + + (testing "a top-level collection is extracted correctly" + (let [ser (serdes.base/extract-one "Collection" (select-one "Collection" [:= :id coll-id]))] + (is (= {:type "Collection" :id coll-eid :label coll-slug} (:serdes/meta ser))) + (is (not (contains? ser :location))) + (is (not (contains? ser :id))) + (is (nil? (:personal_owner_id ser))) + (is (contains? ser :parent_id)) + (is (nil? 
(:parent_id ser))))) + + (testing "a nested collection is extracted with the right parent_id" + (let [ser (serdes.base/extract-one "Collection" (select-one "Collection" [:= :id child-id]))] + (is (= {:type "Collection" :id child-eid :label child-slug} (:serdes/meta ser))) + (is (not (contains? ser :location))) + (is (not (contains? ser :id))) + (is (= coll-eid (:parent_id ser))) + (is (nil? (:personal_owner_id ser))))) + + (testing "personal collections are extracted with email as key" + (let [ser (serdes.base/extract-one "Collection" (select-one "Collection" [:= :id pc-id]))] + (is (= {:type "Collection" :id pc-eid :label pc-slug} (:serdes/meta ser))) + (is (not (contains? ser :location))) + (is (not (contains? ser :id))) + (is (nil? (:parent_id ser))) + (is (= "mark@direstrai.ts" (:personal_owner_id ser))))) + + (testing "overall extraction returns the expected set" + (letfn [(collections [extraction] (->> extraction + (into []) + (map :serdes/meta) + (filter #(= "Collection" (:type %))) + (map :id) + set))] + (testing "no user specified" + (is (= #{coll-eid child-eid} + (collections (extract/extract-metabase nil))))) + + (testing "valid user specified" + (is (= #{coll-eid child-eid pc-eid} + (collections (extract/extract-metabase {:user mark-id}))))) + + (testing "invalid user specified" + (is (= #{coll-eid child-eid} + (collections (extract/extract-metabase {:user 218921})))))))))) diff --git a/enterprise/backend/test/metabase_enterprise/serialization/v2/load_test.clj b/enterprise/backend/test/metabase_enterprise/serialization/v2/load_test.clj new file mode 100644 index 00000000000..864b8252b70 --- /dev/null +++ b/enterprise/backend/test/metabase_enterprise/serialization/v2/load_test.clj @@ -0,0 +1,133 @@ +(ns metabase-enterprise.serialization.v2.load-test + (:require [clojure.test :refer :all] + [metabase-enterprise.serialization.test-util :as ts] + [metabase-enterprise.serialization.v2.extract :as serdes.extract] + [metabase-enterprise.serialization.v2.ingest 
:as serdes.ingest] + [metabase-enterprise.serialization.v2.load :as serdes.load] + [metabase.models :refer [Collection]] + [metabase.models.serialization.hash :as serdes.hash] + [toucan.db :as db])) + +(defn- ingestion-in-memory [extractions] + (let [mapped (into {} (for [{{:keys [type id]} :serdes/meta :as m} (into [] extractions)] + [[type id] m]))] + (reify + serdes.ingest/Ingestable + (ingest-list [_] + (eduction (map :serdes/meta) (vals mapped))) + (ingest-one [_ {:keys [type id]}] + (or (get mapped [type id]) + (throw (ex-info (format "Unknown ingestion target: %s %s" type id) + {:type type :id id :world mapped}))))))) + +;;; WARNING for test authors: [[extract/extract-metabase]] returns a lazy reducible value. To make sure you don't +;;; confound your tests with data from your dev appdb, remember to eagerly +;;; `(into [] (extract/extract-metabase ...))` in these tests. + +(deftest load-basics-test + (testing "a simple, fresh collection is imported" + (let [serialized (atom nil) + eid1 "123456789abcdef_0123"] + (ts/with-source-and-dest-dbs + (testing "extraction succeeds" + (ts/with-source-db + (ts/create! Collection :name "Basic Collection" :entity_id eid1) + (reset! 
serialized (into [] (serdes.extract/extract-metabase {}))) + (is (some (fn [{{:keys [type id]} :serdes/meta}] + (and (= type "Collection") (= id eid1))) + @serialized)))) + + (testing "loading into an empty database succeeds" + (ts/with-dest-db + (serdes.load/load-metabase (ingestion-in-memory @serialized)) + (let [colls (db/select Collection)] + (is (= 1 (count colls))) + (is (= "Basic Collection" (:name (first colls)))) + (is (= eid1 (:entity_id (first colls))))))) + + (testing "loading again into the same database does not duplicate" + (ts/with-dest-db + (serdes.load/load-metabase (ingestion-in-memory @serialized)) + (let [colls (db/select Collection)] + (is (= 1 (count colls))) + (is (= "Basic Collection" (:name (first colls)))) + (is (= eid1 (:entity_id (first colls))))))))))) + +(deftest deserialization-nested-collections-test + (testing "with a three-level nesting of collections" + (let [serialized (atom nil) + parent (atom nil) + child (atom nil) + grandchild (atom nil)] + (ts/with-source-and-dest-dbs + (testing "serialization of the three collections" + (ts/with-source-db + (reset! parent (ts/create! Collection :name "Parent Collection" :location "/")) + (reset! child (ts/create! Collection + :name "Child Collection" + :location (format "/%d/" (:id @parent)))) + (reset! grandchild (ts/create! Collection + :name "Grandchild Collection" + :location (format "/%d/%d/" (:id @parent) (:id @child)))) + (reset! serialized (into [] (serdes.extract/extract-metabase {}))))) + + (testing "deserialization into a database that already has the parent, but with a different ID" + (ts/with-dest-db + (ts/create! Collection :name "Unrelated Collection") + (ts/create! 
Collection :name "Parent Collection" :location "/" :entity_id (:entity_id @parent)) + (serdes.load/load-metabase (ingestion-in-memory @serialized)) + (let [parent-dest (db/select-one Collection :entity_id (:entity_id @parent)) + child-dest (db/select-one Collection :entity_id (:entity_id @child)) + grandchild-dest (db/select-one Collection :entity_id (:entity_id @grandchild))] + (is (some? parent-dest)) + (is (some? child-dest)) + (is (some? grandchild-dest)) + (is (not= (:id parent-dest) (:id @parent)) "should have different primary keys") + (is (= 4 (db/count Collection))) + (is (= "/" + (:location parent-dest))) + (is (= (format "/%d/" (:id parent-dest)) + (:location child-dest))) + (is (= (format "/%d/%d/" (:id parent-dest) (:id child-dest)) + (:location grandchild-dest)))))))))) + +(deftest deserialization-upsert-and-dupe-test + (testing "basic collections with their names changing, one without entity_id:" + (let [serialized (atom nil) + c1a (atom nil) + c2a (atom nil) + c1b (atom nil) + c2b (atom nil)] + (ts/with-source-and-dest-dbs + (testing "serializing the two collections" + (ts/with-source-db + (reset! c1b (ts/create! Collection :name "Renamed Collection 1")) + (reset! c2b (ts/create! Collection :name "Collection 2 version 2")) + (db/update! Collection (:id @c2b) {:entity_id nil}) + (reset! c2b (db/select-one Collection :id (:id @c2b))) + (is (nil? (:entity_id @c2b))) + (reset! serialized (into [] (serdes.extract/extract-metabase {}))))) + + (testing "serialization should use identity hashes where no entity_id is defined" + (is (= #{(:entity_id @c1b) + (serdes.hash/identity-hash @c2b)} + (->> @serialized + (map :serdes/meta) + (filter #(= "Collection" (:type %))) + (map :id) + set)))) + + (testing "deserializing, the name change causes a duplicated collection" + (ts/with-dest-db + (reset! c1a (ts/create! Collection :name "Collection 1" :entity_id (:entity_id @c1b))) + (reset! c2a (ts/create! Collection :name "Collection 2 version 1")) + (db/update! 
Collection (:id @c2a) {:entity_id nil}) + (reset! c2a (db/select-one Collection :id (:id @c2a))) + (is (nil? (:entity_id @c2a))) + + (serdes.load/load-metabase (ingestion-in-memory @serialized)) + (is (= 3 (db/count Collection)) "Collection 2 versions get duplicated, since the identity-hash changed") + (is (= #{"Renamed Collection 1" + "Collection 2 version 1" + "Collection 2 version 2"} + (set (db/select-field :name Collection)))))))))) diff --git a/src/metabase/models/collection.clj b/src/metabase/models/collection.clj index e37b71554fa..2cb7e779092 100644 --- a/src/metabase/models/collection.clj +++ b/src/metabase/models/collection.clj @@ -14,6 +14,7 @@ [metabase.models.collection.root :as collection.root] [metabase.models.interface :as mi] [metabase.models.permissions :as perms :refer [Permissions]] + [metabase.models.serialization.base :as serdes.base] [metabase.models.serialization.hash :as serdes.hash] [metabase.public-settings.premium-features :as premium-features] [metabase.util :as u] @@ -905,6 +906,58 @@ serdes.hash/IdentityHashable {:identity-hash-fields (constantly [:name :namespace parent-identity-hash])}) +(defn- collection-query [maybe-user] + (serdes.base/raw-reducible-query + "Collection" + {:where [:and + [:= :archived false] + (if (nil? maybe-user) + [:is :personal_owner_id nil] + [:= :personal_owner_id maybe-user])]})) + +(defmethod serdes.base/extract-query "Collection" [_ {:keys [user]}] + (let [unowned (collection-query nil)] + (if user + (eduction cat [unowned (collection-query user)]) + unowned))) + +(defmethod serdes.base/extract-one "Collection" + ;; Transform :location (which uses database IDs) into a portable :parent_id with the parent's entity ID. + ;; Also transform :personal_owner_id from a database ID to the email string, if it's defined. + ;; Use the :slug as the human-readable label. 
+ [_ coll] + (let [parent (some-> coll + :id + Collection + (hydrate :parent_id) + :parent_id + Collection) + parent-id (when parent + (or (:entity_id parent) (serdes.hash/identity-hash parent))) + owner-email (when (:personal_owner_id coll) + (db/select-one-field :email 'User :id (:personal_owner_id coll)))] + (-> (serdes.base/extract-one-basics "Collection" coll) + (dissoc :location) + (assoc :parent_id parent-id :personal_owner_id owner-email) + (assoc-in [:serdes/meta :label] (:slug coll))))) + +(defmethod serdes.base/load-xform "Collection" [{:keys [parent_id personal_owner_id] :as contents}] + (let [loc (if parent_id + (let [{:keys [id location]} (serdes.base/lookup-by-id Collection parent_id)] + (str location id "/")) + "/") + user-id (when personal_owner_id + (db/select-one-field :id 'User :email personal_owner_id))] + (-> contents + serdes.base/load-xform-basics + (dissoc :parent_id) + (assoc :location loc :personal_owner_id user-id)))) + +(defmethod serdes.base/serdes-dependencies "Collection" + [{:keys [parent_id]}] + (if parent_id + [parent_id] + [])) ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | Perms Checking Helper Fns | diff --git a/src/metabase/models/interface.clj b/src/metabase/models/interface.clj index f8d81c34f23..027e6060c2a 100644 --- a/src/metabase/models/interface.clj +++ b/src/metabase/models/interface.clj @@ -237,7 +237,9 @@ :update add-updated-at-timestamp) (defn- add-entity-id [obj & _] - (assoc obj :entity_id (u/generate-nano-id))) + (if (contains? obj :entity_id) + obj + (assoc obj :entity_id (u/generate-nano-id)))) (models/add-property! 
:entity_id :insert add-entity-id) diff --git a/src/metabase/models/serialization/base.clj b/src/metabase/models/serialization/base.clj new file mode 100644 index 00000000000..c2bd53143a1 --- /dev/null +++ b/src/metabase/models/serialization/base.clj @@ -0,0 +1,369 @@ +(ns metabase.models.serialization.base + "Defines several helper functions and multimethods for the serialization system. + Serialization is an enterprise feature, but in the interest of keeping all the code for an entity in one place, these + methods are defined here and implemented for all the exported models. + + Whether to export a new model: + - Generally, the high-profile user facing things (databases, questions, dashboards, snippets, etc.) are exported. + - Internal or automatic things (users, activity logs, permissions) are not. + + If the model is not exported, add it to the exclusion lists in the tests. Every model should be explicitly listed as + exported or not, and a test enforces this so serialization isn't forgotten for new models." + (:require [clojure.tools.logging :as log] + [metabase.models.serialization.hash :as serdes.hash] + [toucan.db :as db] + [toucan.models :as models])) + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | Serialization Process | +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; Serialization happens in two stages: extraction and storage. These are independent and deliberately decoupled. +;;; The result of extraction is a reducible stream of Clojure maps with `:serdes/meta` keys on them (see below). +;;; In particular, extraction does not care about file formats or other such things. +;;; +;;; Storage takes the stream from extraction and actually stores it or sends it. Traditionally we have serialized to a +;;; directory tree full of YAML files, and that's the only storage approach implemented here. 
But since the process is +;;; decoupled, we or a user could write their own storage layer, using JSON or protocol buffers or any other format. +;;; +;;; Both extraction and storage are written as a set of multimethods, with defaults for the common path. +;;; Note that extraction is controlled by a map of options and settings, detailed below. +;;; +;;; Extraction: +;;; - Top-level serialization code [[metabase-enterprise.serialization.v2.extract/extract-metabase]] has a list of +;;; models to be exported. +;;; - A test enforces that all models are either exported, or explicitly excluded, so new ones can't be forgotten. +;;; - It calls `(extract-all "ModelName" opts)` for each model. +;;; - The default for this calls `(extract-query "ModelName" opts)`, getting back a reducible stream of entities. +;;; - For each entity in that stream, it calls `(extract-one "ModelName" entity)`, which converts the map from the +;;; database to a portable map with `:serdes/meta` on it. Eg. no database IDs as foreign keys. +;;; - The default [[extract-all]] should work for most models (override [[extract-query]] and [[extract-one]] instead), +;;; but it can be overridden if needed. +;;; +;;; The end result of extraction is a reducible stream of Clojure maps; this is passed to storage directly, along with +;;; the map of options. +;;; +;;; Options currently supported by extraction: +;;; - `:user 6` giving the primary key for a user whose personal collections should be extracted. +;;; +;;; Storage: +;;; The storage system might transform that stream in some arbitrary way. Storage is a dead end - it should perform side +;;; effects like writing to the disk or network, and return nothing. + +(defmulti extract-all + "Entry point for extracting all entities of a particular model: + `(extract-all \"ModelName\" {opts...})` + Keyed on the model name. + + Returns a reducible stream of extracted maps (ie. vanilla Clojure maps with `:serdes/meta` keys). 
+ + You probably don't want to implement this directly. The default implementation delegates to [[extract-query]] and + [[extract-one]], which are usually more convenient to override." + (fn [model _] model)) + +(defmulti extract-query + "Performs the select query, possibly filtered, for all the entities of this type that should be serialized. Called + from [[extract-all]]'s default implementation. + + `(extract-query \"ModelName\" opts)` + + Keyed on the model name, the first argument. + + Returns a reducible stream of raw database rows; `:serdes/meta` is attached later by [[extract-one]]. It should *not* + be a stream of Toucan entities, but vanilla Clojure maps. + + In fact, Toucan's high-level niceties (eg. expanding JSON-encoded fields to Clojure data, decrypting, type + conversions, or hydrating some relationship by default) are counterproductive when our goal is a database-level + export. As a specific example, [[db/simple-select]] expands JSON but [[db/simple-insert!]] doesn't put it back. + There's also no `simple-update!`, making a fresh insert diverge from an update. + + Defaults to using the helper `(raw-reducible-query model)` for the entire table, which is equivalent to + `(db/simple-select-reducible model)` but without running post-select handlers. This returns vanilla maps, not + [[models/IModel]] instances. + + You may want to override this to eg. skip archived entities, or otherwise filter what gets serialized. Prefer using + the two-argument form of [[raw-reducible-query]]." + (fn [model _] model)) + +(defmulti extract-one + "Extracts a single entity retrieved from the database into a portable map with `:serdes/meta` attached. + + The default implementation uses the model name as the `:type` and either `:entity_id` or [[serdes.hash/identity-hash]] + as the `:id`. It also strips off the database's numeric primary key. + + That suffices for a few simple entities, but most entities will need to override this. 
+ They should follow the pattern of: + - Convert to a vanilla Clojure map, not a [[models/IModel]] instance. + - Drop the numeric database primary key + - Replace any foreign keys with portable values (eg. entity IDs or `identity-hash`es, owning user's ID with their + email, etc.) + - Consider attaching a human-friendly `:label` under `:serdes/meta`. (Eg. a Collection's `:slug`) + + When overriding this, [[extract-one-basics]] is probably a useful starting point. + + Keyed by the model name of the entity, the first argument." + (fn [model _] model)) + +(defmethod extract-all :default [model opts] + (eduction (map (partial extract-one model)) + (extract-query model opts))) + +(defn raw-reducible-query + "Helper for calling Toucan's raw [[db/reducible-query]]. With just the model name, fetches everything. You can filter + with a HoneySQL map like {:where [:= :archived true]}. + + Returns a reducible stream of JDBC row maps." + ([model-name] + (raw-reducible-query model-name nil)) + ([model-name honeysql-form] + (db/reducible-query (merge {:select [:*] :from [(symbol model-name)]} + honeysql-form)))) + +(defmethod extract-query :default [model-name _] + (raw-reducible-query model-name)) + +(defn extract-one-basics + "A helper for writing [[extract-one]] implementations. It takes care of the basics: + - Convert to a vanilla Clojure map. + - Add `:serdes/meta`. + - Drop the primary key. + + Returns the Clojure map." 
+ [model-name entity] + (let [model (db/resolve-model (symbol model-name)) + pk (models/primary-key model)] + (-> entity + (assoc :serdes/meta {:type model-name + :id (or (:entity_id entity) + (serdes.hash/identity-hash (model (get entity pk))))}) + (dissoc pk)))) + +(defmethod extract-one :default [model-name entity] + (extract-one-basics model-name entity)) + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | Deserialization Process | +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; Deserialization is split into two stages, mirroring serialization. They are called ingestion and loading. +;;; Ingestion turns whatever serialized form (eg. a tree of YAML files) was produced by storage into Clojure maps with +;;; `:serdes/meta` maps. Loading imports those entities into the appdb, updating and inserting rows as needed. +;;; +;;; Ingestion: +;;; Ingestion is intended to be a black box, like storage above. [[Ingestable]] is a protocol to allow easy [[reify]] +;;; usage for testing in-memory deserialization. +;;; +;;; Factory functions consume some details (like a file path) and return an [[Ingestable]], with its two methods: +;;; - `(ingest-list ingestable)` returns a reducible stream of `:serdes/meta` maps in any order. +;;; - `(ingest-one ingestable meta-map)` ingests a single entity into memory, returning it as a map. +;;; +;;; This two-stage design avoids needing all the data in memory at once, where that's practical with the underlying +;;; storage media (eg. files). +;;; +;;; Loading: +;;; Loading tries to find corresponding entities in the destination appdb by `entity_id` or `identity-hash`, and update +;;; those rows rather than duplicating. +;;; The entry point is [[metabase-enterprise.serialization.v2.load/load-metabase]]. 
The top-level process works like +;;; this: +;;; - `(load-prescan-all "ModelName")` is called, which selects the entire table as a reducible stream and calls +;;; [[load-prescan-one]] on each entry. +;;; - The default for that usually is the right thing. +;;; - `(load-prescan-one model entity)` turns a particular entity into an `[entity_id identity-hash primary-key]` triple. +;;; - The default will work for models with a literal `entity_id` field; those with alternative IDs (database, +;;; table, field, setting, etc.) should override this method. +;;; - Prescanning complete, `(ingest-list ingestable)` gets the metadata for every exported entity in arbitrary order. +;;; - `(ingest-one ingestable meta-map)` is called on each first to ingest the value into memory, then +;;; - `(serdes-dependencies ingested)` to get a list of other IDs (entity IDs or identity hashes). +;;; - The default is an empty list. +;;; - The idea of dependencies is eg. a database must be loaded before its tables, a table before its fields, a +;;; collection's ancestors before the collection itself. +;;; - Dependencies are loaded recursively in postorder; circular dependencies cause the process to throw. +;;; - Having found an entity it can really load, the core code will check its table of IDs found by prescanning. +;;; - Then it calls `(load-one! ingested maybe-local-entity)`, passing the `ingested` value and either `nil` or the +;;; Toucan entity corresponding to the incoming map. +;;; - `load-one!` is a side-effecting black box to the rest of the deserialization process. +;;; It returns the primary key of the new or existing entity, which is necessary to resolve foreign keys between +;;; imported entities. +;;; - The table of "local" entities found by the prescan is updated to include newly loaded ones. +;;; +;;; +;;; `load-one!` has a default implementation that works for most models: +;;; - Call `(load-xform ingested)` to massage the map as needed. 
+;;;   - This is the spot to override, for example to convert a foreign key from portable entity ID into a database ID.
+;;; - Then, call either:
+;;;   - `(load-update! model-name ingested local-entity)` if the local entity exists, or
+;;;   - `(load-insert! model-name ingested)` if the entity is new.
+;;;   Both of these have the obvious defaults of [[db/update!]] or [[db/simple-insert!]].
+
+;;; +----------------------------------------------------------------------------------------------------------------+
+;;; |                                               :serdes/meta maps                                                |
+;;; +----------------------------------------------------------------------------------------------------------------+
+;;; The Clojure maps from extraction and ingestion always include a special key `:serdes/meta` giving some information
+;;; about the serialized entity. The value is always a map like:
+;;; `{:type "ModelName" :id "entity ID or identity hash string" :label "Human-readable name"}`
+;;; `:type` and `:id` are required; `:label` is optional.
+;;;
+;;; Many of the multimethods are keyed on the `:type` field.
+
+(defmulti load-prescan-all
+  "Returns a reducible stream of `[entity_id identity-hash primary-key]` triples for the entire table.
+
+  Defaults to running [[load-prescan-one]] over each entity returned by [[jdbc/reducible-query]] for this model.
+  Override this method if filtering is needed.
+
+  Keyed on the model name."
+  identity)
+
+(defmulti load-prescan-one
+  "Converts a database entity into a `[entity_id identity-hash primary-key]` triple for the deserialization machinery.
+  Called with the Toucan model (*not* this entity), and the JDBC map for the entity in question.
+
+  Defaults to using a literal `:entity_id` column. For models with a different entity ID (eg. a Table's name, a
+  Setting's key), override this method.
+
+  Keyed on the model name."
+  (fn [model _] (name model)))
+
+(defmethod load-prescan-all :default [model-name]
+  ;; Resolve the Toucan model once, then map every raw row of the table to its prescan triple.
+  (let [toucan-model (db/resolve-model (symbol model-name))
+        ->triple     #(load-prescan-one toucan-model %)]
+    (eduction (map ->triple)
+              (raw-reducible-query model-name))))
+
+(defmethod load-prescan-one :default [model entity]
+  ;; Builds the `[entity_id identity-hash primary-key]` triple for one row. The identity hash is computed from the
+  ;; entity re-selected through the model, which means one extra `select-one` per row.
+  (let [pk-column (models/primary-key model)
+        pk-value  (get entity pk-column)
+        selected  (db/select-one model pk-column pk-value)] ; TODO This sucks for identity-hash!
+    [(:entity_id entity)
+     (serdes.hash/identity-hash selected)
+     pk-value]))
+
+(defn- ingested-model
+  "Dispatch function shared by several of the load multimethods: returns the `:type` found in the `:serdes/meta` map
+  of the incoming entity."
+  [ingested]
+  (get-in ingested [:serdes/meta :type]))
+
+(defmulti serdes-dependencies
+  "Takes an entity map as ingested (not a Toucan entity) and returns a (possibly empty) list of its dependencies.
+  Each dependency is identified by either the entity ID or the identity hash of the target entity.
+
+  Keyed on the model name.
+  The default implementation returns an empty vector, so only models with dependencies need to implement this."
+  ingested-model)
+
+(defmethod serdes-dependencies :default [_]
+  [])
+
+(defmulti load-xform
+  "Transforms the incoming vanilla map, as ingested, into a form suitable for sending to the database (in eg.
+  [[db/simple-insert!]]).
+  For example, this is where foreign keys should be converted back from a portable entity ID or identity hash into
+  a numeric database ID. In spirit this mirrors [[extract-one]], though the two are not strict inverses:
+  [[extract-one]] drops the primary key, and this need not put one back, for example.
+
+  The default just calls [[load-xform-basics]].
+  If you override this, call [[load-xform-basics]] as well."
+  ingested-model)
+
+(defn load-xform-basics
+  "Performs the usual steps for an incoming entity:
+  - Drop :serdes/meta
+
+  You should call this as a first step from any implementation of [[load-xform]].
+
+  This is a mirror (but not precise inverse) of [[extract-one-basics]]."
+  [ingested]
+  (dissoc ingested :serdes/meta))
+
+(defmethod load-xform :default [ingested]
+  (load-xform-basics ingested))
+
+(defmulti load-update!
+  "Called by the default [[load-one!]] if there is a corresponding entity already in the appdb.
+  The first argument is the model name, the second the incoming map we're deserializing, and the third is the Toucan
+  entity found in the appdb.
+
+  Defaults to a straightforward [[db/update!]], and you may not need to override it.
+
+  Keyed on the model name (the first argument), because the second argument doesn't have its `:serdes/meta` anymore.
+
+  Returns the primary key of the updated entity."
+  (fn [model _ _] model))
+
+(defmethod load-update! :default [model-name ingested local]
+  (let [model (db/resolve-model (symbol model-name))
+        ; `pk` is the primary key *column* of the model; `id` is the local row's value in that column.
+        pk    (models/primary-key model)
+        id    (get local pk)]
+    (log/tracef "Upserting %s %d: old %s new %s" model-name id (pr-str local) (pr-str ingested))
+    ; Using the two-argument form of [[db/update!]] that takes the model and a HoneySQL form for the actual update.
+    ; It works differently from the more typical `(db/update! 'Model id updates...)` form: this form doesn't run any of
+    ; the pre-update magic, it just updates the database directly.
+    (db/update! (symbol model-name) {:where [:= pk id] :set ingested})
+    ; Return the primary key *value* of the updated row, as documented - not the column keyword `pk`.
+    id))
+
+(defmulti load-insert!
+  "Called by the default [[load-one!]] if there is no corresponding entity already in the appdb.
+
+  Defaults to a straightforward [[db/simple-insert!]], and you probably don't need to implement this.
+  Note that [[db/insert!]] should be avoided - we don't want to populate the `:entity_id` field if it wasn't already
+  set!
+
+  Keyed on the model name (the first argument), because the second argument doesn't have its `:serdes/meta` anymore.
+
+  Returns the primary key of the newly inserted entity."
+  (fn [model _] model))
+
+(defmethod load-insert! :default [model ingested]
+  (log/tracef "Inserting %s: %s" model (pr-str ingested))
+  ; Toucan's simple-insert! actually does the right thing for our purposes: it doesn't call pre-insert or post-insert.
+  (let [model-sym (symbol model)]
+    (db/simple-insert! model-sym ingested)))
+
+(defmulti load-one!
+  "Black box for integrating a deserialized entity into this appdb.
+  `(load-one! ingested maybe-local-id)`
+
+  `ingested` is the vanilla map from ingestion, with the `:serdes/meta` key on it.
+  `maybe-local-id` is either `nil`, or the primary key of the corresponding entity in the appdb.
+
+  The default implementation calls [[load-xform]] to massage the incoming map. If `maybe-local-id` is `nil` it then
+  calls [[load-insert!]]; otherwise it selects the local entity by that primary key and hands it to [[load-update!]].
+
+  Prefer overriding [[load-xform]], and if necessary [[load-update!]] and [[load-insert!]], rather than this.
+
+  Keyed on the model name.
+
+  Returns the primary key of the updated or inserted entity."
+  (fn [ingested _]
+    (ingested-model ingested)))
+
+(defmethod load-one! :default [ingested maybe-local-id]
+  (let [model-name (ingested-model ingested)
+        pkey       (models/primary-key (db/resolve-model (symbol model-name)))
+        adjusted   (load-xform ingested)]
+    (if (some? maybe-local-id)
+      ; A local row exists: fetch it by primary key and update it in place.
+      (load-update! model-name adjusted (db/select-one (symbol model-name) pkey maybe-local-id))
+      ; Nothing local corresponds to the incoming entity, so insert a new row.
+      (load-insert! model-name adjusted))))
+
+(defn entity-id?
+  "True if the given string is a 21-character NanoID, false otherwise. Handy for distinguishing entity IDs from
+  identity hashes."
+  [id-str]
+  (some? (re-matches #"^[A-Za-z0-9_-]{21}$" id-str)))
+
+(defn- find-by-identity-hash
+  "Given a model and a target identity hash, this scans the appdb for any instance of the model corresponding to the
+  hash. Does a complete scan, so this should be called sparingly!"
+  ;; TODO This should be able to use a cache of identity-hash values from the start of the deserialization process.
+ [model id-hash] + (->> (db/select-reducible model) + (into [] (comp (filter #(= id-hash (serdes.hash/identity-hash %))) + (take 1))) + first)) + +(defn lookup-by-id + "Given an ID string, this endeavours to find the matching entity, whether it's an entity ID or identity hash. + This is useful when writing [[load-xform]] to turn a foreign key from a portable form to an appdb ID. + Returns a Toucan entity or nil." + [model id-str] + (if (entity-id? id-str) + (db/select-one model :entity_id id-str) + (find-by-identity-hash model id-str))) diff --git a/src/metabase/models/setting.clj b/src/metabase/models/setting.clj index b3002a88967..0a3837ec4cd 100644 --- a/src/metabase/models/setting.clj +++ b/src/metabase/models/setting.clj @@ -81,6 +81,7 @@ [environ.core :as env] [medley.core :as m] [metabase.api.common :as api] + [metabase.models.serialization.base :as serdes.base] [metabase.models.serialization.hash :as serdes.hash] [metabase.models.setting.cache :as setting.cache] [metabase.plugins.classloader :as classloader] @@ -132,6 +133,8 @@ Primarily used in test to disable retired setting check." false) +(declare admin-writable-site-wide-settings get-value-of-type set-value-of-type!) + (models/defmodel Setting "The model that underlies [[defsetting]]." :setting) @@ -145,7 +148,15 @@ serdes.hash/IdentityHashable {:identity-hash-fields (constantly [:key])}) -(declare get-value-of-type) +(defmethod serdes.base/extract-all "Setting" [_model _opts] + (for [{:keys [key value]} (admin-writable-site-wide-settings + :getter (partial get-value-of-type :string))] + {:serdes/meta {:type "Setting" :id (name key)} + :key key + :value value})) + +(defmethod serdes.base/load-one! "Setting" [{:keys [key value]} _] + (set-value-of-type! 
:string key value)) (def ^:private Type (s/pred (fn [a-type] diff --git a/test/metabase/api/dashboard_test.clj b/test/metabase/api/dashboard_test.clj index d7c1ab7b0ef..515910f621a 100644 --- a/test/metabase/api/dashboard_test.clj +++ b/test/metabase/api/dashboard_test.clj @@ -1410,9 +1410,9 @@ (testing "Should work if Dashboard has multiple mappings for a single param" (with-chain-filter-fixtures [{:keys [dashboard card dashcard param-keys]}] - (mt/with-temp* [Card [card-2 (dissoc card :id)] + (mt/with-temp* [Card [card-2 (dissoc card :id :entity_id)] DashboardCard [dashcard-2 (-> dashcard - (dissoc :id :card_id) + (dissoc :id :card_id :entity_id) (assoc :card_id (:id card-2)))]] (is (= ["African" "American" "Artisan"] (take 3 (mt/user-http-request :rasta :get 200 (chain-filter-values-url diff --git a/test/metabase/test/util.clj b/test/metabase/test/util.clj index d8c743ea6da..d158334fdea 100644 --- a/test/metabase/test/util.clj +++ b/test/metabase/test/util.clj @@ -733,7 +733,7 @@ (let [card-count-before (db/count Card) card-name (random-name)] (with-model-cleanup [Card] - (db/insert! Card (-> other-card (dissoc :id) (assoc :name card-name))) + (db/insert! Card (-> other-card (dissoc :id :entity_id) (assoc :name card-name))) (testing "Card count should have increased by one" (is (= (inc card-count-before) (db/count Card)))) -- GitLab