From 2eb89b4d47a31ae5eb5725d4cca263dcd2555773 Mon Sep 17 00:00:00 2001
From: Braden Shepherdson <Braden.Shepherdson@gmail.com>
Date: Thu, 23 Jun 2022 12:30:44 -0400
Subject: [PATCH] Foundation for v2 serialization and deserialization (#23204)

This supports serialization of only Collections and Settings so far, but
it demonstrates the design of the new serialization system.

`metabase.models.serialization.base` defines the multimethods, which
are to be implemented by all the exported models eventually.
The actual serialization code that drives the larger process is in
`metabase-enterprise.serialization.v2.extract` and `.load`, since
serialization is an enterprise feature.
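
For a new exported model, the typical overrides look something like the sketch
below (`Widget` is a hypothetical model; `serdes.base` is the alias used
elsewhere in this patch; the Collection changes later in this patch are the
real reference implementation):

```clojure
;; Hypothetical Widget model, showing the multimethods a typical exported model overrides.
(defmethod serdes.base/extract-query "Widget" [_model-name _opts]
  ;; Filter what gets exported, e.g. skip archived rows.
  (serdes.base/raw-reducible-query "Widget" {:where [:= :archived false]}))

(defmethod serdes.base/extract-one "Widget" [_model-name widget]
  ;; Start from the basics (adds :serdes/meta, drops the numeric PK), then
  ;; swap any database foreign keys for portable IDs here.
  (serdes.base/extract-one-basics "Widget" widget))

(defmethod serdes.base/load-xform "Widget" [ingested]
  ;; Mirror of extract-one: turn portable IDs back into appdb foreign keys.
  (serdes.base/load-xform-basics ingested))

(defmethod serdes.base/serdes-dependencies "Widget" [_widget]
  ;; IDs of entities that must be loaded before this one; empty if none.
  [])
```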

The design calls for two matching phases on each side:
- Serialization is extract + store;
- Deserialization is ingest + load.

Extract and load deal with vanilla Clojure maps with a `serdes/meta` key
giving common details; they deliberately know nothing about files.
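
For illustration, an extracted Collection looks roughly like the map below;
the entity ID, slug, and field values are made up:

```clojure
;; Sketch of one extracted entity; all values shown are illustrative.
{:serdes/meta        {:type "Collection" :id "fyys8MBlNc2GwVw0ZzQ7Z" :label "my_collection"}
 :name               "My Collection"
 :description        nil
 :parent_id          nil   ; parent's entity ID, or nil for a top-level collection
 :personal_owner_id  nil}  ; owner's email for personal collections, else nil
```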

Store and ingest deal with the storage medium and the process of
listing and reading a stored export.

Laziness is retained: the `load` process ingests each entity's full details
on demand, so only the export's metadata and the destination appdb's ID
indexes need to fit in memory, never the full entities.
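
The load tests drive a full round trip through an in-memory `Ingestable`.
A rough sketch of that flow is below (aliases match the test code; real
storage and ingestion backends, e.g. YAML files, arrive in later PRs):

```clojure
;; Round-trip sketch, adapted from the load tests. There is no file-based
;; storage/ingestion yet, so an in-memory Ingestable stands in for store + ingest.
(require '[metabase-enterprise.serialization.v2.extract :as serdes.extract]
         '[metabase-enterprise.serialization.v2.ingest :as serdes.ingest]
         '[metabase-enterprise.serialization.v2.load :as serdes.load])

(let [extracted (into [] (serdes.extract/extract-metabase {}))  ; extract (eagerly, for the sketch)
      by-id     (into {} (for [{{:keys [type id]} :serdes/meta :as entity} extracted]
                           [[type id] entity]))
      source    (reify serdes.ingest/Ingestable
                  (ingest-list [_] (map :serdes/meta (vals by-id)))
                  (ingest-one  [_ {:keys [type id]}] (get by-id [type id])))]
  (serdes.load/load-metabase source))  ; load into the destination appdb
```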
---
 .../serialization/v2/extract.clj              |  18 +
 .../serialization/v2/ingest.clj               |  20 +
 .../serialization/v2/load.clj                 |  92 +++++
 .../serialization/v2/models.clj               |   6 +
 .../serialization/v2/extract_test.clj         |  71 ++++
 .../serialization/v2/load_test.clj            | 133 +++++++
 src/metabase/models/collection.clj            |  53 +++
 src/metabase/models/interface.clj             |   4 +-
 src/metabase/models/serialization/base.clj    | 369 ++++++++++++++++++
 src/metabase/models/setting.clj               |  13 +-
 test/metabase/api/dashboard_test.clj          |   4 +-
 test/metabase/test/util.clj                   |   2 +-
 12 files changed, 780 insertions(+), 5 deletions(-)
 create mode 100644 enterprise/backend/src/metabase_enterprise/serialization/v2/extract.clj
 create mode 100644 enterprise/backend/src/metabase_enterprise/serialization/v2/ingest.clj
 create mode 100644 enterprise/backend/src/metabase_enterprise/serialization/v2/load.clj
 create mode 100644 enterprise/backend/src/metabase_enterprise/serialization/v2/models.clj
 create mode 100644 enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj
 create mode 100644 enterprise/backend/test/metabase_enterprise/serialization/v2/load_test.clj
 create mode 100644 src/metabase/models/serialization/base.clj

diff --git a/enterprise/backend/src/metabase_enterprise/serialization/v2/extract.clj b/enterprise/backend/src/metabase_enterprise/serialization/v2/extract.clj
new file mode 100644
index 00000000000..06f7e4a310f
--- /dev/null
+++ b/enterprise/backend/src/metabase_enterprise/serialization/v2/extract.clj
@@ -0,0 +1,18 @@
+(ns metabase-enterprise.serialization.v2.extract
+  "Extraction is the first step in serializing a Metabase appdb so it can be eg. written to disk.
+
+  See the detailed descriptions of the (de)serialization processes in [[metabase.models.serialization.base]]."
+  (:require [metabase-enterprise.serialization.v2.models :as serdes.models]
+            [metabase.models.serialization.base :as serdes.base]))
+
+(defn extract-metabase
+  "Extracts the appdb into a reducible stream of serializable maps, with `:serdes/meta` keys.
+
+  This is the first step in serialization; see [[metabase-enterprise.serialization.v2.storage]] for actually writing to
+  files. Only the models listed in [[serdes.models/exported-models]] get exported.
+
+  Takes an options map which is passed on to [[serdes.base/extract-all]] for each model. The options are documented
+  there."
+  [opts]
+  (eduction cat (for [model serdes.models/exported-models]
+                   (serdes.base/extract-all model opts))))
diff --git a/enterprise/backend/src/metabase_enterprise/serialization/v2/ingest.clj b/enterprise/backend/src/metabase_enterprise/serialization/v2/ingest.clj
new file mode 100644
index 00000000000..d3dd8f33425
--- /dev/null
+++ b/enterprise/backend/src/metabase_enterprise/serialization/v2/ingest.clj
@@ -0,0 +1,20 @@
+(ns metabase-enterprise.serialization.v2.ingest
+  "Ingestion is the first step in deserialization - reading from the export format (eg. a tree of YAML files) and
+  producing Clojure maps with `:serdes/meta` keys.
+
+  See the detailed description of the (de)serialization processes in [[metabase.models.serialization.base]]."
+  (:require [potemkin.types :as p]))
+
+(p/defprotocol+ Ingestable
+  ;; Represents a data source for deserializing previously-exported appdb content into this Metabase instance.
+  ;; This is written as a protocol since overriding it with [[reify]] is useful for testing.
+  (ingest-list
+    [this]
+    "Return a reducible stream of meta-maps, one for each entity in the dump.
+    See the description of the `:serdes/meta` maps in [[metabase.models.serialization.base]].
+
+    The order is not specified and should not be relied upon!")
+
+  (ingest-one
+    [this meta-map]
+    "Given one of the meta-maps returned by [[ingest-list]], read in and return the entire corresponding entity."))
diff --git a/enterprise/backend/src/metabase_enterprise/serialization/v2/load.clj b/enterprise/backend/src/metabase_enterprise/serialization/v2/load.clj
new file mode 100644
index 00000000000..e70254ea905
--- /dev/null
+++ b/enterprise/backend/src/metabase_enterprise/serialization/v2/load.clj
@@ -0,0 +1,92 @@
+(ns metabase-enterprise.serialization.v2.load
+  "Loading is the interesting part of deserialization: integrating the maps \"ingested\" from files into the appdb.
+  See the detailed breakdown of the (de)serialization processes in [[metabase.models.serialization.base]]."
+  (:require [medley.core :as m]
+            [metabase-enterprise.serialization.v2.ingest :as serdes.ingest]
+            [metabase-enterprise.serialization.v2.models :as serdes.models]
+            [metabase.models.serialization.base :as serdes.base]
+            [toucan.db :as db]))
+
+(defn- load-prescan-model [model]
+  (transduce (map (fn [[eid ih pk]]
+                    {:by-entity-id     [eid pk]
+                     :by-identity-hash [ih pk]}))
+             (partial merge-with conj)
+             {:by-entity-id {} :by-identity-hash {}}
+             (serdes.base/load-prescan-all model)))
+
+(defn- load-prescan
+  "For all the exported models in the list, run the prescan process."
+  []
+  (into {} (for [model serdes.models/exported-models]
+             [model (load-prescan-model model)])))
+
+;; These are on ice for now; they'll be dusted off as the YAML storage/ingestion code is added in a later PR.
+;; (defn- path-parts [path]
+;;   (->> (java.nio.file.Paths/get path (into-array String []))
+;;        (.iterator)
+;;        (iterator-seq)
+;;        (map str)))
+;;
+;; (defn- id-from-path [path]
+;;   (let [^String file (last (path-parts path))
+;;         base         (.substring file 0 (.lastIndexOf file "."))
+;;         ; Things with human-readable names use the form identity_hash+human_name.yaml
+;;         plus         (.indexOf base "+")]
+;;     (if (< plus 0)
+;;       base
+;;       (.substring base 0 plus))))
+
+(declare load-one)
+
+(defn- load-deps
+  "Given a list of `deps` (raw IDs), convert it to a list of meta-maps and `load-one` them all."
+  [ctx deps]
+  (if (empty? deps)
+    ctx
+    (reduce load-one ctx (map (:from-ids ctx) deps))))
+
+(defn- load-one
+  "Loads a single meta-map into the appdb, doing the necessary bookkeeping.
+
+  If the incoming entity has any dependencies, they are processed first (postorder) so that any foreign key references
+  in this entity can be resolved properly.
+
+  This is mostly bookkeeping for the overall deserialization process - the actual load of any given entity is done by
+  [[metabase.models.serialization.base/load-one!]] and its various overridable parts, which see.
+
+  Circular dependencies are not allowed, and are detected and thrown as an error."
+  [{:keys [expanding ingestion seen] :as ctx} {:keys [id type] :as meta-map}]
+  (cond
+    (expanding id) (throw (ex-info (format "Circular dependency on %s %s" type id) {}))
+    (seen id)      ctx ; Already been done, just skip it.
+    :else (let [ingested (serdes.ingest/ingest-one ingestion meta-map)
+                model    (db/resolve-model (symbol type))
+                deps     (serdes.base/serdes-dependencies ingested)
+                ctx      (-> ctx
+                             (update :expanding conj id)
+                             (load-deps deps)
+                             (update :seen conj id)
+                             (update :expanding disj id))
+                pk       (serdes.base/load-one!
+                           ingested
+                           (or (get-in ctx [:local (name model) :by-entity-id id])
+                               (get-in ctx [:local (name model) :by-identity-hash id])))]
+            (assoc-in ctx [:local
+                           (name model)
+                           (if (serdes.base/entity-id? id) :by-entity-id :by-identity-hash)
+                           id]
+                      pk))))
+
+(defn load-metabase
+  "Loads in a database export from an ingestion source, which is any Ingestable instance."
+  [ingestion]
+  ;; We proceed in the arbitrary order of ingest-list, deserializing all the files. Their declared dependencies guide
+  ;; the import, and make sure all containers are imported before contents, etc.
+  (let [contents (serdes.ingest/ingest-list ingestion)]
+    (reduce load-one {:local     (load-prescan)
+                      :expanding #{}
+                      :seen      #{}
+                      :ingestion ingestion
+                      :from-ids  (m/index-by :id contents)}
+            contents)))
diff --git a/enterprise/backend/src/metabase_enterprise/serialization/v2/models.clj b/enterprise/backend/src/metabase_enterprise/serialization/v2/models.clj
new file mode 100644
index 00000000000..8b004ac0db8
--- /dev/null
+++ b/enterprise/backend/src/metabase_enterprise/serialization/v2/models.clj
@@ -0,0 +1,6 @@
+(ns metabase-enterprise.serialization.v2.models)
+
+(def exported-models
+  "The list of models which are exported by serialization. Used for production code and by tests."
+  ["Collection"
+   "Setting"])
diff --git a/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj b/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj
new file mode 100644
index 00000000000..90574451886
--- /dev/null
+++ b/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj
@@ -0,0 +1,71 @@
+(ns metabase-enterprise.serialization.v2.extract-test
+  (:require [clojure.test :refer :all]
+            [metabase-enterprise.serialization.test-util :as ts]
+            [metabase-enterprise.serialization.v2.extract :as extract]
+            [metabase.models :refer [Collection User]]
+            [metabase.models.serialization.base :as serdes.base]))
+
+(defn- select-one [model-name where]
+  (first (into [] (serdes.base/raw-reducible-query model-name {:where where}))))
+
+(deftest fundamentals-test
+  (ts/with-empty-h2-app-db
+    (ts/with-temp-dpc [Collection [{coll-id    :id
+                                    coll-eid   :entity_id
+                                    coll-slug  :slug}      {:name "Some Collection"}]
+                       Collection [{child-id   :id
+                                    child-eid  :entity_id
+                                    child-slug :slug}      {:name "Nested Collection"
+                                                            :location (format "/%s/" coll-id)}]
+
+                       User       [{mark-id :id} {:first_name "Mark"
+                                                  :last_name  "Knopfler"
+                                                  :email      "mark@direstrai.ts"}]
+                       Collection [{pc-id   :id
+                                    pc-eid  :entity_id
+                                    pc-slug :slug}     {:name "Mark's Personal Collection"
+                                                        :personal_owner_id mark-id}]]
+
+      (testing "a top-level collection is extracted correctly"
+        (let [ser (serdes.base/extract-one "Collection" (select-one "Collection" [:= :id coll-id]))]
+          (is (= {:type "Collection" :id coll-eid :label coll-slug} (:serdes/meta ser)))
+          (is (not (contains? ser :location)))
+          (is (not (contains? ser :id)))
+          (is (nil? (:personal_owner_id ser)))
+          (is (contains? ser :parent_id))
+          (is (nil? (:parent_id ser)))))
+
+      (testing "a nested collection is extracted with the right parent_id"
+        (let [ser (serdes.base/extract-one "Collection" (select-one "Collection" [:= :id child-id]))]
+          (is (= {:type "Collection" :id child-eid :label child-slug} (:serdes/meta ser)))
+          (is (not (contains? ser :location)))
+          (is (not (contains? ser :id)))
+          (is (= coll-eid (:parent_id ser)))
+          (is (nil? (:personal_owner_id ser)))))
+
+      (testing "personal collections are extracted with email as key"
+        (let [ser (serdes.base/extract-one "Collection" (select-one "Collection" [:= :id pc-id]))]
+          (is (= {:type "Collection" :id pc-eid :label pc-slug} (:serdes/meta ser)))
+          (is (not (contains? ser :location)))
+          (is (not (contains? ser :id)))
+          (is (nil? (:parent_id ser)))
+          (is (= "mark@direstrai.ts" (:personal_owner_id ser)))))
+
+      (testing "overall extraction returns the expected set"
+        (letfn [(collections [extraction] (->> extraction
+                                               (into [])
+                                               (map :serdes/meta)
+                                               (filter #(= "Collection" (:type %)))
+                                               (map :id)
+                                               set))]
+          (testing "no user specified"
+            (is (= #{coll-eid child-eid}
+                   (collections (extract/extract-metabase nil)))))
+
+          (testing "valid user specified"
+            (is (= #{coll-eid child-eid pc-eid}
+                   (collections (extract/extract-metabase {:user mark-id})))))
+
+          (testing "invalid user specified"
+            (is (= #{coll-eid child-eid}
+                   (collections (extract/extract-metabase {:user 218921}))))))))))
diff --git a/enterprise/backend/test/metabase_enterprise/serialization/v2/load_test.clj b/enterprise/backend/test/metabase_enterprise/serialization/v2/load_test.clj
new file mode 100644
index 00000000000..864b8252b70
--- /dev/null
+++ b/enterprise/backend/test/metabase_enterprise/serialization/v2/load_test.clj
@@ -0,0 +1,133 @@
+(ns metabase-enterprise.serialization.v2.load-test
+  (:require [clojure.test :refer :all]
+            [metabase-enterprise.serialization.test-util :as ts]
+            [metabase-enterprise.serialization.v2.extract :as serdes.extract]
+            [metabase-enterprise.serialization.v2.ingest :as serdes.ingest]
+            [metabase-enterprise.serialization.v2.load :as serdes.load]
+            [metabase.models :refer [Collection]]
+            [metabase.models.serialization.hash :as serdes.hash]
+            [toucan.db :as db]))
+
+(defn- ingestion-in-memory [extractions]
+  (let [mapped (into {} (for [{{:keys [type id]} :serdes/meta :as m} (into [] extractions)]
+                          [[type id] m]))]
+    (reify
+      serdes.ingest/Ingestable
+      (ingest-list [_]
+        (eduction (map :serdes/meta) (vals mapped)))
+      (ingest-one [_ {:keys [type id]}]
+        (or (get mapped [type id])
+            (throw (ex-info (format "Unknown ingestion target: %s %s" type id)
+                            {:type type :id id :world mapped})))))))
+
+;;; WARNING for test authors: [[extract/extract-metabase]] returns a lazy reducible value. To make sure you don't
+;;; confound your tests with data from your dev appdb, remember to eagerly
+;;; `(into [] (extract/extract-metabase ...))` in these tests.
+
+(deftest load-basics-test
+  (testing "a simple, fresh collection is imported"
+    (let [serialized (atom nil)
+          eid1       "123456789abcdef_0123"]
+      (ts/with-source-and-dest-dbs
+        (testing "extraction succeeds"
+          (ts/with-source-db
+            (ts/create! Collection :name "Basic Collection" :entity_id eid1)
+            (reset! serialized (into [] (serdes.extract/extract-metabase {})))
+            (is (some (fn [{{:keys [type id]} :serdes/meta}]
+                        (and (= type "Collection") (= id eid1)))
+                      @serialized))))
+
+        (testing "loading into an empty database succeeds"
+          (ts/with-dest-db
+            (serdes.load/load-metabase (ingestion-in-memory @serialized))
+            (let [colls (db/select Collection)]
+              (is (= 1 (count colls)))
+              (is (= "Basic Collection" (:name (first colls))))
+              (is (= eid1               (:entity_id (first colls)))))))
+
+        (testing "loading again into the same database does not duplicate"
+          (ts/with-dest-db
+            (serdes.load/load-metabase (ingestion-in-memory @serialized))
+            (let [colls (db/select Collection)]
+              (is (= 1 (count colls)))
+              (is (= "Basic Collection" (:name (first colls))))
+              (is (= eid1               (:entity_id (first colls)))))))))))
+
+(deftest deserialization-nested-collections-test
+  (testing "with a three-level nesting of collections"
+    (let [serialized (atom nil)
+          parent     (atom nil)
+          child      (atom nil)
+          grandchild (atom nil)]
+      (ts/with-source-and-dest-dbs
+        (testing "serialization of the three collections"
+          (ts/with-source-db
+            (reset! parent     (ts/create! Collection :name "Parent Collection" :location "/"))
+            (reset! child      (ts/create! Collection
+                                           :name "Child Collection"
+                                           :location (format "/%d/" (:id @parent))))
+            (reset! grandchild (ts/create! Collection
+                                           :name "Grandchild Collection"
+                                           :location (format "/%d/%d/" (:id @parent) (:id @child))))
+            (reset! serialized (into [] (serdes.extract/extract-metabase {})))))
+
+        (testing "deserialization into a database that already has the parent, but with a different ID"
+          (ts/with-dest-db
+            (ts/create! Collection :name "Unrelated Collection")
+            (ts/create! Collection :name "Parent Collection" :location "/" :entity_id (:entity_id @parent))
+            (serdes.load/load-metabase (ingestion-in-memory @serialized))
+            (let [parent-dest     (db/select-one Collection :entity_id (:entity_id @parent))
+                  child-dest      (db/select-one Collection :entity_id (:entity_id @child))
+                  grandchild-dest (db/select-one Collection :entity_id (:entity_id @grandchild))]
+              (is (some? parent-dest))
+              (is (some? child-dest))
+              (is (some? grandchild-dest))
+              (is (not= (:id parent-dest) (:id @parent)) "should have different primary keys")
+              (is (= 4 (db/count Collection)))
+              (is (= "/"
+                     (:location parent-dest)))
+              (is (= (format "/%d/" (:id parent-dest))
+                     (:location child-dest)))
+              (is (= (format "/%d/%d/" (:id parent-dest) (:id child-dest))
+                     (:location grandchild-dest))))))))))
+
+(deftest deserialization-upsert-and-dupe-test
+  (testing "basic collections with their names changing, one without entity_id:"
+    (let [serialized (atom nil)
+          c1a        (atom nil)
+          c2a        (atom nil)
+          c1b        (atom nil)
+          c2b        (atom nil)]
+      (ts/with-source-and-dest-dbs
+        (testing "serializing the two collections"
+          (ts/with-source-db
+            (reset! c1b (ts/create! Collection :name "Renamed Collection 1"))
+            (reset! c2b (ts/create! Collection :name "Collection 2 version 2"))
+            (db/update! Collection (:id @c2b) {:entity_id nil})
+            (reset! c2b (db/select-one Collection :id (:id @c2b)))
+            (is (nil? (:entity_id @c2b)))
+            (reset! serialized (into [] (serdes.extract/extract-metabase {})))))
+
+        (testing "serialization should use identity hashes where no entity_id is defined"
+          (is (= #{(:entity_id @c1b)
+                   (serdes.hash/identity-hash @c2b)}
+                 (->> @serialized
+                      (map :serdes/meta)
+                      (filter #(= "Collection" (:type %)))
+                      (map :id)
+                      set))))
+
+        (testing "deserializing, the name change causes a duplicated collection"
+          (ts/with-dest-db
+            (reset! c1a (ts/create! Collection :name "Collection 1" :entity_id (:entity_id @c1b)))
+            (reset! c2a (ts/create! Collection :name "Collection 2 version 1"))
+            (db/update! Collection (:id @c2a) {:entity_id nil})
+            (reset! c2a (db/select-one Collection :id (:id @c2a)))
+            (is (nil? (:entity_id @c2a)))
+
+            (serdes.load/load-metabase (ingestion-in-memory @serialized))
+            (is (= 3 (db/count Collection)) "Collection 2 versions get duplicated, since the identity-hash changed")
+            (is (= #{"Renamed Collection 1"
+                     "Collection 2 version 1"
+                     "Collection 2 version 2"}
+                   (set (db/select-field :name Collection))))))))))
diff --git a/src/metabase/models/collection.clj b/src/metabase/models/collection.clj
index e37b71554fa..2cb7e779092 100644
--- a/src/metabase/models/collection.clj
+++ b/src/metabase/models/collection.clj
@@ -14,6 +14,7 @@
             [metabase.models.collection.root :as collection.root]
             [metabase.models.interface :as mi]
             [metabase.models.permissions :as perms :refer [Permissions]]
+            [metabase.models.serialization.base :as serdes.base]
             [metabase.models.serialization.hash :as serdes.hash]
             [metabase.public-settings.premium-features :as premium-features]
             [metabase.util :as u]
@@ -905,6 +906,58 @@
   serdes.hash/IdentityHashable
   {:identity-hash-fields (constantly [:name :namespace parent-identity-hash])})
 
+(defn- collection-query [maybe-user]
+  (serdes.base/raw-reducible-query
+    "Collection"
+    {:where [:and
+             [:= :archived false]
+             (if (nil? maybe-user)
+               [:is :personal_owner_id nil]
+               [:= :personal_owner_id maybe-user])]}))
+
+(defmethod serdes.base/extract-query "Collection" [_ {:keys [user]}]
+  (let [unowned (collection-query nil)]
+    (if user
+      (eduction cat [unowned (collection-query user)])
+      unowned)))
+
+(defmethod serdes.base/extract-one "Collection"
+  ;; Transform :location (which uses database IDs) into a portable :parent_id with the parent's entity ID.
+  ;; Also transform :personal_owner_id from a database ID to the email string, if it's defined.
+  ;; Use the :slug as the human-readable label.
+  [_ coll]
+  (let [parent       (some-> coll
+                             :id
+                             Collection
+                             (hydrate :parent_id)
+                             :parent_id
+                             Collection)
+        parent-id    (when parent
+                       (or (:entity_id parent) (serdes.hash/identity-hash parent)))
+        owner-email  (when (:personal_owner_id coll)
+                       (db/select-one-field :email 'User :id (:personal_owner_id coll)))]
+    (-> (serdes.base/extract-one-basics "Collection" coll)
+        (dissoc :location)
+        (assoc :parent_id parent-id :personal_owner_id owner-email)
+        (assoc-in [:serdes/meta :label] (:slug coll)))))
+
+(defmethod serdes.base/load-xform "Collection" [{:keys [parent_id personal_owner_id] :as contents}]
+  (let [loc        (if parent_id
+                     (let [{:keys [id location]} (serdes.base/lookup-by-id Collection parent_id)]
+                       (str location id "/"))
+                     "/")
+        user-id    (when personal_owner_id
+                     (db/select-one-field :id 'User :email personal_owner_id))]
+    (-> contents
+        serdes.base/load-xform-basics
+        (dissoc :parent_id)
+        (assoc :location loc :personal_owner_id user-id))))
+
+(defmethod serdes.base/serdes-dependencies "Collection"
+  [{:keys [parent_id]}]
+  (if parent_id
+    [parent_id]
+    []))
 
 ;;; +----------------------------------------------------------------------------------------------------------------+
 ;;; |                                           Perms Checking Helper Fns                                            |
diff --git a/src/metabase/models/interface.clj b/src/metabase/models/interface.clj
index f8d81c34f23..027e6060c2a 100644
--- a/src/metabase/models/interface.clj
+++ b/src/metabase/models/interface.clj
@@ -237,7 +237,9 @@
   :update add-updated-at-timestamp)
 
 (defn- add-entity-id [obj & _]
-  (assoc obj :entity_id (u/generate-nano-id)))
+  (if (contains? obj :entity_id)
+    obj
+    (assoc obj :entity_id (u/generate-nano-id))))
 
 (models/add-property! :entity_id
   :insert add-entity-id)
diff --git a/src/metabase/models/serialization/base.clj b/src/metabase/models/serialization/base.clj
new file mode 100644
index 00000000000..c2bd53143a1
--- /dev/null
+++ b/src/metabase/models/serialization/base.clj
@@ -0,0 +1,369 @@
+(ns metabase.models.serialization.base
+  "Defines several helper functions and multimethods for the serialization system.
+  Serialization is an enterprise feature, but in the interest of keeping all the code for an entity in one place, these
+  methods are defined here and implemented for all the exported models.
+
+  Whether to export a new model:
+  - Generally, the high-profile user facing things (databases, questions, dashboards, snippets, etc.) are exported.
+  - Internal or automatic things (users, activity logs, permissions) are not.
+
+  If the model is not exported, add it to the exclusion lists in the tests. Every model should be explicitly listed as
+  exported or not, and a test enforces this so serialization isn't forgotten for new models."
+  (:require [clojure.tools.logging :as log]
+            [metabase.models.serialization.hash :as serdes.hash]
+            [toucan.db :as db]
+            [toucan.models :as models]))
+
+;;; +----------------------------------------------------------------------------------------------------------------+
+;;; |                                          Serialization Process                                                 |
+;;; +----------------------------------------------------------------------------------------------------------------+
+;;; Serialization happens in two stages: extraction and storage. These are independent and deliberately decoupled.
+;;; The result of extraction is a reducible stream of Clojure maps with `:serdes/meta` keys on them (see below).
+;;; In particular, extraction does not care about file formats or other such things.
+;;;
+;;; Storage takes the stream from extraction and actually stores it or sends it. Traditionally we have serialized to a
+;;; directory tree full of YAML files, and that's the only storage approach implemented here. But since the process is
+;;; decoupled, we or a user could write their own storage layer, using JSON or protocol buffers or any other format.
+;;;
+;;; Both extraction and storage are written as a set of multimethods, with defaults for the common path.
+;;; Note that extraction is controlled by a map of options and settings, detailed below.
+;;;
+;;; Extraction:
+;;; - Top-level serialization code [[metabase-enterprise.serialization.v2.extract/extract-metabase]] has a list of
+;;;   models to be exported.
+;;;     - A test enforces that all models are either exported, or explicitly excluded, so new ones can't be forgotten.
+;;; - It calls `(extract-all "ModelName" opts)` for each model.
+;;;     - The default for this calls `(extract-query "ModelName" opts)`, getting back a reducible stream of entities.
+;;;     - For each entity in that stream, it calls `(extract-one "ModelName" entity)`, which converts the map from the
+;;;       database to a portable map with `:serdes/meta` on it. Eg. no database IDs as foreign keys.
+;;; - The default [[extract-all]] should work for most models (override [[extract-query]] and [[extract-one]] instead),
+;;;   but it can be overridden if needed.
+;;;
+;;; The end result of extraction is a reducible stream of Clojure maps; this is passed to storage directly, along with
+;;; the map of options.
+;;;
+;;; Options currently supported by extraction:
+;;; - `:user 6` giving the primary key for a user whose personal collections should be extracted.
+;;;
+;;; Storage:
+;;; The storage system might transform that stream in some arbitrary way. Storage is a dead end - it should perform side
+;;; effects like writing to the disk or network, and return nothing.
+
+(defmulti extract-all
+  "Entry point for extracting all entities of a particular model:
+  `(extract-all \"ModelName\" {opts...})`
+  Keyed on the model name.
+
+  Returns a reducible stream of extracted maps (ie. vanilla Clojure maps with `:serdes/meta` keys).
+
+  You probably don't want to implement this directly. The default implementation delegates to [[extract-query]] and
+  [[extract-one]], which are usually more convenient to override."
+  (fn [model _] model))
+
+(defmulti extract-query
+  "Performs the select query, possibly filtered, for all the entities of this type that should be serialized. Called
+  from [[extract-all]]'s default implementation.
+
+  `(extract-query \"ModelName\" opts)`
+
+  Keyed on the model name, the first argument.
+
+  Returns a reducible stream of maps with `:serdes/meta` keys on them. It should *not* be a stream of Toucan entities,
+  but vanilla Clojure maps.
+
+  In fact, Toucan's high-level niceties (eg. expanding JSON-encoded fields to Clojure data, decrypting, type
+  conversions, or hydrating some relationship by default) are counterproductive when our goal is a database-level
+  export. As a specific example, [[db/simple-select]] expands JSON but [[db/simple-insert!]] doesn't put it back.
+  There's also no `simple-update!`, making a fresh insert diverge from an update.
+
+  Defaults to using the helper `(raw-reducible-query model)` for the entire table, which is equivalent to
+  `(db/simple-select-reducible model)` but without running post-select handlers. This returns vanilla maps, not
+  [[models/IModel]] instances.
+
+  You may want to override this to eg. skip archived entities, or otherwise filter what gets serialized. Prefer using
+  the two-argument form of [[raw-reducible-query]]."
+  (fn [model _] model))
+
+(defmulti extract-one
+  "Extracts a single entity retrieved from the database into a portable map with `:serdes/meta` attached.
+
+  The default implementation uses the model name as the `:type` and either `:entity_id` or [[serdes.hash/identity-hash]]
+  as the `:id`. It also strips off the database's numeric primary key.
+
+  That suffices for a few simple entities, but most entities will need to override this.
+  They should follow the pattern of:
+  - Convert to a vanilla Clojure map, not a [[models/IModel]] instance.
+  - Drop the numeric database primary key
+  - Replace any foreign keys with portable values (eg. entity IDs or `identity-hash`es, owning user's ID with their
+    email, etc.)
+  - Consider attaching a human-friendly `:label` under `:serdes/meta`. (Eg. a Collection's `:slug`)
+
+  When overriding this, [[extract-one-basics]] is probably a useful starting point.
+
+  Keyed by the model name of the entity, the first argument."
+  (fn [model _] model))
+
+(defmethod extract-all :default [model opts]
+  (eduction (map (partial extract-one model))
+            (extract-query model opts)))
+
+(defn raw-reducible-query
+  "Helper for calling Toucan's raw [[db/reducible-query]]. With just the model name, fetches everything. You can filter
+  with a HoneySQL map like {:where [:= :archived true]}.
+
+  Returns a reducible stream of JDBC row maps."
+  ([model-name]
+   (raw-reducible-query model-name nil))
+  ([model-name honeysql-form]
+   (db/reducible-query (merge {:select [:*] :from [(symbol model-name)]}
+                              honeysql-form))))
+
+(defmethod extract-query :default [model-name _]
+  (raw-reducible-query model-name))
+
+(defn extract-one-basics
+  "A helper for writing [[extract-one]] implementations. It takes care of the basics:
+  - Convert to a vanilla Clojure map.
+  - Add `:serdes/meta`.
+  - Drop the primary key.
+
+  Returns the Clojure map."
+  [model-name entity]
+  (let [model (db/resolve-model (symbol model-name))
+        pk    (models/primary-key model)]
+    (-> entity
+        (assoc :serdes/meta {:type model-name
+                             :id   (or (:entity_id entity)
+                                       (serdes.hash/identity-hash (model (get entity pk))))})
+        (dissoc pk))))
+
+(defmethod extract-one :default [model-name entity]
+  (extract-one-basics model-name entity))
+
+;;; +----------------------------------------------------------------------------------------------------------------+
+;;; |                                         Deserialization Process                                                |
+;;; +----------------------------------------------------------------------------------------------------------------+
+;;; Deserialization is split into two stages, mirroring serialization. They are called ingestion and loading.
+;;; Ingestion turns whatever serialized form (eg. a tree of YAML files) was produced by storage into Clojure maps with
+;;; `:serdes/meta` maps. Loading imports those entities into the appdb, updating and inserting rows as needed.
+;;;
+;;; Ingestion:
+;;; Ingestion is intended to be a black box, like storage above. [[Ingestable]] is a protocol to allow easy [[reify]]
+;;; usage for testing in-memory deserialization.
+;;;
+;;; Factory functions consume some details (like a file path) and return an [[Ingestable]], with its two methods:
+;;; - `(ingest-list ingestable)` returns a reducible stream of `:serdes/meta` maps in any order.
+;;; - `(ingest-one ingestable meta-map)` ingests a single entity into memory, returning it as a map.
+;;;
+;;; This two-stage design avoids needing all the data in memory at once, where that's practical with the underlying
+;;; storage media (eg. files).
+;;;
+;;; Loading:
+;;; Loading tries to find corresponding entities in the destination appdb by `entity_id` or `identity-hash`, and update
+;;; those rows rather than duplicating.
+;;; The entry point is [[metabase-enterprise.serialization.v2.load/load-metabase]]. The top-level process works like
+;;; this:
+;;; - `(load-prescan-all "ModelName")` is called, which selects the model's entire table as a reducible stream and calls
+;;;   [[load-prescan-one]] on each entry.
+;;;     - The default for that is usually the right thing.
+;;; - `(load-prescan-one entity)` turns a particular entity into an `[entity_id identity-hash primary-key]` triple.
+;;;     - The default will work for models with a literal `entity_id` field; those with alternative IDs (database,
+;;;       table, field, setting, etc.) should override this method.
+;;; - Prescanning complete, `(ingest-list ingestable)` gets the metadata for every exported entity in arbitrary order.
+;;;     - `(ingest-one ingestable meta-map)` is called on each, first to read the full entity into memory, then
+;;;     - `(serdes-dependencies ingested)` to get a list of other IDs (entity IDs or identity hashes).
+;;;         - The default is an empty list.
+;;;     - The idea of dependencies is eg. a database must be loaded before its tables, a table before its fields, a
+;;;       collection's ancestors before the collection itself.
+;;;     - Dependencies are loaded recursively in postorder; circular dependencies cause the process to throw.
+;;; - Having found an entity it can really load, the core code will check its table of IDs found by prescanning.
+;;;     - Then it calls `(load-one! ingested maybe-local-entity)`, passing the `ingested` value and either `nil` or the
+;;;       Toucan entity corresponding to the incoming map.
+;;;     - `load-one!` is a side-effecting black box to the rest of the deserialization process.
+;;;       It returns the primary key of the new or existing entity, which is necessary to resolve foreign keys between
+;;;       imported entities.
+;;;     - The table of "local" entities found by the prescan is updated to include newly loaded ones.
+;;;
+;;;
+;;; `load-one!` has a default implementation that works for most models:
+;;; - Call `(load-xform ingested)` to massage the map as needed.
+;;;     - This is the spot to override, for example to convert a foreign key from portable entity ID into a database ID.
+;;; - Then, call either:
+;;;     - `(load-update! ingested local-entity)` if the local entity exists, or
+;;;     - `(load-insert! ingested)` if the entity is new.
+;;;   Both of these have the obvious defaults of [[db/update!]] or [[db/simple-insert!]].
+
+;;; +----------------------------------------------------------------------------------------------------------------+
+;;; |                                            :serdes/meta maps                                                   |
+;;; +----------------------------------------------------------------------------------------------------------------+
+;;; The Clojure maps from extraction and ingestion always include a special key `:serdes/meta` giving some information
+;;; about the serialized entity. The value is always a map like:
+;;; `{:type "ModelName" :id "entity ID or identity hash string" :label "Human-readable name"}`
+;;; `:type` and `:id` are required; `:label` is optional.
+;;;
+;;; Many of the multimethods are keyed on the `:type` field.
+
+(defmulti load-prescan-all
+  "Returns a reducible stream of `[entity_id identity-hash primary-key]` triples for the entire table.
+
+  Defaults to running [[load-prescan-one]] over each entity returned by [[raw-reducible-query]] for this model.
+  Override this method if filtering is needed.
+
+  Keyed on the model name."
+  identity)
+
+(defmulti load-prescan-one
+  "Converts a database entity into a `[entity_id identity-hash primary-key]` triple for the deserialization machinery.
+  Called with the Toucan model (*not* this entity), and the JDBC map for the entity in question.
+
+  Defaults to using a literal `:entity_id` column. For models with a different entity ID (eg. a Table's name, a
+  Setting's key), override this method.
+
+  Keyed on the model name."
+  (fn [model _] (name model)))
+
+(defmethod load-prescan-all :default [model-name]
+  (let [model (db/resolve-model (symbol model-name))]
+    (eduction (map (partial load-prescan-one model))
+              (raw-reducible-query model-name))))
+
+(defmethod load-prescan-one :default [model entity]
+  (let [pk  (models/primary-key model)
+        key (get entity pk)]
+    [(:entity_id entity)
+     (serdes.hash/identity-hash (db/select-one model pk key)) ; TODO This sucks for identity-hash!
+     key]))
+
+(defn- ingested-model
+  "The dispatch function for several of the load multimethods: dispatching on the type of the incoming entity."
+  [ingested]
+  (-> ingested :serdes/meta :type))
+
+(defmulti serdes-dependencies
+  "Given an entity map as ingested (not a Toucan entity) returns a (possibly empty) list of its dependencies, where each
+  dependency is represented by either the entity ID or identity hash of the target entity.
+
+  Keyed on the model name.
+  Default implementation returns an empty vector, so only models that have dependencies need to implement this."
+  ingested-model)
+
+(defmethod serdes-dependencies :default [_]
+  [])
+
+(defmulti load-xform
+  "Given the incoming vanilla map as ingested, transform it so it's suitable for sending to the database (in eg.
+  [[db/simple-insert!]]).
+  For example, this should convert any foreign keys back from a portable entity ID or identity hash into a numeric
+  database ID. This is the mirror of [[extract-one]], in spirit. (They're not strictly inverses - [[extract-one]] drops
+  the primary key but this need not put one back, for example.)
+
+  By default, this just calls [[load-xform-basics]].
+  If you override this, call [[load-xform-basics]] as well."
+  ingested-model)
+
+(defn load-xform-basics
+  "Performs the usual steps for an incoming entity:
+  - Drop :serdes/meta
+
+  You should call this as a first step from any implementation of [[load-xform]].
+
+  This is a mirror (but not precise inverse) of [[extract-one-basics]]."
+  [ingested]
+  (dissoc ingested :serdes/meta))
+
+(defmethod load-xform :default [ingested]
+  (load-xform-basics ingested))
+
+(defmulti load-update!
+  "Called by the default [[load-one!]] if there is a corresponding entity already in the appdb.
+  The first argument is the model name, the second the incoming map we're deserializing, and the third is the Toucan
+  entity found in the appdb.
+
+  Defaults to a straightforward [[db/update!]], and you probably don't need to override it.
+
+  Keyed on the model name (the first argument), because the second argument doesn't have its `:serdes/meta` anymore.
+
+  Returns the primary key of the updated entity."
+  (fn [model _ _] model))
+
+(defmethod load-update! :default [model-name ingested local]
+  (let [model (db/resolve-model (symbol model-name))
+        pk    (models/primary-key model)
+        id    (get local pk)
+        ; Get a WHERE clause, but then strip off the WHERE part to include it in the JDBC call below.
+        ;where (update (db/honeysql->sql {:where [:= pk id]}) 0
+        ;              #(.substring 5))
+        ]
+    (log/tracef "Upserting %s %d: old %s new %s" model-name id (pr-str local) (pr-str ingested))
+    ; Using the two-argument form of [[db/update!]] that takes the model and a HoneySQL form for the actual update.
+    ; It works differently from the more typical `(db/update! 'Model id updates...)` form: this form doesn't run any of
+    ; the pre-update magic, it just updates the database directly.
+    (db/update! (symbol model-name) {:where [:= pk id] :set ingested})
+    id))
+
+(defmulti load-insert!
+  "Called by the default [[load-one!]] if there is no corresponding entity already in the appdb.
+
+  Defaults to a straightforward [[db/simple-insert!]], and you probably don't need to implement this.
+  Note that [[db/insert!]] should be avoided - we don't want to populate the `:entity_id` field if it wasn't already
+  set!
+
+  Keyed on the model name (the first argument), because the second argument doesn't have its `:serdes/meta` anymore.
+
+  Returns the primary key of the newly inserted entity."
+  (fn [model _] model))
+
+(defmethod load-insert! :default [model ingested]
+  (log/tracef "Inserting %s: %s" model (pr-str ingested))
+  ; Toucan's simple-insert! actually does the right thing for our purposes: it doesn't call pre-insert or post-insert.
+  (db/simple-insert! (symbol model) ingested))
+
+(defmulti load-one!
+  "Black box for integrating a deserialized entity into this appdb.
+  `(load-one! ingested maybe-local)`
+
+  `ingested` is the vanilla map from ingestion, with the `:serdes/meta` key on it.
+  `maybe-local` is either `nil`, or the primary key of the corresponding entity already in the appdb.
+
+  Defaults to calling [[load-xform]] to massage the incoming map, then either [[load-update!]] if `maybe-local`
+  exists, or [[load-insert!]] if it's `nil`.
+
+  Prefer overriding [[load-xform]], and if necessary [[load-update!]] and [[load-insert!]], rather than this.
+
+  Keyed on the model name.
+
+  Returns the primary key of the updated or inserted entity."
+  (fn [ingested _]
+    (ingested-model ingested)))
+
+(defmethod load-one! :default [ingested maybe-local-id]
+  (let [model    (ingested-model ingested)
+        pkey     (models/primary-key (db/resolve-model (symbol model)))
+        adjusted (load-xform ingested)]
+    (if (nil? maybe-local-id)
+      (load-insert! model adjusted)
+      (load-update! model adjusted (db/select-one (symbol model) pkey maybe-local-id)))))
+
+(defn entity-id?
+  "Checks if the given string is a 21-character NanoID. Useful for telling entity IDs apart from identity hashes."
+  [id-str]
+  (boolean (re-matches #"^[A-Za-z0-9_-]{21}$" id-str)))
+
+(defn- find-by-identity-hash
+  "Given a model and a target identity hash, this scans the appdb for any instance of the model corresponding to the
+  hash. Does a complete scan, so this should be called sparingly!"
+  ;; TODO This should be able to use a cache of identity-hash values from the start of the deserialization process.
+  [model id-hash]
+  (->> (db/select-reducible model)
+       (into [] (comp (filter #(= id-hash (serdes.hash/identity-hash %)))
+                      (take 1)))
+       first))
+
+(defn lookup-by-id
+  "Given an ID string, this endeavours to find the matching entity, whether it's an entity ID or identity hash.
+  This is useful when writing [[load-xform]] to turn a foreign key from a portable form to an appdb ID.
+  Returns a Toucan entity or nil."
+  [model id-str]
+  (if (entity-id? id-str)
+    (db/select-one model :entity_id id-str)
+    (find-by-identity-hash model id-str)))
diff --git a/src/metabase/models/setting.clj b/src/metabase/models/setting.clj
index b3002a88967..0a3837ec4cd 100644
--- a/src/metabase/models/setting.clj
+++ b/src/metabase/models/setting.clj
@@ -81,6 +81,7 @@
             [environ.core :as env]
             [medley.core :as m]
             [metabase.api.common :as api]
+            [metabase.models.serialization.base :as serdes.base]
             [metabase.models.serialization.hash :as serdes.hash]
             [metabase.models.setting.cache :as setting.cache]
             [metabase.plugins.classloader :as classloader]
@@ -132,6 +133,8 @@
   Primarily used in test to disable retired setting check."
   false)
 
+(declare admin-writable-site-wide-settings get-value-of-type set-value-of-type!)
+
 (models/defmodel Setting
   "The model that underlies [[defsetting]]."
   :setting)
@@ -145,7 +148,15 @@
   serdes.hash/IdentityHashable
   {:identity-hash-fields (constantly [:key])})
 
-(declare get-value-of-type)
+(defmethod serdes.base/extract-all "Setting" [_model _opts]
+  (for [{:keys [key value]} (admin-writable-site-wide-settings
+                              :getter (partial get-value-of-type :string))]
+    {:serdes/meta {:type "Setting" :id (name key)}
+     :key key
+     :value value}))
+
+(defmethod serdes.base/load-one! "Setting" [{:keys [key value]} _]
+  (set-value-of-type! :string key value))
 
 (def ^:private Type
   (s/pred (fn [a-type]
diff --git a/test/metabase/api/dashboard_test.clj b/test/metabase/api/dashboard_test.clj
index d7c1ab7b0ef..515910f621a 100644
--- a/test/metabase/api/dashboard_test.clj
+++ b/test/metabase/api/dashboard_test.clj
@@ -1410,9 +1410,9 @@
 
     (testing "Should work if Dashboard has multiple mappings for a single param"
       (with-chain-filter-fixtures [{:keys [dashboard card dashcard param-keys]}]
-        (mt/with-temp* [Card          [card-2 (dissoc card :id)]
+        (mt/with-temp* [Card          [card-2 (dissoc card :id :entity_id)]
                         DashboardCard [dashcard-2 (-> dashcard
-                                                      (dissoc :id :card_id)
+                                                      (dissoc :id :card_id :entity_id)
                                                       (assoc  :card_id (:id card-2)))]]
           (is (= ["African" "American" "Artisan"]
                  (take 3 (mt/user-http-request :rasta :get 200 (chain-filter-values-url
diff --git a/test/metabase/test/util.clj b/test/metabase/test/util.clj
index d8c743ea6da..d158334fdea 100644
--- a/test/metabase/test/util.clj
+++ b/test/metabase/test/util.clj
@@ -733,7 +733,7 @@
       (let [card-count-before (db/count Card)
             card-name         (random-name)]
         (with-model-cleanup [Card]
-          (db/insert! Card (-> other-card (dissoc :id) (assoc :name card-name)))
+          (db/insert! Card (-> other-card (dissoc :id :entity_id) (assoc :name card-name)))
           (testing "Card count should have increased by one"
             (is (= (inc card-count-before)
                    (db/count Card))))
-- 
GitLab