From c952c87c720b833ba88723edec92681bcbc5a74c Mon Sep 17 00:00:00 2001
From: Braden Shepherdson <Braden.Shepherdson@gmail.com>
Date: Fri, 8 Jul 2022 15:01:55 -0400
Subject: [PATCH] Add serialization hierarchy, serialize Database, Table and
 Field (#23622)

Serialization of Databases, Tables, Fields

This brought a few core changes:
- Add `serdes-entity-id` to abstract the field used for the ID
- Pass the options to `extract-one` so it can eg. do encryption things.
- Handle dates in YAML storage and ingestion
- `:serdes/meta` now holds the entire hierarchy, not just the leaf model+ID pair.

There's an open problem here about the right way to handle secrets like
a database's password. Do we assume both sides have the same
`MB_ENCRYPTION_SECRET_KEY`? Or do we ask the user for a serdes-specific
password, so that every secret gets decrypted with the source key,
encrypted with the serdes key, stored, and then on load decrypted with
the serdes key and re-encrypted with the destination key?
---
 .clj-kondo/config.edn                         |   2 +-
 .../serialization/v2/ingest.clj               |  14 +-
 .../serialization/v2/ingest/yaml.clj          |  72 ++++--
 .../serialization/v2/load.clj                 |  77 ++----
 .../serialization/v2/models.clj               |   5 +-
 .../serialization/v2/storage/yaml.clj         |  31 ++-
 .../serialization/v2/utils/yaml.clj           |  46 ++++
 .../serialization/cmd_test.clj                |   6 +-
 .../serialization/test_util.clj               |  12 +-
 .../serialization/v2/extract_test.clj         |  14 +-
 .../serialization/v2/load_test.clj            |  83 +++++--
 .../serialization/v2/yaml_test.clj            | 119 +++++++--
 src/metabase/models/collection.clj            |  10 +-
 src/metabase/models/database.clj              |  27 +++
 src/metabase/models/field.clj                 |  51 ++++
 src/metabase/models/serialization/base.clj    | 228 ++++++++++++------
 src/metabase/models/setting.clj               |   5 +-
 src/metabase/models/table.clj                 |  35 ++-
 test/metabase/test/generate.clj               |  20 +-
 19 files changed, 613 insertions(+), 244 deletions(-)
 create mode 100644 enterprise/backend/src/metabase_enterprise/serialization/v2/utils/yaml.clj

diff --git a/.clj-kondo/config.edn b/.clj-kondo/config.edn
index e3152a075e6..a32df3b6285 100644
--- a/.clj-kondo/config.edn
+++ b/.clj-kondo/config.edn
@@ -366,7 +366,7 @@
            metabase.test/with-user-in-groups clojure.core/let
            metabase.test.data.interface/defdataset clojure.core/def
            metabase.test.data.interface/defdataset-edn clojure.core/def
-           metabase-enterprise.serialization.test-util/with-random-dump-dir clojure.core/fn
+           metabase-enterprise.serialization.test-util/with-random-dump-dir clojure.core/let
            metabase.driver.mongo.util/with-mongo-connection clojure.core/let
            metabase.driver.mongo.query-processor/mongo-let clojure.core/let
            toucan.db/with-call-counting clojure.core/fn
diff --git a/enterprise/backend/src/metabase_enterprise/serialization/v2/ingest.clj b/enterprise/backend/src/metabase_enterprise/serialization/v2/ingest.clj
index d3dd8f33425..ecd52ea129d 100644
--- a/enterprise/backend/src/metabase_enterprise/serialization/v2/ingest.clj
+++ b/enterprise/backend/src/metabase_enterprise/serialization/v2/ingest.clj
@@ -7,14 +7,16 @@
 
 (p/defprotocol+ Ingestable
   ;; Represents a data source for deserializing previously-exported appdb content into this Metabase instance.
-  ;; This is written as a protocol since overriding it with [[reify]] if useful for testing.
+  ;; This is written as a protocol since overriding it with [[reify]] is useful for testing.
   (ingest-list
     [this]
-    "Return a reducible stream of meta-maps, one for each entity in the dump.
-    See the description of the `:serdes/meta` maps in [[metabase.models.serialization.base]].
+    "Return a reducible stream of `:serdes/meta`-style abstract paths, one for each entity in the dump.
+    See the description of these abstract paths in [[metabase.models.serialization.base]].
+    Each path is ordered from the root to the leaf.
 
-    The order is not specified and should not be relied upon!")
+    The order of the whole list is not specified and should not be relied upon!")
 
   (ingest-one
-    [this meta-map]
-    "Given one of the meta-maps returned by [[ingest-list]], read in and return the entire corresponding entity."))
+    [this path]
+    "Given one of the `:serdes/meta` abstract paths returned by [[ingest-list]], read in and return the entire
+    corresponding entity."))
diff --git a/enterprise/backend/src/metabase_enterprise/serialization/v2/ingest/yaml.clj b/enterprise/backend/src/metabase_enterprise/serialization/v2/ingest/yaml.clj
index bf524725873..3362afc4a08 100644
--- a/enterprise/backend/src/metabase_enterprise/serialization/v2/ingest/yaml.clj
+++ b/enterprise/backend/src/metabase_enterprise/serialization/v2/ingest/yaml.clj
@@ -1,41 +1,65 @@
 (ns metabase-enterprise.serialization.v2.ingest.yaml
+  "Note that throughout the YAML file handling, the `:serdes/meta` abstract path is referred to as the \"hierarchy\",
+  to avoid confusion with filesystem paths."
   (:require [clojure.java.io :as io]
             [metabase-enterprise.serialization.v2.ingest :as ingest]
-            [yaml.core :as yaml])
-  (:import java.io.File))
+            [metabase-enterprise.serialization.v2.utils.yaml :as u.yaml]
+            [metabase.util.date-2 :as u.date]
+            [yaml.core :as yaml]
+            [yaml.reader :as y.reader])
+  (:import java.io.File
+           java.time.temporal.Temporal))
 
-(defmulti ^:private build-metas
-  (fn [^File file] (.getName file)))
+(extend-type Temporal y.reader/YAMLReader
+  (decode [data]
+    (u.date/parse data)))
 
-(defmethod build-metas "settings.yaml" [file]
+(defn- build-settings [file]
   (let [settings (yaml/from-file file)]
     (for [[k _] settings]
-      {:model "Setting" :id (name k)})))
-
-(defmethod build-metas :default [^File file]
-  (let [model-name   (-> file .getParentFile .getName)
-        [_ id label] (re-matches #"^([A-Za-z0-9_-]+)(?:\+(.*))?.yaml$" (.getName file))]
-    [(cond-> {:model model-name :id id}
-       label (assoc :label label))]))
-
-(defn- ingest-entity [root-dir {:keys [model id label] :as meta-map}]
-  (let [filename (if label
-                   (str id "+" label ".yaml")
-                   (str id ".yaml"))]
-    (-> (io/file root-dir model filename)
+      ; We return a path of 1 item, the setting itself.
+      [{:model "Setting" :id (name k)}])))
+
+
+(defn- build-metas
+  "Lists the hierarchies stored in `file`: one per key for the top-level settings.yaml, else the file's own."
+  [^File root-dir ^File file]
+  (let [parts (u.yaml/path-split root-dir file)]
+    (if (not= parts ["settings.yaml"]) [(u.yaml/path->hierarchy parts)] (build-settings file))))
+
+(defn- read-timestamps
+  "Runs [[u.date/parse]] over the value of every key in `entity` whose name ends in `_at`."
+  [entity]
+  (reduce (fn [acc k] (update acc k u.date/parse)) entity (filter #(.endsWith (name %) "_at") (keys entity))))
+
+(defn- ingest-entity
+  "Given a hierarchy, read in the YAML file it identifies. Clean it up (eg. parsing timestamps) and attach the
+  hierarchy as `:serdes/meta`.
+  The returned entity is in \"extracted\" form, ready to be passed to the `load` step.
+
+  The labels are removed from the hierarchy attached at `:serdes/meta`, since the storage system might have damaged the
+  original labels by eg. truncating them to keep the file names from getting too long. The labels aren't used at all on
+  the loading side, so it's fine to drop them."
+  [root-dir hierarchy]
+  (let [unlabeled (mapv #(dissoc % :label) hierarchy)]
+    (-> (u.yaml/hierarchy->file root-dir hierarchy) ; Use the original hierarchy for the filesystem.
         yaml/from-file
-        (assoc :serdes/meta meta-map))))
+        read-timestamps
+        (assoc :serdes/meta unlabeled)))) ; But return the hierarchy without labels.
 
 (deftype YamlIngestion [^File root-dir settings]
   ingest/Ingestable
   (ingest-list [_]
     (eduction (comp (filter (fn [^File f] (.isFile f)))
-                    (mapcat build-metas))
+                    (mapcat (partial build-metas root-dir)))
               (file-seq root-dir)))
-  (ingest-one [_ {:keys [model id] :as meta-map}]
-    (if (= "Setting" model)
-      {:serdes/meta meta-map :key (keyword id) :value (get settings (keyword id))}
-      (ingest-entity  root-dir meta-map))))
+
+  (ingest-one [_ abs-path]
+    (let [{:keys [model id]} (first abs-path)]
+      (if (and (= (count abs-path) 1)
+               (= model "Setting"))
+        {:serdes/meta abs-path :key (keyword id) :value (get settings (keyword id))}
+        (ingest-entity root-dir abs-path)))))
 
 (defn ingest-yaml
   "Creates a new Ingestable on a directory of YAML files, as created by
diff --git a/enterprise/backend/src/metabase_enterprise/serialization/v2/load.clj b/enterprise/backend/src/metabase_enterprise/serialization/v2/load.clj
index 39b5e98e719..5089ae86b35 100644
--- a/enterprise/backend/src/metabase_enterprise/serialization/v2/load.clj
+++ b/enterprise/backend/src/metabase_enterprise/serialization/v2/load.clj
@@ -3,80 +3,44 @@
   See the detailed breakdown of the (de)serialization processes in [[metabase.models.serialization.base]]."
   (:require [medley.core :as m]
             [metabase-enterprise.serialization.v2.ingest :as serdes.ingest]
-            [metabase-enterprise.serialization.v2.models :as serdes.models]
-            [metabase.models.serialization.base :as serdes.base]
-            [toucan.db :as db]))
-
-(defn- load-prescan-model [model]
-  (transduce (map (fn [[eid ih pk]]
-                    {:by-entity-id     [eid pk]
-                     :by-identity-hash [ih pk]}))
-             (partial merge-with conj)
-             {:by-entity-id {} :by-identity-hash {}}
-             (serdes.base/load-prescan-all model)))
-
-(defn- load-prescan
-  "For all the exported models in the list, run the prescan process."
-  []
-  (into {} (for [model serdes.models/exported-models]
-             [model (load-prescan-model model)])))
-
-;; These are on ice for now; they'll be dusted off as the YAML storage/ingestion code is added in a later PR.
-;; (defn- path-parts [path]
-;;   (->> (java.nio.file.Paths/get path (into-array String []))
-;;        (.iterator)
-;;        (iterator-seq)
-;;        (map str)))
-;;
-;; (defn- id-from-path [path]
-;;   (let [^String file (last (path-parts path))
-;;         base         (.substring file 0 (.lastIndexOf file "."))
-;;         ; Things with human-readable names use the form identity_hash+human_name.yaml
-;;         plus         (.indexOf base "+")]
-;;     (if (< plus 0)
-;;       base
-;;       (.substring base 0 plus))))
+            [metabase.models.serialization.base :as serdes.base]))
 
 (declare load-one)
 
 (defn- load-deps
-  "Given a list of `deps` (raw IDs), convert it to a list of meta-maps and `load-one` them all."
+  "Given a list of `deps` (hierarchies), `load-one` them all."
   [ctx deps]
   (if (empty? deps)
     ctx
-    (reduce load-one ctx (map (:from-ids ctx) deps))))
+    (reduce load-one ctx deps)))
 
 (defn- load-one
-  "Loads a single meta-map into the appdb, doing the necessary bookkeeping.
+  "Loads a single entity, specified by its `:serdes/meta` abstract path, into the appdb, doing some bookkeeping to avoid
+  cycles.
 
-  If the incoming entity has any dependencies, they are processed first (postorder) so that any foreign key references
-  in this entity can be resolved properly.
+  If the incoming entity has any dependencies, they are recursively processed first (postorder) so that any foreign key
+  references in this entity can be resolved properly.
 
   This is mostly bookkeeping for the overall deserialization process - the actual load of any given entity is done by
   [[metabase.models.serialization.base/load-one!]] and its various overridable parts, which see.
 
   Circular dependencies are not allowed, and are detected and thrown as an error."
-  [{:keys [expanding ingestion seen] :as ctx} {id :id model-name :model :as meta-map}]
+  [{:keys [expanding ingestion seen] :as ctx} path]
   (cond
-    (expanding id) (throw (ex-info (format "Circular dependency on %s %s" model-name id) {}))
-    (seen id)      ctx ; Already been done, just skip it.
-    :else (let [ingested (serdes.ingest/ingest-one ingestion meta-map)
-                model    (db/resolve-model (symbol model-name))
+    (expanding path) (throw (ex-info (format "Circular dependency on %s" (pr-str path)) {:path path}))
+    (seen path) ctx ; Already been done, just skip it.
+    :else (let [ingested (serdes.ingest/ingest-one ingestion path)
                 deps     (serdes.base/serdes-dependencies ingested)
                 ctx      (-> ctx
-                             (update :expanding conj id)
+                             (update :expanding conj path)
                              (load-deps deps)
-                             (update :seen conj id)
-                             (update :expanding disj id))
-                pk       (serdes.base/load-one!
-                           ingested
-                           (or (get-in ctx [:local (name model) :by-entity-id id])
-                               (get-in ctx [:local (name model) :by-identity-hash id])))]
-            (assoc-in ctx [:local
-                           (name model)
-                           (if (serdes.base/entity-id? id) :by-entity-id :by-identity-hash)
-                           id]
-                      pk))))
+                             (update :seen conj path)
+                             (update :expanding disj path))
+                ;; Use the abstract path as attached by the ingestion process, not the original one we were passed.
+                rebuilt-path    (serdes.base/serdes-path ingested)
+                local-pk-or-nil (serdes.base/load-find-local rebuilt-path)
+                _               (serdes.base/load-one! ingested local-pk-or-nil)]
+            ctx)))
 
 (defn load-metabase
   "Loads in a database export from an ingestion source, which is any Ingestable instance."
@@ -84,8 +48,7 @@
   ;; We proceed in the arbitrary order of ingest-list, deserializing all the files. Their declared dependencies guide
   ;; the import, and make sure all containers are imported before contents, etc.
   (let [contents (serdes.ingest/ingest-list ingestion)]
-    (reduce load-one {:local     (load-prescan)
-                      :expanding #{}
+    (reduce load-one {:expanding #{}
                       :seen      #{}
                       :ingestion ingestion
                       :from-ids  (m/index-by :id contents)}
diff --git a/enterprise/backend/src/metabase_enterprise/serialization/v2/models.clj b/enterprise/backend/src/metabase_enterprise/serialization/v2/models.clj
index 8b004ac0db8..11ebe6e4808 100644
--- a/enterprise/backend/src/metabase_enterprise/serialization/v2/models.clj
+++ b/enterprise/backend/src/metabase_enterprise/serialization/v2/models.clj
@@ -3,4 +3,7 @@
 (def exported-models
   "The list of models which are exported by serialization. Used for production code and by tests."
   ["Collection"
-   "Setting"])
+   "Database"
+   "Field"
+   "Setting"
+   "Table"])
diff --git a/enterprise/backend/src/metabase_enterprise/serialization/v2/storage/yaml.clj b/enterprise/backend/src/metabase_enterprise/serialization/v2/storage/yaml.clj
index 733aca79dd4..1b1826ed27a 100644
--- a/enterprise/backend/src/metabase_enterprise/serialization/v2/storage/yaml.clj
+++ b/enterprise/backend/src/metabase_enterprise/serialization/v2/storage/yaml.clj
@@ -1,26 +1,31 @@
 (ns metabase-enterprise.serialization.v2.storage.yaml
   (:require [clojure.java.io :as io]
             [metabase-enterprise.serialization.v2.storage :as storage]
-            [yaml.core :as yaml]))
+            [metabase-enterprise.serialization.v2.utils.yaml :as u.yaml]
+            [metabase.models.serialization.base :as serdes.base]
+            [metabase.util.date-2 :as u.date]
+            [yaml.core :as yaml]
+            [yaml.writer :as y.writer])
+  (:import java.time.temporal.Temporal))
+
+(extend-type Temporal y.writer/YAMLWriter
+  (encode [data]
+    (u.date/format data)))
 
 (defn- spit-yaml
-  [path obj]
-  (apply io/make-parents path)
-  (spit (apply io/file path) (yaml/generate-string obj :dumper-options {:flow-style :block})))
+  [file obj]
+  (io/make-parents file)
+  (spit (io/file file) (yaml/generate-string obj :dumper-options {:flow-style :block})))
 
-(defn- store-entity! [{:keys [root-dir]} {{:keys [id model label]} :serdes/meta :as entity}]
-  (let [basename (if (nil? label)
-                   (str id ".yaml")
-                   ;; + is a legal, unescaped character on all common filesystems, but not `identity-hash` or NanoID!
-                   (str id "+" label ".yaml"))
-        path [root-dir model basename]]
-    (spit-yaml path (dissoc entity :serdes/meta))))
+(defn- store-entity! [{:keys [root-dir]} entity]
+  (spit-yaml (u.yaml/hierarchy->file root-dir (serdes.base/serdes-path entity))
+             (dissoc entity :serdes/meta)))
 
 (defn- store-settings! [{:keys [root-dir]} settings]
   (let [as-map (into (sorted-map)
                      (for [{:keys [key value]} settings]
                        [key value]))]
-    (spit-yaml [root-dir "settings.yaml"] as-map)))
+    (spit-yaml (io/file root-dir "settings.yaml") as-map)))
 
 (defmethod storage/store-all! :yaml [stream opts]
   (when-not (or (string? (:root-dir opts))
@@ -29,7 +34,7 @@
                     {:opts opts})))
   (let [settings (atom [])]
     (doseq [entity stream]
-      (if (-> entity :serdes/meta :model (= "Setting"))
+      (if (-> entity :serdes/meta last :model (= "Setting"))
         (swap! settings conj entity)
         (store-entity! opts entity)))
     (store-settings! opts @settings)))
diff --git a/enterprise/backend/src/metabase_enterprise/serialization/v2/utils/yaml.clj b/enterprise/backend/src/metabase_enterprise/serialization/v2/utils/yaml.clj
new file mode 100644
index 00000000000..d0d80c8078e
--- /dev/null
+++ b/enterprise/backend/src/metabase_enterprise/serialization/v2/utils/yaml.clj
@@ -0,0 +1,46 @@
+(ns metabase-enterprise.serialization.v2.utils.yaml
+  (:require [clojure.java.io :as io])
+  (:import java.io.File
+           java.nio.file.Path))
+
+(def ^:private max-label-length 100) ; Longest label, in characters, that truncate-label lets through uncut.
+
+(defn- truncate-label
+  "Returns `s` cut down to at most `max-label-length` characters; anything shorter passes through untouched."
+  [s]
+  (cond-> s (< max-label-length (count s)) (subs 0 max-label-length)))
+
+(defn hierarchy->file
+  "Maps a `:serdes/meta` abstract path onto the YAML [[File]] under `root-dir` that stores that entity.
+
+  Every ancestor contributes a `Model/id/` directory pair. Only the leaf's `:label` is used: the file name is
+  `id.yaml`, or `id+label.yaml` with the label run through `truncate-label`."
+  ^File [root-dir hierarchy]
+  (let [leaf     (last hierarchy)
+        ;; Ancestors (everything but the leaf) become alternating model and id directory names.
+        dirs     (mapcat (juxt :model :id) (butlast hierarchy))
+        ;; + is a legal, unescaped character on all common filesystems,
+        ;; but doesn't appear in `identity-hash` or NanoID!
+        suffix   (when-some [label (:label leaf)] (str "+" (truncate-label label)))
+        filename (str (:id leaf) suffix ".yaml")]
+    (apply io/file root-dir (concat dirs [(:model leaf) filename]))))
+
+(defn path-split
+  "Breaks the location of `file`, taken relative to `root-dir`, into its individual name components.
+  For a root of /foo and a file /foo/bar/baz/this.file the result is `(\"bar\" \"baz\" \"this.file\")`."
+  [^File root-dir ^File file]
+  (->> (.iterator (.relativize (.toPath root-dir) (.toPath file)))
+       iterator-seq
+       (map (fn [^Path part] (.getName (.toFile part))))))
+
+(defn path->hierarchy
+  "Rebuilds the `:serdes/meta` abstract path from the path chunks produced by [[path-split]].
+  All chunks before the last two are read as model/id directory pairs; the last two are the leaf's model directory
+  and its `id.yaml` or `id+label.yaml` file name."
+  [path-parts]
+  (let [[model basename] (take-last 2 path-parts)
+        [_ id label]     (or (re-matches #"^([A-Za-z0-9_-]+)(?:\+(.*))?\.yaml$" basename)
+                             (re-matches #"^(.+)\.yaml$" basename))
+        leaf             (cond-> {:model model :id id} label (assoc :label label))
+        ancestors        (mapv (fn [[m i]] {:model m :id i}) (partition 2 (drop-last 2 path-parts)))]
+    (conj ancestors leaf)))
diff --git a/enterprise/backend/test/metabase_enterprise/serialization/cmd_test.clj b/enterprise/backend/test/metabase_enterprise/serialization/cmd_test.clj
index 20b73310145..5897be185a5 100644
--- a/enterprise/backend/test/metabase_enterprise/serialization/cmd_test.clj
+++ b/enterprise/backend/test/metabase_enterprise/serialization/cmd_test.clj
@@ -52,11 +52,11 @@
           :created_at             :%now
           :updated_at             :%now)
         ;; serialize "everything" (which should just be the card and user), which should succeed if #16931 is fixed
-        (is (nil? (cmd/dump (ts/random-dump-dir))))))))
+        (is (nil? (cmd/dump (ts/random-dump-dir "serdes-"))))))))
 
 (deftest blank-target-db-test
   (testing "Loading a dump into an empty app DB still works (#16639)"
-    (let [dump-dir                 (ts/random-dump-dir)
+    (let [dump-dir                 (ts/random-dump-dir "serdes-")
           user-pre-insert-called?  (atom false)]
       (log/infof "Dumping to %s" dump-dir)
       (cmd/dump dump-dir "--user" "crowberto@metabase.com")
@@ -70,7 +70,7 @@
 
 (deftest mode-update-remove-cards-test
   (testing "--mode update should remove Cards in a Dashboard if they're gone from the serialized YAML (#20786)"
-    (ts/with-random-dump-dir [dump-dir]
+    (ts/with-random-dump-dir [dump-dir "serialization"]
       (let [dashboard-yaml-filename (str dump-dir "/collections/root/dashboards/Dashboard.yaml")]
         (ts/with-source-and-dest-dbs
           (testing "create 2 questions in the source and add them to a dashboard"
diff --git a/enterprise/backend/test/metabase_enterprise/serialization/test_util.clj b/enterprise/backend/test/metabase_enterprise/serialization/test_util.clj
index 7647b7e22ff..49d6c05b4ea 100644
--- a/enterprise/backend/test/metabase_enterprise/serialization/test_util.clj
+++ b/enterprise/backend/test/metabase_enterprise/serialization/test_util.clj
@@ -117,11 +117,11 @@
   [& body]
   `(~'&do-with-dest-db (fn [] ~@body)))
 
-(defn random-dump-dir []
-  (str (System/getProperty "java.io.tmpdir") "/" (mt/random-name)))
+(defn random-dump-dir [prefix]
+  (str (System/getProperty "java.io.tmpdir") "/" prefix (mt/random-name)))
 
-(defn do-with-random-dump-dir [f]
-  (let [dump-dir (random-dump-dir)]
+(defn do-with-random-dump-dir [prefix f]
+  (let [dump-dir (random-dump-dir (or prefix ""))]
     (testing (format "\nDump dir = %s" (pr-str dump-dir))
       (try
         (f dump-dir)
@@ -129,8 +129,8 @@
           (when (.exists (io/file dump-dir))
             (.delete (io/file dump-dir))))))))
 
-(defmacro with-random-dump-dir {:style/indent 1} [[dump-dir-binding] & body]
-  `(do-with-random-dump-dir (fn [~dump-dir-binding] ~@body)))
+(defmacro with-random-dump-dir {:style/indent 2} [[dump-dir-binding prefix] & body]
+  `(do-with-random-dump-dir ~prefix (fn [~dump-dir-binding] ~@body)))
 
 (defmacro with-world
   "Run test in the context of a minimal Metabase instance connected to our test database."
diff --git a/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj b/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj
index 797408564ad..f5edd4fca58 100644
--- a/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj
+++ b/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj
@@ -27,8 +27,8 @@
                                                         :personal_owner_id mark-id}]]
 
       (testing "a top-level collection is extracted correctly"
-        (let [ser (serdes.base/extract-one "Collection" (select-one "Collection" [:= :id coll-id]))]
-          (is (= {:model "Collection" :id coll-eid :label coll-slug} (:serdes/meta ser)))
+        (let [ser (serdes.base/extract-one "Collection" {} (select-one "Collection" [:= :id coll-id]))]
+          (is (= [{:model "Collection" :id coll-eid :label coll-slug}] (:serdes/meta ser)))
           (is (not (contains? ser :location)))
           (is (not (contains? ser :id)))
           (is (nil? (:personal_owner_id ser)))
@@ -36,16 +36,16 @@
           (is (nil? (:parent_id ser)))))
 
       (testing "a nested collection is extracted with the right parent_id"
-        (let [ser (serdes.base/extract-one "Collection" (select-one "Collection" [:= :id child-id]))]
-          (is (= {:model "Collection" :id child-eid :label child-slug} (:serdes/meta ser)))
+        (let [ser (serdes.base/extract-one "Collection" {} (select-one "Collection" [:= :id child-id]))]
+          (is (= [{:model "Collection" :id child-eid :label child-slug}] (:serdes/meta ser)))
           (is (not (contains? ser :location)))
           (is (not (contains? ser :id)))
           (is (= coll-eid (:parent_id ser)))
           (is (nil? (:personal_owner_id ser)))))
 
       (testing "personal collections are extracted with email as key"
-        (let [ser (serdes.base/extract-one "Collection" (select-one "Collection" [:= :id pc-id]))]
-          (is (= {:model "Collection" :id pc-eid :label pc-slug} (:serdes/meta ser)))
+        (let [ser (serdes.base/extract-one "Collection" {} (select-one "Collection" [:= :id pc-id]))]
+          (is (= [{:model "Collection" :id pc-eid :label pc-slug}] (:serdes/meta ser)))
           (is (not (contains? ser :location)))
           (is (not (contains? ser :id)))
           (is (nil? (:parent_id ser)))
@@ -54,7 +54,7 @@
       (testing "overall extraction returns the expected set"
         (letfn [(collections [extraction] (->> extraction
                                                (into [])
-                                               (map :serdes/meta)
+                                               (map (comp last :serdes/meta))
                                                (filter #(= "Collection" (:model %)))
                                                (map :id)
                                                set))]
diff --git a/enterprise/backend/test/metabase_enterprise/serialization/v2/load_test.clj b/enterprise/backend/test/metabase_enterprise/serialization/v2/load_test.clj
index aae206bdcd6..e06fa4a199d 100644
--- a/enterprise/backend/test/metabase_enterprise/serialization/v2/load_test.clj
+++ b/enterprise/backend/test/metabase_enterprise/serialization/v2/load_test.clj
@@ -4,21 +4,33 @@
             [metabase-enterprise.serialization.v2.extract :as serdes.extract]
             [metabase-enterprise.serialization.v2.ingest :as serdes.ingest]
             [metabase-enterprise.serialization.v2.load :as serdes.load]
-            [metabase.models :refer [Collection]]
+            [metabase.models :refer [Collection Database Table]]
+            [metabase.models.serialization.base :as serdes.base]
             [metabase.models.serialization.hash :as serdes.hash]
             [toucan.db :as db]))
 
+(defn- no-labels [path] ; Returns `path` as a vector with :label removed from every entry.
+  (into [] (map #(dissoc % :label)) path))
+
+(defn- ids-by-model
+  "Set of leaf `:id`s over all `entities` whose `:serdes/meta` path ends in a `model-name` entry."
+  [entities model-name]
+  (into #{}
+        (comp (map (comp last :serdes/meta)) (filter #(= model-name (:model %))) (map :id))
+        entities))
+
 (defn- ingestion-in-memory [extractions]
-  (let [mapped (into {} (for [{{:keys [model id]} :serdes/meta :as m} (into [] extractions)]
-                          [[model id] m]))]
+  (let [mapped (into {} (for [entity (into [] extractions)]
+                          [(no-labels (serdes.base/serdes-path entity))
+                           entity]))]
     (reify
       serdes.ingest/Ingestable
       (ingest-list [_]
-        (eduction (map :serdes/meta) (vals mapped)))
-      (ingest-one [_ {:keys [model id]}]
-        (or (get mapped [model id])
-            (throw (ex-info (format "Unknown ingestion target: %s %s" model id)
-                            {:model model :id id :world mapped})))))))
+        (keys mapped))
+      (ingest-one [_ path]
+        (or (get mapped (no-labels path))
+            (throw (ex-info (format "Unknown ingestion target: %s" path)
+                            {:path path :world mapped})))))))
 
 ;;; WARNING for test authors: [[extract/extract-metabase]] returns a lazy reducible value. To make sure you don't
 ;;; confound your tests with data from your dev appdb, remember to eagerly
@@ -27,13 +39,13 @@
 (deftest load-basics-test
   (testing "a simple, fresh collection is imported"
     (let [serialized (atom nil)
-          eid1       "123456789abcdef_0123"]
+          eid1       "0123456789abcdef_0123"]
       (ts/with-source-and-dest-dbs
         (testing "extraction succeeds"
           (ts/with-source-db
             (ts/create! Collection :name "Basic Collection" :entity_id eid1)
             (reset! serialized (into [] (serdes.extract/extract-metabase {})))
-            (is (some (fn [{{:keys [model id]} :serdes/meta}]
+            (is (some (fn [{[{:keys [model id]}] :serdes/meta}]
                         (and (= model "Collection") (= id eid1)))
                       @serialized))))
 
@@ -111,11 +123,7 @@
         (testing "serialization should use identity hashes where no entity_id is defined"
           (is (= #{(:entity_id @c1b)
                    (serdes.hash/identity-hash @c2b)}
-                 (->> @serialized
-                      (map :serdes/meta)
-                      (filter #(= "Collection" (:model %)))
-                      (map :id)
-                      set))))
+                 (ids-by-model @serialized "Collection"))))
 
         (testing "deserializing, the name change causes a duplicated collection"
           (ts/with-dest-db
@@ -131,3 +139,48 @@
                      "Collection 2 version 1"
                      "Collection 2 version 2"}
                    (set (db/select-field :name Collection))))))))))
+
+(deftest deserialization-database-table-field-test
+  (testing "databases, tables and fields are nested in namespaces"
+    ;; Atoms suffixed `s` hold rows created in the source appdb; those suffixed `d` hold the matching rows
+    ;; selected from the destination appdb after loading.
+    (let [serialized (atom nil)
+          db1s       (atom nil)
+          db1d       (atom nil)
+          db2s       (atom nil)
+          db2d       (atom nil)
+          t1s        (atom nil)
+          t2s        (atom nil)]
+      (ts/with-source-and-dest-dbs
+        (testing "serializing the two databases and their tables"
+          (ts/with-source-db
+            (reset! db1s (ts/create! Database :name "db1"))
+            (reset! t1s  (ts/create! Table    :name "posts" :db_id (:id @db1s)))
+            (reset! db2s (ts/create! Database :name "db2"))
+            (reset! t2s  (ts/create! Table    :name "posts" :db_id (:id @db2s))) ; Deliberately the same name!
+            (reset! serialized (into [] (serdes.extract/extract-metabase {})))))
+
+        (testing "serialization of databases is based on the :name"
+          (is (= #{(:name @db1s) (:name @db2s) "test-data"} ; TODO I'm not sure where the `test-data` one comes from.
+                 (ids-by-model @serialized "Database"))))
+
+        (testing "tables reference their databases by name"
+          (is (= #{(:name @db1s) (:name @db2s) "test-data"}
+                 (->> @serialized
+                      (filter #(-> % :serdes/meta last :model (= "Table")))
+                      (map :db_id)
+                      set))))
+
+        (testing "deserialization works properly, keeping the same-named tables apart"
+          (ts/with-dest-db
+            (serdes.load/load-metabase (ingestion-in-memory @serialized))
+            (reset! db1d (db/select-one Database :name (:name @db1s)))
+            (reset! db2d (db/select-one Database :name (:name @db2s)))
+
+            (is (= 3 (db/count Database)))
+            (is (= #{"db1" "db2" "test-data"}
+                   (db/select-field :name Database)))
+            (is (= #{(:id @db1d) (:id @db2d)}
+                   (db/select-field :db_id Table :name "posts")))
+            (is (db/exists? Table :name "posts" :db_id (:id @db1d)))
+            (is (db/exists? Table :name "posts" :db_id (:id @db2d)))))))))
diff --git a/enterprise/backend/test/metabase_enterprise/serialization/v2/yaml_test.clj b/enterprise/backend/test/metabase_enterprise/serialization/v2/yaml_test.clj
index 2377774282e..031bd911677 100644
--- a/enterprise/backend/test/metabase_enterprise/serialization/v2/yaml_test.clj
+++ b/enterprise/backend/test/metabase_enterprise/serialization/v2/yaml_test.clj
@@ -6,8 +6,11 @@
             [metabase-enterprise.serialization.v2.ingest :as ingest]
             [metabase-enterprise.serialization.v2.ingest.yaml :as ingest.yaml]
             [metabase-enterprise.serialization.v2.storage.yaml :as storage.yaml]
+            [metabase-enterprise.serialization.v2.utils.yaml :as u.yaml]
             [metabase.models.collection :refer [Collection]]
+            [metabase.models.serialization.base :as serdes.base]
             [metabase.test.generate :as test-gen]
+            [metabase.util.date-2 :as u.date]
             [reifyhealth.specmonstah.core :as rs]
             [yaml.core :as yaml]))
 
@@ -18,8 +21,16 @@
        (map #(.getName %))
        set))
 
+(defn- subdirs
+  "Lazy seq of the entries directly under `dir` for which `.isFile` is false, ie. its subdirectories."
+  [dir]
+  (remove (fn [entry] (.isFile entry)) (.listFiles dir)))
+
+(defn- strip-labels [path] ; Returns `path` as a vector with :label removed from every segment.
+  (into [] (map (fn [segment] (dissoc segment :label))) path))
+
 (deftest basic-dump-test
-  (ts/with-random-dump-dir [dump-dir]
+  (ts/with-random-dump-dir [dump-dir "serdesv2-"]
     (ts/with-empty-h2-app-db
       (ts/with-temp-dpc [Collection [parent {:name "Some Collection"}]
                          Collection [child  {:name "Child Collection" :location (format "/%d/" (:id parent))}]]
@@ -47,69 +58,127 @@
                    (yaml/from-file (io/file dump-dir "Collection" child-filename))))))))))
 
 (deftest basic-ingest-test
-  (ts/with-random-dump-dir [dump-dir]
+  (ts/with-random-dump-dir [dump-dir "serdesv2-"]
     (io/make-parents dump-dir "Collection" "fake") ; Prepare the right directories.
     (spit (io/file dump-dir "settings.yaml")
           (yaml/generate-string {:some-key "with string value"
                                  :another-key 7
                                  :blank-key nil}))
     (spit (io/file dump-dir "Collection" "fake-id+the_label.yaml")
-          (yaml/generate-string {:some "made up" :data "here"}))
+          (yaml/generate-string {:some "made up" :data "here" :entity_id "fake-id" :slug "the_label"}))
     (spit (io/file dump-dir "Collection" "no-label.yaml")
-          (yaml/generate-string {:some "other" :data "in this one"}))
+          (yaml/generate-string {:some "other" :data "in this one" :entity_id "no-label"}))
 
     (let [ingestable (ingest.yaml/ingest-yaml dump-dir)
-          meta-maps  (into [] (ingest/ingest-list ingestable))
-          exp-files  {{:model "Collection" :id "fake-id" :label "the_label"} {:some "made up" :data "here"}
-                      {:model "Collection" :id "no-label"}                   {:some "other" :data "in this one"}
-                      {:model "Setting" :id "some-key"}                      {:key :some-key :value "with string value"}
-                      {:model "Setting" :id "another-key"}                   {:key :another-key :value 7}
-                      {:model "Setting" :id "blank-key"}                     {:key :blank-key :value nil}}]
+          exp-files  {[{:model "Collection" :id "fake-id" :label "the_label"}] {:some "made up"
+                                                                                :data "here"
+                                                                                :entity_id "fake-id"
+                                                                                :slug "the_label"}
+                      [{:model "Collection" :id "no-label"}]                   {:some "other"
+                                                                                :data "in this one"
+                                                                                :entity_id "no-label"}
+                      [{:model "Setting" :id "some-key"}]                      {:key :some-key :value "with string value"}
+                      [{:model "Setting" :id "another-key"}]                   {:key :another-key :value 7}
+                      [{:model "Setting" :id "blank-key"}]                     {:key :blank-key :value nil}}]
       (testing "the right set of file is returned by ingest-list"
         (is (= (set (keys exp-files))
-               (set meta-maps))))
+               (into #{} (ingest/ingest-list ingestable)))))
 
       (testing "individual reads in any order are correct"
-        (doseq [meta-map (->> exp-files
+        (doseq [abs-path (->> exp-files
                               keys
                               (repeat 10)
                               (into [] cat)
                               shuffle)]
           (is (= (-> exp-files
-                     (get meta-map)
-                     (assoc :serdes/meta meta-map))
-                 (ingest/ingest-one ingestable meta-map))))))))
+                     (get abs-path)
+                     (assoc :serdes/meta (mapv #(dissoc % :label) abs-path)))
+                 (ingest/ingest-one ingestable abs-path))))))))
 
 (deftest e2e-storage-ingestion-test
-  (ts/with-random-dump-dir [dump-dir]
+  (ts/with-random-dump-dir [dump-dir "serdesv2-"]
     (ts/with-empty-h2-app-db
-      (test-gen/insert! {:collection [[100 {:refs {:personal_owner_id ::rs/omit}}]]})
+      (test-gen/insert! {:collection [[100 {:refs {:personal_owner_id ::rs/omit}}]]
+                         :database   [[10]]
+                         :table      (into [] (for [db [:db0 :db1 :db2 :db3 :db4 :db5 :db6 :db7 :db8 :db9]]
+                                                [10 {:refs {:db_id db}}]))
+                         :field      (into [] (for [n     (range 100)
+                                                    :let [table (keyword (str "t" n))]]
+                                                [10 {:refs {:table_id table}}]))})
       (let [extraction (into [] (extract/extract-metabase {}))
-            entities   (reduce (fn [m {{:keys [model id]} :serdes/meta :as entity}]
-                                 (assoc-in m [model id] entity))
+            entities   (reduce (fn [m entity]
+                                 (update m (-> entity :serdes/meta last :model)
+                                         (fnil conj []) entity))
                                {} extraction)]
-        (is (= 100 (-> entities (get "Collection") vals count)))
+        (is (= 100 (-> entities (get "Collection") count)))
 
         (testing "storage"
           (storage.yaml/store! (seq extraction) dump-dir)
           (testing "for Collections"
             (is (= 100 (count (dir->file-set (io/file dump-dir "Collection")))))
-            (doseq [{:keys [entity_id slug] :as coll} (vals (get entities "Collection"))
-                    :let [filename (str entity_id "+" slug ".yaml")]]
+            (doseq [{:keys [entity_id slug] :as coll} (get entities "Collection")
+                    :let [filename (str entity_id "+" (#'u.yaml/truncate-label slug) ".yaml")]]
               (is (= (dissoc coll :serdes/meta)
                      (yaml/from-file (io/file dump-dir "Collection" filename))))))
 
+          (testing "for Databases"
+            (is (= 10 (count (dir->file-set (io/file dump-dir "Database")))))
+            (doseq [{:keys [name] :as coll} (get entities "Database")
+                    :let [filename (str name ".yaml")]]
+              (is (= (-> coll
+                         (dissoc :serdes/meta)
+                         (update :created_at u.date/format)
+                         (update :updated_at u.date/format))
+                     (yaml/from-file (io/file dump-dir "Database" filename))))))
+
+          (testing "for Tables"
+            (is (= 100
+                   (reduce + (for [db    (get entities "Database")
+                                   :let [tables (dir->file-set (io/file dump-dir "Database" (:name db) "Table"))]]
+                               (count tables))))
+                "Tables are scattered, so the directories are harder to count")
+
+            (doseq [{:keys [db_id name] :as coll} (get entities "Table")]
+              (is (= (-> coll
+                         (dissoc :serdes/meta)
+                         (update :created_at u.date/format)
+                         (update :updated_at u.date/format))
+                     (yaml/from-file (io/file dump-dir "Database" db_id "Table" (str name ".yaml")))))))
+
+          (testing "for Fields"
+            (is (= 1000
+                   (reduce + (for [db    (get entities "Database")
+                                   table (subdirs (io/file dump-dir "Database" (:name db) "Table"))]
+                               (->> (io/file table "Field")
+                                    dir->file-set
+                                    count))))
+                "Fields are scattered, so the directories are harder to count")
+
+            (doseq [{[db schema table] :table_id name :name :as coll} (get entities "Field")]
+              (is (nil? schema))
+              (is (= (-> coll
+                         (dissoc :serdes/meta)
+                         (update :created_at u.date/format)
+                         (update :updated_at u.date/format))
+                     (yaml/from-file (io/file dump-dir "Database" db "Table" table "Field" (str name ".yaml")))))))
+
           (testing "for settings"
-            (is (= (into {} (for [{:keys [key value]} (vals (get entities "Setting"))]
+            (is (= (into {} (for [{:keys [key value]} (get entities "Setting")]
                               [key value]))
                    (yaml/from-file (io/file dump-dir "settings.yaml"))))))
 
         (testing "ingestion"
           (let [ingestable (ingest.yaml/ingest-yaml dump-dir)]
             (testing "ingest-list is accurate"
-              (is (= (into #{} (comp (map vals) cat (map :serdes/meta)) (vals entities))
+              (is (= (into #{} (comp cat
+                                     (map (fn [entity]
+                                            (mapv #(cond-> %
+                                                     (:label %) (update :label #'u.yaml/truncate-label))
+                                                  (serdes.base/serdes-path entity)))))
+                           (vals entities))
                      (into #{} (ingest/ingest-list ingestable)))))
 
             (testing "each entity matches its in-memory original"
               (doseq [entity extraction]
-                (is (= entity (ingest/ingest-one ingestable (:serdes/meta entity))))))))))))
+                (is (= (update entity :serdes/meta strip-labels)
+                       (ingest/ingest-one ingestable (serdes.base/serdes-path entity))))))))))))
diff --git a/src/metabase/models/collection.clj b/src/metabase/models/collection.clj
index 2cb7e779092..a81471e7528 100644
--- a/src/metabase/models/collection.clj
+++ b/src/metabase/models/collection.clj
@@ -925,7 +925,7 @@
   ;; Transform :location (which uses database IDs) into a portable :parent_id with the parent's entity ID.
   ;; Also transform :personal_owner_id from a database ID to the email string, if it's defined.
   ;; Use the :slug as the human-readable label.
-  [_ coll]
+  [_ _ coll]
   (let [parent       (some-> coll
                              :id
                              Collection
@@ -939,7 +939,7 @@
     (-> (serdes.base/extract-one-basics "Collection" coll)
         (dissoc :location)
         (assoc :parent_id parent-id :personal_owner_id owner-email)
-        (assoc-in [:serdes/meta :label] (:slug coll)))))
+        (assoc-in [:serdes/meta 0 :label] (:slug coll)))))
 
 (defmethod serdes.base/load-xform "Collection" [{:keys [parent_id personal_owner_id] :as contents}]
   (let [loc        (if parent_id
@@ -956,9 +956,13 @@
 (defmethod serdes.base/serdes-dependencies "Collection"
   [{:keys [parent_id]}]
   (if parent_id
-    [parent_id]
+    [[{:model "Collection" :id parent_id}]]
     []))
 
+(defmethod serdes.base/serdes-generate-path "Collection" [_ coll]
+  (let [self (serdes.base/infer-self-path "Collection" coll)] ; One-segment path; the slug, if any, is the label.
+    [(if-let [slug (:slug coll)] (assoc self :label slug) self)]))
+
 ;;; +----------------------------------------------------------------------------------------------------------------+
 ;;; |                                           Perms Checking Helper Fns                                            |
 ;;; +----------------------------------------------------------------------------------------------------------------+
diff --git a/src/metabase/models/database.clj b/src/metabase/models/database.clj
index 60458040d4a..bb91d330268 100644
--- a/src/metabase/models/database.clj
+++ b/src/metabase/models/database.clj
@@ -10,6 +10,7 @@
             [metabase.models.permissions :as perms]
             [metabase.models.permissions-group :as perms-group]
             [metabase.models.secret :as secret :refer [Secret]]
+            [metabase.models.serialization.base :as serdes.base]
             [metabase.models.serialization.hash :as serdes.hash]
             [metabase.plugins.classloader :as classloader]
             [metabase.util :as u]
@@ -277,3 +278,29 @@
                              details
                              (sensitive-fields-for-db db)))))
     json-generator)))
+
+;;; ------------------------------------------------ Serialization ----------------------------------------------------
+
+(defmethod serdes.base/extract-one "Database"
+  [_ opts {:keys [creator_id] :as entity}]
+  ;; TODO Support alternative encryption of secret database details.
+  (let [drop-details? (= :exclude (get opts :database/secrets :exclude))] ; :exclude is the default
+    (cond-> (serdes.base/extract-one-basics "Database" entity)
+      creator_id    (assoc :creator_id (db/select-one-field :email 'User :id creator_id)) ; optional FK -> email
+      drop-details? (dissoc :details))))
+
+(defmethod serdes.base/serdes-entity-id "Database"
+  [_ database]
+  (:name database)) ; A Database's portable ID is its :name.
+
+(defmethod serdes.base/serdes-generate-path "Database"
+  [_ database]
+  [{:model "Database" :id (:name database)}]) ; One-segment path whose :id is the database name.
+
+(defmethod serdes.base/load-find-local "Database"
+  [path] ; The first (only) segment's :id is the database name to look up locally.
+  (db/select-one-field :id Database :name (-> path first :id)))
+
+(defmethod serdes.base/load-xform "Database" [entity]
+  (let [xformed (serdes.base/load-xform-basics entity), email (:creator_id entity)] ; creator_id holds an email
+    (if email (assoc xformed :creator_id (db/select-one-field :id 'User :email email)) xformed)))
diff --git a/src/metabase/models/field.clj b/src/metabase/models/field.clj
index b9b97c258be..1d6db971789 100644
--- a/src/metabase/models/field.clj
+++ b/src/metabase/models/field.clj
@@ -9,6 +9,7 @@
             [metabase.models.humanization :as humanization]
             [metabase.models.interface :as mi]
             [metabase.models.permissions :as perms]
+            [metabase.models.serialization.base :as serdes.base]
             [metabase.models.serialization.hash :as serdes.hash]
             [metabase.util :as u]
             [metabase.util.honeysql-extensions :as hx]
@@ -378,3 +379,53 @@
   {:arglists '([field])}
   [{:keys [table_id]}]
   (db/select-one 'Table, :id table_id))
+
+;;; ------------------------------------------------- Serialization -------------------------------------------------
+
+;; In order to retrieve the dependencies for a field its table_id needs to be serialized as [database schema table],
+;; a trio of strings with schema maybe nil.
+(defmethod serdes.base/serdes-generate-path "Field" [_ {table_id :table_id field :name}]
+  (let [;; A numeric table_id is a raw appdb foreign key, so the Table (and its Database name) must be looked up.
+        [db schema table] (if (number? table_id)
+                            (let [tbl (db/select-one 'Table :id table_id)]
+                              [(when tbl (db/select-one-field :name 'Database :id (:db_id tbl)))
+                               (:schema tbl)
+                               (:name tbl)])
+                            ;; If table_id is not a number, it's already been exported as a [db schema table] triple.
+                            table_id)]
+    (filterv some? [{:model "Database" :id db}
+                    (when schema {:model "Schema" :id schema})
+                    {:model "Table"    :id table}
+                    {:model "Field"    :id field}])))
+
+(defmethod serdes.base/serdes-entity-id "Field" [_ field]
+  (:name field)) ; A Field's portable ID is its :name.
+
+(defmethod serdes.base/serdes-dependencies "Field" [field]
+  ;; The only dependency is the parent Table, whose path is the Field's own path minus its final segment.
+  (-> field serdes.base/serdes-path pop vector))
+
+(defmethod serdes.base/extract-one "Field"
+  [_ _opts field]
+  ;; Replace the numeric :table_id with the portable [db-name schema table-name] triple.
+  (let [{:keys [db_id schema] table-name :name} (db/select-one 'Table :id (:table_id field))
+        db-name (db/select-one-field :name 'Database :id db_id)]
+    (assoc (serdes.base/extract-one-basics "Field" field) :table_id [db-name schema table-name])))
+
+(defmethod serdes.base/load-xform "Field"
+  [field] ; :table_id arrives as a [db-name schema table-name] triple and is replaced by the local Table ID.
+  (let [[db-name schema table-name] (:table_id field)
+        db-id    (:id (db/select-one 'Database :name db-name))
+        table-id (db/select-one-field :id 'Table :db_id db-id :name table-name :schema schema)]
+    (assoc (serdes.base/load-xform-basics field) :table_id table-id)))
+
+(defmethod serdes.base/load-find-local "Field"
+  [path]
+  ;; `path` is [Database Table Field], or [Database Schema Table Field] when the table has a schema.
+  (let [db-name            (-> path first :id)
+        schema-name        (when (= 4 (count path)) (-> path second :id)) ; 4 segments <=> a Schema is present
+        [{table-name :id}
+         {field-name :id}] (take-last 2 path)
+        db-id              (db/select-one-field :id 'Database :name db-name)
+        table-id           (db/select-one-field :id 'Table :name table-name :db_id db-id :schema schema-name)]
+    (db/select-one-field :id Field :name field-name :table_id table-id)))
diff --git a/src/metabase/models/serialization/base.clj b/src/metabase/models/serialization/base.clj
index d29a3fd93ca..99912438296 100644
--- a/src/metabase/models/serialization/base.clj
+++ b/src/metabase/models/serialization/base.clj
@@ -14,11 +14,98 @@
             [toucan.db :as db]
             [toucan.models :as models]))
 
+;;; +----------------------------------------------------------------------------------------------------------------+
+;;; |                                              :serdes/meta                                                      |
+;;; +----------------------------------------------------------------------------------------------------------------+
+;;; The Clojure maps from extraction and ingestion always include a special key `:serdes/meta` giving some information
+;;; about the serialized entity. The value is always a vector of maps that give a "path" to the entity. This is not a
+;;; filesystem path; rather it defines the nesting of some entities inside others.
+;;;
+;;; Most paths are a single layer:
+;;; `[{:model "ModelName" :id "entity ID or identity hash string" :label "Human-readable name"}]`
+;;; `:model` and `:id` are required; `:label` is optional.
+;;;
+;;; But for some entities, it can be deeper. For example Fields belong to Tables, which are in Schemas (which don't
+;;; really exist in appdb, but are reflected here for namespacing of table names), which are in Databases:
+;;; `[{:model "Database" :id "my_db"}
+;;;   {:model "Schema"   :id "PUBLIC"}
+;;;   {:model "Table"    :id "Users"}
+;;;   {:model "Field"    :id "email"}]`
+;;;
+;;; Many of the multimethods are keyed on the `:model` field of the leaf entry (the last).
+
+(defmulti serdes-entity-id
+  "Returns the portable entity ID of `entity`, which might be nil.
+  `(serdes-entity-id \"ModelName\" entity)`
+
+  The default implementation reads the `:entity_id` column. Models whose portable ID lives elsewhere should
+  override this.
+  Keyed on the model name, the first argument."
+  (fn [model-name _entity] model-name))
+
+(defmethod serdes-entity-id :default [_ entity]
+  (:entity_id entity))
+
+(defmulti serdes-generate-path
+  "Given the model name and an entity (raw from the database, or ingested), returns a vector giving its *path*.
+  `(serdes-generate-path \"ModelName\" entity)`
+
+  The path is a vector of maps, root first and this entity itself last. Each map looks like:
+  `{:model \"ModelName\" :id \"entity ID, identity hash, or custom ID\" :label \"optional human label\"}`
+
+  Some entities stand alone, while some are naturally nested inside others. For example, fields belong in tables, which
+  belong in databases. Further, since these use eg. column names as entity IDs, they can collide if all the fields get
+  poured into one namespace (like a directory of YAML files).
+
+  Finally, it's often useful to delete the databases from an export, since the receiving end has its own different, but
+  compatible, database definitions. (For example, staging and prod instances of Metabase.) It's convenient for human
+  understanding and editing to group fields under tables under databases.
+
+  Therefore we provide an abstract path on the entities, which will generally be stored in a directory tree.
+  (This is not strictly required - for a different medium like protobufs the path might be encoded some other way.)
+
+  The path is reconstructed by ingestion and used as the key to read entities with `ingest-one`, and to match
+  against existing entities.
+
+  The default implementation is a single level, using the model name provided and the ID from either
+  [[serdes-entity-id]] or [[serdes.hash/identity-hash]].
+
+  Implementation notes:
+  - `:serdes/meta` might already be present. If so it came from ingestion and may hold truncated values, so do not
+    rely on it: rebuild the path from the rest of the data.
+  - The primary key might still be attached during extraction.
+  - `:label` is optional.
+  - The logic to guess the leaf part of the path is in [[infer-self-path]], for use in overriding."
+  (fn [model _] model))
+
+(defn infer-self-path
+  "The default guess at the leaf segment of an entity's path. Used by the default [[serdes-generate-path]], and
+  available to overriding methods that need the same logic.
+
+  The `:id` is the first of these to produce a value:
+  - [[serdes-entity-id]]
+  - [[serdes.hash/identity-hash]] of the Toucan entity fetched by this entity's primary key
+
+  Returns `{:model \"ModelName\" :id \"id-string\"}`. Throws when neither yields an ID, because that is a programmer
+  error rather than a runtime condition."
+  [model-name entity]
+  (let [model     (db/resolve-model (symbol model-name))
+        pk        (models/primary-key model)
+        hashed-id (fn [] (some-> (get entity pk) model serdes.hash/identity-hash))]
+    (if-let [id (or (serdes-entity-id model-name entity) (hashed-id))]
+      {:model model-name :id id}
+      (throw (ex-info "Could not infer-self-path on this entity - maybe implement serdes-entity-id ?"
+                      {:model model-name :entity entity})))))
+
+(defmethod serdes-generate-path :default [model-name entity]
+  ;; A one-segment path: right for stand-alone models, while nested models must supply their own method.
+  (vector (infer-self-path model-name entity)))
+
 ;;; +----------------------------------------------------------------------------------------------------------------+
 ;;; |                                          Serialization Process                                                 |
 ;;; +----------------------------------------------------------------------------------------------------------------+
 ;;; Serialization happens in two stages: extraction and storage. These are independent and deliberately decoupled.
-;;; The result of extraction is a reducible stream of Clojure maps with `:serdes/meta` keys on them (see below).
+;;; The result of extraction is a reducible stream of Clojure maps with `:serdes/meta` keys on them (see above).
 ;;; In particular, extraction does not care about file formats or other such things.
 ;;;
 ;;; Storage takes the stream from extraction and actually stores it or sends it. Traditionally we have serialized to a
@@ -86,9 +173,10 @@
 
 (defmulti extract-one
   "Extracts a single entity retrieved from the database into a portable map with `:serdes/meta` attached.
+  `(extract-one \"ModelName\" opts entity)`
 
-  The default implementation uses the model name as the `:model` and either `:entity_id` or
-  [[serdes.hash/identity-hash]] as the `:id`. It also strips off the database's numeric primary key.
+  The default implementation uses [[serdes-generate-path]] to build the `:serdes/meta`. It also strips off the
+  database's numeric primary key.
 
   That suffices for a few simple entities, but most entities will need to override this.
   They should follow the pattern of:
@@ -96,20 +184,19 @@
   - Drop the numeric database primary key
   - Replace any foreign keys with portable values (eg. entity IDs or `identity-hash`es, owning user's ID with their
     email, etc.)
-  - Consider attaching a human-friendly `:label` under `:serdes/meta`. (Eg. a Collection's `:slug`)
 
   When overriding this, [[extract-one-basics]] is probably a useful starting point.
 
   Keyed by the model name of the entity, the first argument."
-  (fn [model _] model))
+  (fn [model _ _] model))
 
 (defmethod extract-all :default [model opts]
-  (eduction (map (partial extract-one model))
+  (eduction (map (partial extract-one model opts))
             (extract-query model opts)))
 
 (defn raw-reducible-query
   "Helper for calling Toucan's raw [[db/reducible-query]]. With just the model name, fetches everything. You can filter
-  with a HoneySQL map like {:where [:= :archived true]}.
+  with a HoneySQL map like `{:where [:= :archived true]}`.
 
   Returns a reducible stream of JDBC row maps."
   ([model-name]
@@ -118,13 +205,21 @@
    (db/reducible-query (merge {:select [:*] :from [(symbol model-name)]}
                               honeysql-form))))
 
+(defn- model-name->table
+  "Resolves the Toucan model called `model-name` and returns its `:table`.
+
+  Needed because the model name is not necessarily the table name."
+  [model-name]
+  (let [model (db/resolve-model (symbol model-name))]
+    (:table model)))
+
 (defmethod extract-query :default [model-name _]
-  (raw-reducible-query model-name))
+  (raw-reducible-query (model-name->table model-name)))
 
 (defn extract-one-basics
   "A helper for writing [[extract-one]] implementations. It takes care of the basics:
   - Convert to a vanilla Clojure map.
-  - Add `:serdes/meta`.
+  - Add `:serdes/meta` by calling [[serdes-generate-path]].
   - Drop the primary key.
 
   Returns the Clojure map."
@@ -132,12 +227,10 @@
   (let [model (db/resolve-model (symbol model-name))
         pk    (models/primary-key model)]
     (-> entity
-        (assoc :serdes/meta {:model model-name
-                             :id    (or (:entity_id entity)
-                                        (serdes.hash/identity-hash (model (get entity pk))))})
+        (assoc :serdes/meta (serdes-generate-path model-name entity))
         (dissoc pk))))
 
-(defmethod extract-one :default [model-name entity]
+(defmethod extract-one :default [model-name _opts entity]
   (extract-one-basics model-name entity))
 
 ;;; +----------------------------------------------------------------------------------------------------------------+
@@ -152,32 +245,27 @@
 ;;; usage for testing in-memory deserialization.
 ;;;
 ;;; Factory functions consume some details (like a file path) and return an [[Ingestable]], with its two methods:
-;;; - `(ingest-list ingestable)` returns a reducible stream of `:serdes/meta` maps in any order.
-;;; - `(ingest-one ingestable meta-map)` ingests a single entity into memory, returning it as a map.
+;;; - `(ingest-list ingestable)` returns a reducible stream of `:serdes/meta` paths in any order.
+;;; - `(ingest-one ingestable meta-path)` ingests a single entity into memory, returning it as a map.
 ;;;
 ;;; This two-stage design avoids needing all the data in memory at once, where that's practical with the underlying
 ;;; storage media (eg. files).
 ;;;
 ;;; Loading:
-;;; Loading tries to find corresponding entities in the destination appdb by `entity_id` or `identity-hash`, and update
+;;; Loading tries to find corresponding entities in the destination appdb by entity ID or identity hash, and update
 ;;; those rows rather than duplicating.
 ;;; The entry point is [[metabase-enterprise.serialization.v2.load/load-metabase]]. The top-level process works like
 ;;; this:
-;;; - `(load-prescan-all "ModelName")` is called, which selects the entire collection as a reducible stream and calls
-;;;   [[load-prescan-one]] on each entry.
-;;;     - The default for that usually is the right thing.
-;;; - `(load-prescan-one entity)` turns a particular entity into an `[entity_id identity-hash primary-key]` triple.
-;;;     - The default will work for models with a literal `entity_id` field; those with alternative IDs (database,
-;;;       table, field, setting, etc.) should override this method.
-;;; - Prescanning complete, `(ingest-list ingestable)` gets the metadata for every exported entity in arbitrary order.
+;;; - `(ingest-list ingestable)` gets the `:serdes/meta` "path" for every exported entity in arbitrary order.
 ;;;     - `(ingest-one meta-map opts)` is called on each first to ingest the value into memory, then
-;;;     - `(serdes-dependencies ingested)` to get a list of other IDs (entity IDs or identity hashes).
+;;;     - `(serdes-dependencies ingested)` to get a list of other paths that need to be loaded first.
 ;;;         - The default is an empty list.
 ;;;     - The idea of dependencies is eg. a database must be loaded before its tables, a table before its fields, a
 ;;;       collection's ancestors before the collection itself.
 ;;;     - Dependencies are loaded recursively in postorder; circular dependencies cause the process to throw.
-;;; - Having found an entity it can really load, the core code will check its table of IDs found by prescanning.
-;;;     - Then it calls `(load-one! ingested maybe-local-entity)`, passing the `ingested` value and either `nil` or the
+;;; - Having found an entity it can really load, check for any existing one:
+;;;     - `(load-find-local path)` returns the corresponding primary key, or nil.
+;;; - Then it calls `(load-one! ingested maybe-local-entity)`, passing the `ingested` value and either `nil` or the
 ;;;       Toucan entity corresponding to the incoming map.
 ;;;     - `load-one!` is a side-effecting black box to the rest of the deserialization process.
 ;;;       It returns the primary key of the new or existing entity, which is necessary to resolve foreign keys between
@@ -193,57 +281,45 @@
 ;;;     - `(load-insert! ingested)` if the entity is new.
 ;;;   Both of these have the obvious defaults of [[jdbc/update!]] or [[jdbc/insert!]].
 
-;;; +----------------------------------------------------------------------------------------------------------------+
-;;; |                                            :serdes/meta maps                                                   |
-;;; +----------------------------------------------------------------------------------------------------------------+
-;;; The Clojure maps from extraction and ingestion always include a special key `:serdes/meta` giving some information
-;;; about the serialized entity. The value is always a map like:
-;;; `{:model "ModelName" :id "entity ID or identity hash string" :label "Human-readable name"}`
-;;; `:model` and `:id` are required; `:label` is optional.
-;;;
-;;; Many of the multimethods are keyed on the `:model` field.
-
-(defmulti load-prescan-all
-  "Returns a reducible stream of `[entity_id identity-hash primary-key]` triples for the entire table.
-
-  Defaults to running [[load-prescan-one]] over each entity returned by [[jdbc/reducible-query]] for this model.
-  Override this method if filtering is needed.
-
-  Keyed on the model name."
-  identity)
+(defn- ingested-model
+  "Dispatch fn shared by several load multimethods: the model name at the leaf (last entry) of `:serdes/meta`."
+  [ingested]
+  (:model (last (:serdes/meta ingested))))
 
-(defmulti load-prescan-one
-  "Converts a database entity into a `[entity_id identity-hash primary-key]` triple for the deserialization machinery.
-  Called with the Toucan model (*not* this entity), and the JDBC map for the entity in question.
+(defn serdes-path
+  "Returns the abstract serdes path of an exported or imported entity, ie. the value stored under its `:serdes/meta`
+  key. This is a logical path, not a filesystem path."
+  [entity]
+  (get entity :serdes/meta))
 
-  Defaults to using a literal `:entity_id` column. For models with a different entity ID (eg. a Table's name, a
-  Setting's key), override this method.
+(defmulti load-find-local
+  "Given a path, tries to look up any corresponding local entity.
 
-  Keyed on the model name."
-  (fn [model _] (name model)))
+  Returns nil, or the primary key of the local entity.
+  Keyed on the model name at the leaf of the path.
 
-(defmethod load-prescan-all :default [model-name]
-  (let [model (db/resolve-model (symbol model-name))]
-    (eduction (map (partial load-prescan-one model))
-              (raw-reducible-query model-name))))
+  By default, this tries to look up the entity by its `:entity_id` column, or identity hash, depending on the shape of
+  the incoming key. The identity hash cannot be queried directly, so that lookup has to scan the table comparing
+  [[serdes.hash/identity-hash]] values. NOTE(review): no cache of hash -> primary key is visible here; it is still a
+  TODO on [[find-by-identity-hash]], so confirm before relying on cached lookups."
+  (fn [path]
+    (-> path last :model)))
 
-(defmethod load-prescan-one :default [model entity]
-  (let [pk  (models/primary-key model)
-        key (get entity pk)]
-    [(:entity_id entity)
-     (serdes.hash/identity-hash (db/select-one model pk key)) ; TODO This sucks for identity-hash!
-     key]))
+(declare lookup-by-id)
 
-(defn- ingested-model
-  "The dispatch function for several of the load multimethods: dispatching on the model of the incoming entity."
-  [ingested]
-  (-> ingested :serdes/meta :model))
+(defmethod load-find-local :default [path]
+  (let [leaf   (last path)
+        model  (db/resolve-model (symbol (:model leaf)))
+        pk-col (models/primary-key model)]
+    ;; Default lookup: resolve the leaf's model, pass it and the leaf's ID to `lookup-by-id`, then read the PK column.
+    ;; `some->` yields nil as soon as any step comes back nil.
+    (some-> model (lookup-by-id (:id leaf)) (get pk-col))))
 
 (defmulti serdes-dependencies
   "Given an entity map as ingested (not a Toucan entity) returns a (possibly empty) list of its dependencies, where each
-  dependency is represented by either the entity ID or identity hash of the target entity.
+  dependency is represented by its abstract path (its `:serdes/meta` value).
 
-  Keyed on the model name.
+  Keyed on the model name for this entity.
   Default implementation returns an empty vector, so only models that have dependencies need to implement this."
   ingested-model)
 
@@ -276,8 +352,7 @@
 
 (defmulti load-update!
   "Called by the default [[load-one!]] if there is a corresponding entity already in the appdb.
-  The first argument is the model name, the second the incoming map we're deserializing, and the third is the Toucan
-  entity found in the appdb.
+  `(load-update! \"ModelName\" ingested-and-xformed local-Toucan-entity)`
 
   Defaults to a straightforward [[db/update!]], and you may not need to update it.
 
@@ -289,20 +364,17 @@
 (defmethod load-update! :default [model-name ingested local]
   (let [model (db/resolve-model (symbol model-name))
         pk    (models/primary-key model)
-        id    (get local pk)
-        ; Get a WHERE clause, but then strip off the WHERE part to include it in the JDBC call below.
-        ;where (update (db/honeysql->sql {:where [:= pk id]}) 0
-        ;              #(.substring 5))
-        ]
+        id    (get local pk)]
     (log/tracef "Upserting %s %d: old %s new %s" model-name id (pr-str local) (pr-str ingested))
     ; Using the two-argument form of [[db/update!]] that takes the model and a HoneySQL form for the actual update.
     ; It works differently from the more typical `(db/update! 'Model id updates...)` form: this form doesn't run any of
     ; the pre-update magic, it just updates the database directly.
     (db/update! (symbol model-name) {:where [:= pk id] :set ingested})
-    pk))
+    id))
 
 (defmulti load-insert!
   "Called by the default [[load-one!]] if there is no corresponding entity already in the appdb.
+  `(load-insert! \"ModelName\" ingested-and-xformed)`
 
   Defaults to a straightforward [[db/simple-insert!]], and you probably don't need to implement this.
   Note that [[db/insert!]] should be avoided - we don't want to populate the `:entity_id` field if it wasn't already
@@ -315,7 +387,8 @@
 
 (defmethod load-insert! :default [model ingested]
   (log/tracef "Inserting %s: %s" model (pr-str ingested))
-  ; Toucan's simple-insert! actually does the right thing for our purposes: it doesn't call pre-insert or post-insert.
+  ; Toucan's simple-insert! actually does the right thing for our purposes: it doesn't call pre-insert or post-insert,
+  ; and it returns the new primary key.
   (db/simple-insert! (symbol model) ingested))
 
 (defmulti load-one!
@@ -347,12 +420,13 @@
 (defn entity-id?
   "Checks if the given string is a 21-character NanoID. Useful for telling entity IDs apart from identity hashes."
   [id-str]
-  (boolean (re-matches #"^[A-Za-z0-9_-]{21}$" id-str)))
+  (boolean (and id-str (re-matches #"^[A-Za-z0-9_-]{21}$" id-str))))
 
 (defn- find-by-identity-hash
   "Given a model and a target identity hash, this scans the appdb for any instance of the model corresponding to the
   hash. Does a complete scan, so this should be called sparingly!"
   ;; TODO This should be able to use a cache of identity-hash values from the start of the deserialization process.
+  ;; Note that it needs to include either updates (or worst-case, invalidation) at [[load-one!]] time.
   [model id-hash]
   (->> (db/select-reducible model)
        (into [] (comp (filter #(= id-hash (serdes.hash/identity-hash %)))
diff --git a/src/metabase/models/setting.clj b/src/metabase/models/setting.clj
index df3f9a0a1f3..e453a9a3b7c 100644
--- a/src/metabase/models/setting.clj
+++ b/src/metabase/models/setting.clj
@@ -151,10 +151,13 @@
 (defmethod serdes.base/extract-all "Setting" [_model _opts]
   (for [{:keys [key value]} (admin-writable-site-wide-settings
                               :getter (partial get-value-of-type :string))]
-    {:serdes/meta {:model "Setting" :id (name key)}
+    {:serdes/meta [{:model "Setting" :id (name key)}]
      :key key
      :value value}))
 
+(defmethod serdes.base/load-find-local "Setting" [path] ; Settings use a one-element path whose :id is the key name
+  (get-value-of-type :string (-> path first :id keyword))) ; result is what get-value-of-type reads, not a PK
+
 (defmethod serdes.base/load-one! "Setting" [{:keys [key value]} _]
   (set-value-of-type! :string key value))
 
diff --git a/src/metabase/models/table.clj b/src/metabase/models/table.clj
index 7ad9cd2ef6e..9b010d0a789 100644
--- a/src/metabase/models/table.clj
+++ b/src/metabase/models/table.clj
@@ -11,6 +11,7 @@
             [metabase.models.metric :refer [Metric retrieve-metrics]]
             [metabase.models.permissions :as perms :refer [Permissions]]
             [metabase.models.segment :refer [retrieve-segments Segment]]
+            [metabase.models.serialization.base :as serdes.base]
             [metabase.models.serialization.hash :as serdes.hash]
             [metabase.util :as u]
             [toucan.db :as db]
@@ -208,7 +209,6 @@
         {:order-by       field-order-rule}))
     tables))
 
-
 ;;; ------------------------------------------------ Convenience Fns -------------------------------------------------
 
 (defn qualified-identifier
@@ -230,3 +230,36 @@
    (fn [table-id]
      {:pre [(integer? table-id)]}
      (db/select-one-field :db_id Table, :id table-id))))
+
+;;; ------------------------------------------------- Serialization -------------------------------------------------
+(defmethod serdes.base/serdes-dependencies "Table" [{:keys [db_id]}]
+  [[{:model "Database" :id db_id}]]) ; a Table depends only on its Database, given as a one-element path
+
+(defmethod serdes.base/serdes-generate-path "Table" [_ {:keys [db_id schema], table-name :name}]
+  ;; Path is Database -> Schema (only when the table has one) -> Table; the Database is identified by its name.
+  (let [db-name (db/select-one-field :name 'Database :id db_id)]
+    (cond-> [{:model "Database" :id db-name}]
+      schema (conj {:model "Schema" :id schema})
+      true   (conj {:model "Table" :id table-name}))))
+
+(defmethod serdes.base/serdes-entity-id "Table" [_ table]
+  (:name table)) ; a Table's serdes ID is simply its name
+
+(defmethod serdes.base/load-find-local "Table"
+  [path]
+  ;; `path` is [Database Table] or [Database Schema Table]; only a three-element path carries a schema.
+  (let [[db-part maybe-schema] path
+        schema (when (= 3 (count path)) (:id maybe-schema))
+        db-pk  (db/select-one-field :id Database :name (:id db-part))]
+    ;; Match on name, owning database and schema together, and yield the Table's `:id`.
+    (db/select-one-field :id Table :name (:id (last path)) :db_id db-pk :schema schema)))
+
+(defmethod serdes.base/extract-one "Table"
+  [_ _ table]
+  (let [basics (serdes.base/extract-one-basics "Table" table)] ; then make the FK portable: Database id -> name
+    (assoc basics :db_id (db/select-one-field :name 'Database :id (:db_id table)))))
+
+(defmethod serdes.base/load-xform "Table"
+  [table]
+  (let [xformed (serdes.base/load-xform-basics table)] ; then resolve the Database name back to a local id
+    (assoc xformed :db_id (db/select-one-field :id 'Database :name (:db_id table)))))
diff --git a/test/metabase/test/generate.clj b/test/metabase/test/generate.clj
index a7fa4f8e1aa..8f6d4f6a0c5 100644
--- a/test/metabase/test/generate.clj
+++ b/test/metabase/test/generate.clj
@@ -2,6 +2,7 @@
   (:require [clojure.spec.alpha :as s]
             [clojure.test.check.generators :as gen]
             [java-time :as t]
+            [metabase.mbql.util :as mbql.u]
             [metabase.models :refer [Activity Card Collection Dashboard DashboardCard DashboardCardSeries Database
                                      Dimension Field Metric NativeQuerySnippet PermissionsGroup
                                      PermissionsGroupMembership Pulse PulseCard PulseChannel Table User]]
@@ -80,9 +81,11 @@
             (rand-nth reserved-words)
             (cond-> (random-desc)
               (coin-toss 0.1)
-              (str (rand-nth "áîë磢™🍒"))
+              (str (rand-nth "áîë磢™"))
               (coin-toss 0.01)
-              (str (subs (tt/lorem-ipsum) 1 200))
+              (str "🍒") ; This one can't be merged with the above, `rand-nth` treats it as two (broken) characters.
+              (coin-toss 0.01)
+              (str (subs (tt/lorem-ipsum) 1 120))
               (coin-toss 0.01)
               (-> first str))))
         (gen/return nil)))))
@@ -205,8 +208,7 @@
    :field                        {:prefix      :field
                                   :spec        ::field
                                   :insert!     {:model Field}
-                                  :relations   {:table_id [:table :id]}
-                                  :constraints {:table_id #{:uniq}}}
+                                  :relations   {:table_id [:table :id]}}
    :metric                       {:prefix    :metric
                                   :spec      ::metric
                                   :insert!   {:model Metric}
@@ -241,6 +243,8 @@
   [query]
   (rsg/ent-db-spec-gen {:schema schema} query))
 
+(def ^:private unique-name (mbql.u/unique-name-generator)) ; one shared generator: applied to both :table and :field names below
+
 (def ^:private field-positions (atom {:table-fields {}}))
 (defn- adjust
   "Some fields have to be semantically correct, or db correct. fields have position, and they do have to be unique.
@@ -255,6 +259,14 @@
            (-> (swap! field-positions update-in [:table-fields (:table_id visit-val)] (fnil inc 0))
                (get-in [:table-fields (:table_id visit-val)])))
 
+    ;; Table names need to be unique within their database. This enforces it, and appends junk to names if needed.
+    (= :table ent-type)
+    (update :name unique-name)
+
+    ;; Field names need to be unique within their table. This enforces it, and appends junk to names if needed.
+    (= :field ent-type)
+    (update :name unique-name)
+
     (and (:description visit-val) (coin-toss 0.2))
     (dissoc :description)))
 
-- 
GitLab