Skip to content
Snippets Groups Projects
Unverified Commit 3f294234 authored by Braden Shepherdson's avatar Braden Shepherdson Committed by GitHub
Browse files

YAML files for serialization (#23491)

Write `storage.yaml` and `ingest.yaml` to serialize all the way to YAML files and back.

Lots of generative testing to check it's isomorphic.
parent 1808b586
No related branches found
No related tags found
No related merge requests found
Showing with 244 additions and 28 deletions
......@@ -366,6 +366,7 @@
metabase.test/with-user-in-groups clojure.core/let
metabase.test.data.interface/defdataset clojure.core/def
metabase.test.data.interface/defdataset-edn clojure.core/def
metabase-enterprise.serialization.test-util/with-random-dump-dir clojure.core/fn
metabase.driver.mongo.util/with-mongo-connection clojure.core/let
metabase.driver.mongo.query-processor/mongo-let clojure.core/let
toucan.db/with-call-counting clojure.core/fn
......
(ns metabase-enterprise.serialization.v2.ingest.yaml
(:require [clojure.java.io :as io]
[metabase-enterprise.serialization.v2.ingest :as ingest]
[yaml.core :as yaml])
(:import java.io.File))
(defmulti ^:private build-metas
  "Given one file from the dump, returns a sequence of meta maps (`{:model ... :id ...}`, optionally with `:label`)
  for the entities stored in it. Dispatches on the file's base name."
  (fn [^File yaml-file]
    (.getName yaml-file)))
(defmethod build-metas "settings.yaml"
  [settings-file]
  ;; Each top-level key in the settings file becomes its own "Setting" meta map, identified by the key's name.
  (map (fn [[setting-key]]
         {:model "Setting" :id (name setting-key)})
       (yaml/from-file settings-file)))
(defmethod build-metas :default
  [^File file]
  ;; Entity files are named `<id>.yaml` or `<id>+<label>.yaml` and sit in a directory named after their model, so
  ;; the parent directory's name supplies `:model` and the base name supplies `:id` and the optional `:label`.
  (let [model-name (-> file .getParentFile .getName)]
    ;; The `.` before `yaml` is escaped so only a literal `.yaml` extension matches; unescaped it would accept any
    ;; character in that position (e.g. `fooXyaml`).
    (if-let [[_ id label] (re-matches #"^([A-Za-z0-9_-]+)(?:\+(.*))?\.yaml$" (.getName file))]
      [(cond-> {:model model-name :id id}
         label (assoc :label label))]
      ;; Not an entity file (editor backup, `.DS_Store`, ...). Contribute nothing rather than a meta map with a
      ;; nil `:id`, which could never be ingested.
      [])))
(defn- ingest-entity
  "Reads the YAML file for the entity described by `meta-map` from `root-dir/<model>/` and returns its contents with
  `meta-map` attached under `:serdes/meta`. The file is `<id>+<label>.yaml` when a label is present, else
  `<id>.yaml`."
  [root-dir {:keys [model id label] :as meta-map}]
  (let [basename (str id (when label (str "+" label)) ".yaml")
        contents (yaml/from-file (io/file root-dir model basename))]
    (assoc contents :serdes/meta meta-map)))
;; Ingestable over a directory tree of YAML files. `settings` holds the already-parsed contents of the settings file
;; as passed to the constructor; all other entities are read from disk on demand.
(deftype YamlIngestion [^File root-dir settings]
  ingest/Ingestable
  ;; Walks everything under `root-dir` and emits the meta maps of every regular file found.
  (ingest-list [_this]
    (let [regular-file? (fn [^File candidate] (.isFile candidate))]
      (eduction (filter regular-file?)
                (mapcat build-metas)
                (file-seq root-dir))))

  ;; Settings are answered from the in-memory `settings` map; every other model is read from its own file.
  (ingest-one [_this {:keys [model id] :as meta-map}]
    (if (not= "Setting" model)
      (ingest-entity root-dir meta-map)
      (let [setting-key (keyword id)]
        {:serdes/meta meta-map
         :key         setting-key
         :value       (get settings setting-key)}))))
(defn ingest-yaml
  "Builds an Ingestable backed by the directory of YAML files at `root-dir`, in the layout written by
  [[metabase-enterprise.serialization.v2.storage.yaml]]. The top-level `settings.yaml` is read right away."
  [root-dir]
  (let [root     (io/file root-dir)
        settings (yaml/from-file (io/file root-dir "settings.yaml"))]
    (->YamlIngestion root settings)))
......@@ -56,12 +56,12 @@
[[metabase.models.serialization.base/load-one!]] and its various overridable parts, which see.
Circular dependencies are not allowed, and are detected and thrown as an error."
[{:keys [expanding ingestion seen] :as ctx} {:keys [id type] :as meta-map}]
[{:keys [expanding ingestion seen] :as ctx} {id :id model-name :model :as meta-map}]
(cond
(expanding id) (throw (ex-info (format "Circular dependency on %s %s" type id) {}))
(expanding id) (throw (ex-info (format "Circular dependency on %s %s" model-name id) {}))
(seen id) ctx ; Already been done, just skip it.
:else (let [ingested (serdes.ingest/ingest-one ingestion meta-map)
model (db/resolve-model (symbol type))
model (db/resolve-model (symbol model-name))
deps (serdes.base/serdes-dependencies ingested)
ctx (-> ctx
(update :expanding conj id)
......
(ns metabase-enterprise.serialization.v2.storage
"A multimethod entry point for storage sinks. Storage is the second phase of serialization.
See [[metabase.models.serialization.base]] for detailed documentation of the serialization process.
Implementations of storage should live in [[metabase-enterprise.serialization.v2.storage.yaml]] and similar.")
(defmulti store-all!
  "`(store-all! stream opts)`

  Entry point for storage sinks.

  - `stream` is a reducible stream of portable maps, each carrying a `:serdes/meta` key.
  - `opts` is a map of options, for example the path to the root directory.

  The serialization process as a whole, and the maps found in the stream, are documented in detail in
  [[metabase.models.serialization.base]].

  Dispatches on `:storage/target`, the only key that is required in `opts`."
  (fn [_stream {:keys [storage/target]}]
    target))
(ns metabase-enterprise.serialization.v2.storage.yaml
(:require [clojure.java.io :as io]
[metabase-enterprise.serialization.v2.storage :as storage]
[yaml.core :as yaml]))
(defn- spit-yaml
  "Writes `obj` as block-style YAML to the file named by `path`, a sequence of path segments as accepted by
  `io/file`. Missing parent directories are created first."
  [path obj]
  (let [target (apply io/file path)]
    (io/make-parents target)
    (spit target (yaml/generate-string obj :dumper-options {:flow-style :block}))))
(defn- store-entity!
  "Writes one entity to `root-dir/<model>/<id>.yaml`, or `<id>+<label>.yaml` when its `:serdes/meta` has a label.
  The `:serdes/meta` key itself is not written to the file."
  [{:keys [root-dir]} {{:keys [id model label]} :serdes/meta :as entity}]
  (let [;; `+` separates the ID from the label: it is legal and needs no escaping on all common filesystems, and it
        ;; never appears in an `identity-hash` or a NanoID.
        filename (if (some? label)
                   (str id "+" label ".yaml")
                   (str id ".yaml"))
        contents (dissoc entity :serdes/meta)]
    (spit-yaml [root-dir model filename] contents)))
(defn- store-settings!
  "Writes all of `settings` (maps with `:key` and `:value`) into a single `settings.yaml` at the top of `root-dir`,
  as one map sorted by key."
  [{:keys [root-dir]} settings]
  (let [sorted-settings (into (sorted-map)
                              (map (juxt :key :value))
                              settings)]
    (spit-yaml [root-dir "settings.yaml"] sorted-settings)))
(defmethod storage/store-all! :yaml
  [stream opts]
  ;; Stores every entity of `stream` as YAML under `(:root-dir opts)`: one file per entity, except Settings, which
  ;; are gathered and written together at the end by `store-settings!`.
  ;; Throws `ex-info` when `:root-dir` is neither a string nor a `java.io.File`.
  (when-not (or (string? (:root-dir opts))
                (instance? java.io.File (:root-dir opts)))
    (throw (ex-info ":yaml storage requires the :root-dir option to be a string or File"
                    {:opts opts})))
  ;; `stream` is documented as a *reducible* stream, so it is consumed with `reduce` rather than `doseq`: that also
  ;; accepts inputs that can only be reduced (not seq'd), and the accumulator replaces the mutable atom.
  (let [settings (reduce (fn [settings entity]
                           (if (-> entity :serdes/meta :model (= "Setting"))
                             (conj settings entity)
                             (do (store-entity! opts entity)
                                 settings)))
                         []
                         stream)]
    (store-settings! opts settings)))
(defn store!
  "Convenience wrapper: stores the serialized entities in `stream` as a tree of YAML files rooted at `root-dir`."
  [stream root-dir]
  (let [opts {:storage/target :yaml
              :root-dir       root-dir}]
    (storage/store-all! stream opts)))
......@@ -28,7 +28,7 @@
(testing "a top-level collection is extracted correctly"
(let [ser (serdes.base/extract-one "Collection" (select-one "Collection" [:= :id coll-id]))]
(is (= {:type "Collection" :id coll-eid :label coll-slug} (:serdes/meta ser)))
(is (= {:model "Collection" :id coll-eid :label coll-slug} (:serdes/meta ser)))
(is (not (contains? ser :location)))
(is (not (contains? ser :id)))
(is (nil? (:personal_owner_id ser)))
......@@ -37,7 +37,7 @@
(testing "a nested collection is extracted with the right parent_id"
(let [ser (serdes.base/extract-one "Collection" (select-one "Collection" [:= :id child-id]))]
(is (= {:type "Collection" :id child-eid :label child-slug} (:serdes/meta ser)))
(is (= {:model "Collection" :id child-eid :label child-slug} (:serdes/meta ser)))
(is (not (contains? ser :location)))
(is (not (contains? ser :id)))
(is (= coll-eid (:parent_id ser)))
......@@ -45,7 +45,7 @@
(testing "personal collections are extracted with email as key"
(let [ser (serdes.base/extract-one "Collection" (select-one "Collection" [:= :id pc-id]))]
(is (= {:type "Collection" :id pc-eid :label pc-slug} (:serdes/meta ser)))
(is (= {:model "Collection" :id pc-eid :label pc-slug} (:serdes/meta ser)))
(is (not (contains? ser :location)))
(is (not (contains? ser :id)))
(is (nil? (:parent_id ser)))
......@@ -55,7 +55,7 @@
(letfn [(collections [extraction] (->> extraction
(into [])
(map :serdes/meta)
(filter #(= "Collection" (:type %)))
(filter #(= "Collection" (:model %)))
(map :id)
set))]
(testing "no user specified"
......
......@@ -9,16 +9,16 @@
[toucan.db :as db]))
(defn- ingestion-in-memory [extractions]
(let [mapped (into {} (for [{{:keys [type id]} :serdes/meta :as m} (into [] extractions)]
[[type id] m]))]
(let [mapped (into {} (for [{{:keys [model id]} :serdes/meta :as m} (into [] extractions)]
[[model id] m]))]
(reify
serdes.ingest/Ingestable
(ingest-list [_]
(eduction (map :serdes/meta) (vals mapped)))
(ingest-one [_ {:keys [type id]}]
(or (get mapped [type id])
(throw (ex-info (format "Unknown ingestion target: %s %s" type id)
{:type type :id id :world mapped})))))))
(ingest-one [_ {:keys [model id]}]
(or (get mapped [model id])
(throw (ex-info (format "Unknown ingestion target: %s %s" model id)
{:model model :id id :world mapped})))))))
;;; WARNING for test authors: [[extract/extract-metabase]] returns a lazy reducible value. To make sure you don't
;;; confound your tests with data from your dev appdb, remember to eagerly
......@@ -33,8 +33,8 @@
(ts/with-source-db
(ts/create! Collection :name "Basic Collection" :entity_id eid1)
(reset! serialized (into [] (serdes.extract/extract-metabase {})))
(is (some (fn [{{:keys [type id]} :serdes/meta}]
(and (= type "Collection") (= id eid1)))
(is (some (fn [{{:keys [model id]} :serdes/meta}]
(and (= model "Collection") (= id eid1)))
@serialized))))
(testing "loading into an empty database succeeds"
......@@ -113,7 +113,7 @@
(serdes.hash/identity-hash @c2b)}
(->> @serialized
(map :serdes/meta)
(filter #(= "Collection" (:type %)))
(filter #(= "Collection" (:model %)))
(map :id)
set))))
......
(ns metabase-enterprise.serialization.v2.yaml-test
(:require [clojure.java.io :as io]
[clojure.test :refer :all]
[metabase-enterprise.serialization.test-util :as ts]
[metabase-enterprise.serialization.v2.extract :as extract]
[metabase-enterprise.serialization.v2.ingest :as ingest]
[metabase-enterprise.serialization.v2.ingest.yaml :as ingest.yaml]
[metabase-enterprise.serialization.v2.storage.yaml :as storage.yaml]
[metabase.models.collection :refer [Collection]]
[metabase.test.generate :as test-gen]
[reifyhealth.specmonstah.core :as rs]
[yaml.core :as yaml]))
(defn- dir->file-set
  "Returns the set of names of the regular files directly inside `dir`; subdirectories are left out."
  [dir]
  (into #{}
        (comp (filter #(.isFile %))
              (map #(.getName %)))
        (.listFiles dir)))
;; Checks the storage half on its own: extracts a parent and a child Collection, stores them as YAML, then verifies
;; both the file layout and the contents of each Collection file.
(deftest basic-dump-test
;; NOTE(review): presumably binds `dump-dir` to a fresh, randomly named directory -- confirm in test-util.
(ts/with-random-dump-dir [dump-dir]
(ts/with-empty-h2-app-db
;; The child's :location is built from the parent's numeric ID.
(ts/with-temp-dpc [Collection [parent {:name "Some Collection"}]
Collection [child {:name "Child Collection" :location (format "/%d/" (:id parent))}]]
;; The extraction is realized eagerly into a vector before anything is stored.
(let [export (into [] (extract/extract-metabase nil))
;; Expected file names have the form `<entity_id>+<label>.yaml`.
parent-filename (format "%s+some_collection.yaml" (:entity_id parent))
child-filename (format "%s+child_collection.yaml" (:entity_id child))]
(storage.yaml/store! export dump-dir)
(testing "the right files in the right places"
(is (= #{parent-filename child-filename}
(dir->file-set (io/file dump-dir "Collection")))
"Entities go in subdirectories")
(is (= #{"settings.yaml"}
(dir->file-set (io/file dump-dir)))
"A few top-level files are expected"))
(testing "the Collections properly exported"
;; Expected contents: the Collection row without :id and :location, with :parent_id set to nil for the parent...
(is (= (-> (into {} (Collection (:id parent)))
(dissoc :id :location)
(assoc :parent_id nil))
(yaml/from-file (io/file dump-dir "Collection" parent-filename))))
;; ...and to the parent's entity_id for the child.
(is (= (-> (into {} (Collection (:id child)))
(dissoc :id :location)
(assoc :parent_id (:entity_id parent)))
(yaml/from-file (io/file dump-dir "Collection" child-filename))))))))))
;; Checks the ingestion half on its own: hand-writes a small dump (a settings file plus two Collection files, one
;; with a label and one without) and verifies what `ingest-list` and `ingest-one` read back from it.
(deftest basic-ingest-test
  (ts/with-random-dump-dir [dump-dir]
    (let [write-yaml! (fn [file data] (spit file (yaml/generate-string data)))]
      ;; Creates dump-dir/Collection/; "fake" is only a placeholder leaf so that its parents get made.
      (io/make-parents dump-dir "Collection" "fake")
      (write-yaml! (io/file dump-dir "settings.yaml")
                   {:some-key    "with string value"
                    :another-key 7
                    :blank-key   nil})
      (write-yaml! (io/file dump-dir "Collection" "fake-id+the_label.yaml")
                   {:some "made up" :data "here"})
      (write-yaml! (io/file dump-dir "Collection" "no-label.yaml")
                   {:some "other" :data "in this one"}))
    (let [ingestable (ingest.yaml/ingest-yaml dump-dir)
          listed     (into [] (ingest/ingest-list ingestable))
          ;; Expected meta map -> expected ingested contents (minus :serdes/meta, which is added below).
          expected   {{:model "Collection" :id "fake-id" :label "the_label"} {:some "made up" :data "here"}
                      {:model "Collection" :id "no-label"}                   {:some "other" :data "in this one"}
                      {:model "Setting" :id "some-key"}                      {:key :some-key :value "with string value"}
                      {:model "Setting" :id "another-key"}                   {:key :another-key :value 7}
                      {:model "Setting" :id "blank-key"}                     {:key :blank-key :value nil}}]
      (testing "the right set of file is returned by ingest-list"
        (is (= (set (keys expected))
               (set listed))))
      (testing "individual reads in any order are correct"
        ;; Every meta map is requested 10 times, in shuffled order.
        (doseq [meta-map (shuffle (into [] cat (repeat 10 (keys expected))))]
          (is (= (assoc (get expected meta-map) :serdes/meta meta-map)
                 (ingest/ingest-one ingestable meta-map))))))))
;; End-to-end round trip: generates 100 Collections, extracts them, stores them as YAML, then ingests the YAML and
;; checks that every entity read back equals its in-memory original.
(deftest e2e-storage-ingestion-test
(ts/with-random-dump-dir [dump-dir]
(ts/with-empty-h2-app-db
;; NOTE(review): `::rs/omit` presumably keeps the generator from filling in personal_owner_id -- confirm.
(test-gen/insert! {:collection [[100 {:refs {:personal_owner_id ::rs/omit}}]]})
;; The extraction is realized eagerly, then indexed as model -> id -> entity for the comparisons below.
(let [extraction (into [] (extract/extract-metabase {}))
entities (reduce (fn [m {{:keys [model id]} :serdes/meta :as entity}]
(assoc-in m [model id] entity))
{} extraction)]
(is (= 100 (-> entities (get "Collection") vals count)))
(testing "storage"
(storage.yaml/store! (seq extraction) dump-dir)
(testing "for Collections"
(is (= 100 (count (dir->file-set (io/file dump-dir "Collection")))))
;; Each Collection is looked up on disk as `<entity_id>+<slug>.yaml` and compared without its :serdes/meta.
(doseq [{:keys [entity_id slug] :as coll} (vals (get entities "Collection"))
:let [filename (str entity_id "+" slug ".yaml")]]
(is (= (dissoc coll :serdes/meta)
(yaml/from-file (io/file dump-dir "Collection" filename))))))
(testing "for settings"
;; All extracted settings are expected in the single top-level settings.yaml, as one key -> value map.
(is (= (into {} (for [{:keys [key value]} (vals (get entities "Setting"))]
[key value]))
(yaml/from-file (io/file dump-dir "settings.yaml"))))))
(testing "ingestion"
(let [ingestable (ingest.yaml/ingest-yaml dump-dir)]
(testing "ingest-list is accurate"
;; Compares the set of all extracted :serdes/meta maps with the set that ingest-list reports.
(is (= (into #{} (comp (map vals) cat (map :serdes/meta)) (vals entities))
(into #{} (ingest/ingest-list ingestable)))))
(testing "each entity matches its in-memory original"
(doseq [entity extraction]
(is (= entity (ingest/ingest-one ingestable (:serdes/meta entity))))))))))))
......@@ -61,7 +61,7 @@
(fn [model _] model))
(defmulti extract-query
"Performs the select query, possibly filtered, for all the entities of this type that should be serialized. Called
"Performs the select query, possibly filtered, for all the entities of this model that should be serialized. Called
from [[extract-all]]'s default implementation.
`(extract-query \"ModelName\" opts)`
......@@ -87,8 +87,8 @@
(defmulti extract-one
"Extracts a single entity retrieved from the database into a portable map with `:serdes/meta` attached.
The default implementation uses the model name as the `:type` and either `:entity_id` or [[serdes.hash/identity-hash]]
as the `:id`. It also strips off the database's numeric primary key.
The default implementation uses the model name as the `:model` and either `:entity_id` or
[[serdes.hash/identity-hash]] as the `:id`. It also strips off the database's numeric primary key.
That suffices for a few simple entities, but most entities will need to override this.
They should follow the pattern of:
......@@ -132,9 +132,9 @@
(let [model (db/resolve-model (symbol model-name))
pk (models/primary-key model)]
(-> entity
(assoc :serdes/meta {:type model-name
:id (or (:entity_id entity)
(serdes.hash/identity-hash (model (get entity pk))))})
(assoc :serdes/meta {:model model-name
:id (or (:entity_id entity)
(serdes.hash/identity-hash (model (get entity pk))))})
(dissoc pk))))
(defmethod extract-one :default [model-name entity]
......@@ -198,10 +198,10 @@
;;; +----------------------------------------------------------------------------------------------------------------+
;;; The Clojure maps from extraction and ingestion always include a special key `:serdes/meta` giving some information
;;; about the serialized entity. The value is always a map like:
;;; `{:type "ModelName" :id "entity ID or identity hash string" :label "Human-readable name"}`
;;; `:type` and `:id` are required; `:label` is optional.
;;; `{:model "ModelName" :id "entity ID or identity hash string" :label "Human-readable name"}`
;;; `:model` and `:id` are required; `:label` is optional.
;;;
;;; Many of the multimethods are keyed on the `:type` field.
;;; Many of the multimethods are keyed on the `:model` field.
(defmulti load-prescan-all
"Returns a reducible stream of `[entity_id identity-hash primary-key]` triples for the entire table.
......@@ -235,9 +235,9 @@
key]))
(defn- ingested-model
"The dispatch function for several of the load multimethods: dispatching on the type of the incoming entity."
"The dispatch function for several of the load multimethods: dispatching on the model of the incoming entity."
[ingested]
(-> ingested :serdes/meta :type))
(-> ingested :serdes/meta :model))
(defmulti serdes-dependencies
"Given an entity map as ingested (not a Toucan entity) returns a (possibly empty) list of its dependencies, where each
......
......@@ -151,7 +151,7 @@
(defmethod serdes.base/extract-all "Setting" [_model _opts]
(for [{:keys [key value]} (admin-writable-site-wide-settings
:getter (partial get-value-of-type :string))]
{:serdes/meta {:type "Setting" :id (name key)}
{:serdes/meta {:model "Setting" :id (name key)}
:key key
:value value}))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment