diff --git a/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj b/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj index 8b96c90184904f60a8eec28d3583e01ae57f6452..df63f2e6a0cc8b5083e4cce83d8ce4fd511e96e2 100644 --- a/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj +++ b/enterprise/backend/test/metabase_enterprise/serialization/v2/extract_test.clj @@ -1,5 +1,6 @@ (ns metabase-enterprise.serialization.v2.extract-test - (:require [clojure.test :refer :all] + (:require [cheshire.core :as json] + [clojure.test :refer :all] [metabase-enterprise.serialization.test-util :as ts] [metabase-enterprise.serialization.v2.extract :as extract] [metabase.models :refer [Card Collection Dashboard DashboardCard Database Dimension Field Metric @@ -98,16 +99,23 @@ Database [{db-id :id} {:name "My Database"}] Table [{no-schema-id :id} {:name "Schemaless Table" :db_id db-id}] + Field [{field-id :id} {:name "Some Field" :table_id no-schema-id}] Table [{schema-id :id} {:name "Schema'd Table" :db_id db-id :schema "PUBLIC"}] Card [{c1-id :id - c1-eid :entity_id} {:name "Some Question" + c1-eid :entity_id + :as c1} {:name "Some Question" :database_id db-id :table_id no-schema-id :collection_id coll-id :creator_id mark-id - :dataset_query "{\"json\": \"string values\"}"}] + :dataset_query + (json/generate-string + {:query {:source-table no-schema-id + :filter [:>= [:field field-id nil] 18] + :aggregation [[:count]]} + :database db-id})}] Card [{c2-id :id c2-eid :entity_id} {:name "Second Question" :database_id db-id @@ -134,16 +142,23 @@ :table_id (s/eq ["My Database" nil "Schemaless Table"]) :creator_id (s/eq "mark@direstrai.ts") :collection_id (s/eq coll-eid) - :dataset_query (s/eq "{\"json\": \"string values\"}") + :dataset_query (s/eq {:query {:source-table ["My Database" nil "Schemaless Table"] + :filter [">=" [:field ["My Database" nil "Schemaless Table" "Some Field"] nil] 18] + :aggregation [[:count]]} + :database "My Database"}) :created_at LocalDateTime (s/optional-key :updated_at) LocalDateTime s/Keyword s/Any} ser)) (is (not (contains? ser :id))) - (testing "cards depend on their Table and Collection" - (is (= #{[{:model "Database" :id "My Database"} + (testing "cards depend on their Table and Collection, and also anything referenced in the query" + (is (= #{[{:model "Database" :id "My Database"}] + [{:model "Database" :id "My Database"} {:model "Table" :id "Schemaless Table"}] + [{:model "Database" :id "My Database"} + {:model "Table" :id "Schemaless Table"} + {:model "Field" :id "Some Field"}] [{:model "Collection" :id coll-eid}]} (set (serdes.base/serdes-dependencies ser)))))) @@ -152,7 +167,7 @@ :table_id (s/eq ["My Database" "PUBLIC" "Schema'd Table"]) :creator_id (s/eq "mark@direstrai.ts") :collection_id (s/eq coll-eid) - :dataset_query (s/eq "{}") ; Undecoded, still a string. + :dataset_query (s/eq {}) :created_at LocalDateTime (s/optional-key :updated_at) LocalDateTime s/Keyword s/Any} diff --git a/enterprise/backend/test/metabase_enterprise/serialization/v2/load_test.clj b/enterprise/backend/test/metabase_enterprise/serialization/v2/load_test.clj index 401562b9607288967f52963fc332df5a595a0676..2079e194bfbe0df4eb7c4f5c7e0a95d561395e89 100644 --- a/enterprise/backend/test/metabase_enterprise/serialization/v2/load_test.clj +++ b/enterprise/backend/test/metabase_enterprise/serialization/v2/load_test.clj @@ -4,7 +4,8 @@ [metabase-enterprise.serialization.v2.extract :as serdes.extract] [metabase-enterprise.serialization.v2.ingest :as serdes.ingest] [metabase-enterprise.serialization.v2.load :as serdes.load] - [metabase.models :refer [Collection Database Pulse PulseChannel PulseChannelRecipient Table User]] + [metabase.models :refer [Card Collection Database Field Pulse PulseChannel PulseChannelRecipient Table + User]] [metabase.models.serialization.base :as serdes.base] [metabase.models.serialization.hash :as serdes.hash] [toucan.db :as db])) @@ -254,3 +255,91 @@ (is (= 3 (db/count PulseChannelRecipient))) (is (= #{(:id @u1d) (:id @u2d) (:id @u3d)} (db/select-field :user_id PulseChannelRecipient))))))))) + +(deftest card-dataset-query-test + ;; Card.dataset_query is a JSON-encoded MBQL query, which contain database, table, and field IDs - these need to be + ;; converted to a portable form and read back in. + ;; This test has a database, table and fields, that exist on both sides with different IDs, and expects a card that + ;; references those fields to be correctly loaded with the dest IDs. + (testing "embedded MBQL in Card :dataset-query is portable" + (let [serialized (atom nil) + coll1s (atom nil) + db1s (atom nil) + table1s (atom nil) + field1s (atom nil) + card1s (atom nil) + user1s (atom nil) + db1d (atom nil) + table1d (atom nil) + field1d (atom nil) + user1d (atom nil) + card1d (atom nil) + db2d (atom nil) + table2d (atom nil) + field2d (atom nil)] + + + (ts/with-source-and-dest-dbs + (testing "serializing the original database, table, field and card" + (ts/with-source-db + (reset! coll1s (ts/create! Collection :name "pop! minis")) + (reset! db1s (ts/create! Database :name "my-db")) + (reset! table1s (ts/create! Table :name "customers" :db_id (:id @db1s))) + (reset! field1s (ts/create! Field :name "age" :table_id (:id @table1s))) + (reset! user1s (ts/create! User :first_name "Tom" :last_name "Scholz" :email "tom@bost.on")) + (reset! card1s (ts/create! Card + :database_id (:id @db1s) + :table_id (:id @table1s) + :collection_id (:id @coll1s) + :creator_id (:id @user1s) + :query_type :query + :name "Example Card" + :dataset_query {:type :query + :query {:source-table (:id @table1s) + :filter [:>= [:field (:id @field1s) nil] 18] + :aggregation [[:count]]} + :database (:id @db1s)} + :display :line)) + (reset! serialized (into [] (serdes.extract/extract-metabase {}))))) + + (testing "the serialized form is as desired" + (is (= {:type "query" + :query {:source-table ["my-db" nil "customers"] + :filter [">=" [:field ["my-db" nil "customers" "age"] nil] 18] + :aggregation [[:count]]} + :database "my-db"} + (->> (by-model @serialized "Card") + first + :dataset_query)))) + + (testing "deserializing adjusts the IDs properly" + (ts/with-dest-db + ;; A different database and tables, so the IDs don't match. + (reset! db2d (ts/create! Database :name "other-db")) + (reset! table2d (ts/create! Table :name "orders" :db_id (:id @db2d))) + (reset! field2d (ts/create! Field :name "subtotal" :table_id (:id @table2d))) + (reset! user1d (ts/create! User :first_name "Tom" :last_name "Scholz" :email "tom@bost.on")) + + ;; Load the serialized content. + (serdes.load/load-metabase (ingestion-in-memory @serialized)) + + ;; Fetch the relevant bits + (reset! db1d (db/select-one Database :name "my-db")) + (reset! table1d (db/select-one Table :name "customers")) + (reset! field1d (db/select-one Field :table_id (:id @table1d) :name "age")) + (reset! card1d (db/select-one Card :name "Example Card")) + + (testing "the main Database, Table, and Field have different IDs now" + (is (not= (:id @db1s) (:id @db1d))) + (is (not= (:id @table1s) (:id @table1d))) + (is (not= (:id @field1s) (:id @field1d)))) + + (is (not= (:dataset_query @card1s) + (:dataset_query @card1d))) + (testing "the Card's query is based on the new Database, Table, and Field IDs" + (is (= {:type :query + :query {:source-table (:id @table1d) + :filter [:>= [:field (:id @field1d) nil] 18] + :aggregation [[:count]]} + :database (:id @db1d)} + (:dataset_query @card1d)))))))))) diff --git a/src/metabase/models/card.clj b/src/metabase/models/card.clj index 71bb1f5d8d75e1d033ed16780bf5e4d69f48f60b..197e5e0fa8f7d3c2c9c1e3984aea1a87b956a0b9 100644 --- a/src/metabase/models/card.clj +++ b/src/metabase/models/card.clj @@ -324,7 +324,9 @@ (update :database_id serdes.util/export-fk-keyed 'Database :name) (update :table_id serdes.util/export-table-fk) (update :collection_id serdes.util/export-fk 'Collection) - (update :creator_id serdes.util/export-fk-keyed 'User :email))) + (update :creator_id serdes.util/export-fk-keyed 'User :email) + (update :dataset_query serdes.util/export-json-mbql) + (dissoc :result_metadata))) ; Not portable, and can be rebuilt on the other side. (defmethod serdes.base/load-xform "Card" [card] @@ -333,10 +335,12 @@ (update :database_id serdes.util/import-fk-keyed 'Database :name) (update :table_id serdes.util/import-table-fk) (update :creator_id serdes.util/import-fk-keyed 'User :email) - (update :collection_id serdes.util/import-fk 'Collection))) + (update :collection_id serdes.util/import-fk 'Collection) + (update :dataset_query serdes.util/import-json-mbql))) (defmethod serdes.base/serdes-dependencies "Card" - [{:keys [collection_id table_id]}] + [{:keys [collection_id dataset_query table_id]}] ;; The Table implicitly depends on the Database. - [(serdes.util/table->path table_id) - [{:model "Collection" :id collection_id}]]) + (into [] (set/union #{(serdes.util/table->path table_id) + [{:model "Collection" :id collection_id}]} + (serdes.util/mbql-deps dataset_query)))) diff --git a/src/metabase/models/serialization/util.clj b/src/metabase/models/serialization/util.clj index 7e0d29beefd8cfebff6713cbad99c85c27b911bb..964d3225262a4e674669e89bef6073e42fddedf1 100644 --- a/src/metabase/models/serialization/util.clj +++ b/src/metabase/models/serialization/util.clj @@ -2,7 +2,16 @@ "Helpers intended to be shared by various models. Most of these are common operations done while (de)serializing several models, like handling a foreign key on a Table or user." - (:require [metabase.models.serialization.base :as serdes.base] + (:require [cheshire.core :as json] + [clojure.core.match :refer [match]] + [clojure.set :as set] + [clojure.string :as str] + [medley.core :as m] + [metabase.mbql.normalize :as mbql.normalize] + [metabase.mbql.schema :as mbql.s] + [metabase.mbql.util :as mbql.u] + [metabase.models.serialization.base :as serdes.base] + [metabase.shared.models.visualization-settings :as mb.viz] [toucan.db :as db] [toucan.models :as models])) @@ -66,7 +75,7 @@ (defn import-table-fk "Given a `table_id` as exported by [[export-table-fk]], resolve it back into a numeric `table_id`." [[db-name schema table-name]] - (db/select-one-field :id 'Table :name table-name :schema schema :db_id (db/select-one-id 'Database :name db-name))) + (db/select-one-field :id 'Table :name table-name :schema schema :db_id (db/select-one-field :id 'Database :name db-name))) (defn table->path "Given a `table_id` as exported by [[export-table-fk]], turn it into a `[{:model ...}]` path for the Table. @@ -100,3 +109,153 @@ (when schema {:model "Schema" :id schema}) {:model "Table" :id table-name} {:model "Field" :id field-name}])) + +;; ---------------------------------------------- JSON-encoded MBQL -------------------------------------------------- +(defn- mbql-entity-reference? + "Is given form an MBQL entity reference?" + [form] + (mbql.normalize/is-clause? #{:field :field-id :fk-> :metric :segment} form)) + +(defn- mbql-id->fully-qualified-name + [mbql] + (-> mbql + mbql.normalize/normalize-tokens + (mbql.u/replace + ;; `integer?` guard is here to make the operation idempotent + [:field (id :guard integer?) opts] + [:field (export-field-fk id) (mbql-id->fully-qualified-name opts)] + + ;; field-id is still used within parameter mapping dimensions + ;; example relevant clause - [:dimension [:fk-> [:field-id 1] [:field-id 2]]] + [:field-id (id :guard integer?)] + [:field-id (export-field-fk id)] + + {:source-table (id :guard integer?)} + (assoc &match :source-table (export-table-fk id)) + + ;; source-field is also used within parameter mapping dimensions + ;; example relevant clause - [:field 2 {:source-field 1}] + {:source-field (id :guard integer?)} + (assoc &match :source-field (export-field-fk id)) + + [:metric (id :guard integer?)] + [:metric (export-fk id 'Metric)] + + [:segment (id :guard integer?)] + [:segment (export-fk id 'Segment)]))) + +(defn- ids->fully-qualified-names + [entity] + (mbql.u/replace entity + mbql-entity-reference? + (mbql-id->fully-qualified-name &match) + + map? + (as-> &match entity + (m/update-existing entity :database (fn [db-id] + (if (= db-id mbql.s/saved-questions-virtual-database-id) + "database/__virtual" + (db/select-one-field :name 'Database :id db-id)))) + (m/update-existing entity :card_id #(export-fk % 'Card)) ; attibutes that refer to db fields use _ + (m/update-existing entity :card-id #(export-fk % 'Card)) ; template-tags use dash + (m/update-existing entity :source-table (fn [source-table] + (if (and (string? source-table) + (str/starts-with? source-table "card__")) + (export-fk (-> source-table + (str/split #"__") + second + Integer/parseInt) + 'Card) + (export-table-fk source-table)))) + (m/update-existing entity :breakout (fn [breakout] + (map mbql-id->fully-qualified-name breakout))) + (m/update-existing entity :aggregation (fn [aggregation] + (m/map-vals mbql-id->fully-qualified-name aggregation))) + (m/update-existing entity :filter (fn [filter] + (m/map-vals mbql-id->fully-qualified-name filter))) + (m/update-existing entity ::mb.viz/param-mapping-source export-field-fk) + (m/update-existing entity :snippet-id export-fk 'NativeQuerySnippet) + (m/map-vals ids->fully-qualified-names entity)))) + +(defn export-json-mbql + "Given a JSON string with an MBQL expression inside it, convert it to an EDN structure and turn the non-portable + Database, Table and Field IDs inside it into portable references. Returns it as an EDN structure, which is more + human-fiendly in YAML." + [encoded] + (-> encoded + (json/parse-string true) + ids->fully-qualified-names)) + +(defn- mbql-fully-qualified-names->ids* + [entity] + (mbql.u/replace entity + ;; handle legacy `:field-id` forms encoded prior to 0.39.0 + ;; and also *current* expresion forms used in parameter mapping dimensions + ;; example relevant clause - [:dimension [:fk-> [:field-id 1] [:field-id 2]]] + [:field-id (fully-qualified-name :guard string?)] + (mbql-fully-qualified-names->ids* [:field fully-qualified-name nil]) + + [:field (fully-qualified-name :guard vector?) opts] + [:field (import-field-fk fully-qualified-name) (mbql-fully-qualified-names->ids* opts)] + + ;; source-field is also used within parameter mapping dimensions + ;; example relevant clause - [:field 2 {:source-field 1}] + {:source-field (fully-qualified-name :guard vector?)} + (assoc &match :source-field (import-field-fk fully-qualified-name)) + + {:database (fully-qualified-name :guard string?)} + (-> &match + (assoc :database (db/select-one-id 'Database :name fully-qualified-name)) + mbql-fully-qualified-names->ids*) ; Process other keys + + [:metric (fully-qualified-name :guard serdes.base/entity-id?)] + [:metric (import-fk fully-qualified-name 'Metric)] + + [:segment (fully-qualified-name :guard serdes.base/entity-id?)] + [:segment (import-fk fully-qualified-name 'Segment)] + + (_ :guard (every-pred map? #(vector? (:source-table %)))) + (-> &match + (assoc :source-table (import-table-fk (:source-table &match))) + mbql-fully-qualified-names->ids*))) ;; process other keys + +(defn- mbql-fully-qualified-names->ids + [entity] + (mbql-fully-qualified-names->ids* entity)) + +(defn import-json-mbql + "Given an MBQL expression as an EDN structure with portable IDs embedded, convert the IDs back to raw numeric IDs + and then convert the result back into a JSON string." + [exported] + (-> exported + mbql-fully-qualified-names->ids + json/generate-string)) + +(declare ^:private mbql-deps-map) + +(defn- mbql-deps-vector [entity] + (match entity + [:field (field :guard vector?) tail] (into #{(field->path field)} (mbql-deps-map tail)) + :else (reduce #(cond + (map? %2) (into %1 (mbql-deps-map %2)) + (vector? %2) (into %1 (mbql-deps-vector %2)) + :else %1) + #{} + entity))) + +(defn- mbql-deps-map [entity] + (->> (for [[k v] entity] + (cond + (and (= k :database) (string? v)) #{[{:model "Database" :id v}]} + (and (= k :source-table) (vector? v)) #{(table->path v)} + (and (= k :source-field) (vector? v)) #{(field->path v)} + (map? v) (mbql-deps-map v) + (vector? v) (mbql-deps-vector v))) + (reduce set/union #{}))) + +(defn mbql-deps + "Given an MBQL expression as exported, with qualified names like `[\"some-db\" \"schema\" \"table_name\"]` instead of + raw IDs, return the corresponding set of serdes-dependencies. The query can't be imported until all the referenced + databases, tables and fields are loaded." + [entity] + (mbql-deps-map entity)) ;; process other keys