From 10a527db601b846afb83fbec35a7296149ca4a78 Mon Sep 17 00:00:00 2001 From: Howon Lee <hlee.howon@gmail.com> Date: Wed, 23 Mar 2022 01:11:06 -0700 Subject: [PATCH] Postgres JSON nested field columns (#21007) This is the first vertical slice of #708. There is a material ways to go, including mysql implementation, plinking away at the data model stuff, and frontend. There are also big putative sync speed gains I think I should chip away at. --- .../metabase/driver/googleanalytics_test.clj | 1 + .../test/metabase/driver/redshift_test.clj | 2 + resources/migrations/000_migrations.yaml | 15 +++++++ src/metabase/driver/postgres.clj | 23 ++++++++-- src/metabase/models/field.clj | 3 +- .../middleware/add_source_metadata.clj | 2 +- src/metabase/query_processor/store.clj | 2 + src/metabase/sync/fetch_metadata.clj | 8 ++++ src/metabase/sync/interface.clj | 1 + .../sync_metadata/fields/fetch_metadata.clj | 21 +++------ .../sync_metadata/fields/sync_instances.clj | 6 ++- test/metabase/driver/postgres_test.clj | 11 +++++ test/metabase/query_processor/async_test.clj | 1 + .../middleware/add_source_metadata_test.clj | 2 +- .../middleware/annotate_test.clj | 2 + test/metabase/query_processor_test.clj | 1 + .../nested_queries_test.clj | 4 +- .../fields/fetch_metadata_test.clj | 45 ------------------- test/metabase/test/mock/util.clj | 1 + 19 files changed, 81 insertions(+), 70 deletions(-) diff --git a/modules/drivers/googleanalytics/test/metabase/driver/googleanalytics_test.clj b/modules/drivers/googleanalytics/test/metabase/driver/googleanalytics_test.clj index ff5d155e679..236a65fb75c 100644 --- a/modules/drivers/googleanalytics/test/metabase/driver/googleanalytics_test.clj +++ b/modules/drivers/googleanalytics/test/metabase/driver/googleanalytics_test.clj @@ -301,6 +301,7 @@ :name "ga:eventLabel" :settings nil :source :breakout + :nfc_path nil :parent_id nil :visibility_type :normal :display_name "ga:eventLabel" diff --git a/modules/drivers/redshift/test/metabase/driver/redshift_test.clj b/modules/drivers/redshift/test/metabase/driver/redshift_test.clj index ae0509e3c50..bc7e85cbbb8 100644 --- a/modules/drivers/redshift/test/metabase/driver/redshift_test.clj +++ b/modules/drivers/redshift/test/metabase/driver/redshift_test.clj @@ -121,6 +121,7 @@ :settings nil :source :fields :field_ref [:field (mt/id :extsales :buyerid) nil] + :nfc_path nil :parent_id nil :id (mt/id :extsales :buyerid) :visibility_type :normal @@ -135,6 +136,7 @@ :settings nil :source :fields :field_ref [:field (mt/id :extsales :salesid) nil] + :nfc_path nil :parent_id nil :id (mt/id :extsales :salesid) :visibility_type :normal diff --git a/resources/migrations/000_migrations.yaml b/resources/migrations/000_migrations.yaml index 76f08148d00..94743f5fc82 100644 --- a/resources/migrations/000_migrations.yaml +++ b/resources/migrations/000_migrations.yaml @@ -11238,6 +11238,21 @@ databaseChangeLog: AND db.object = p.object WHERE p.object IS NULL; + - changeSet: + id: v43.00-043 + author: howonlee + comment: Added 0.43.0 - Nested field columns in fields + changes: + - addColumn: + columns: + - column: + remarks: Nested field column paths, flattened + name: nfc_path + type: varchar(254) + constraints: + nullable: true + tableName: metabase_field + - changeSet: id: v43.00-044 author: noahmoss diff --git a/src/metabase/driver/postgres.clj b/src/metabase/driver/postgres.clj index c2a75eca49b..8b3c0d68342 100644 --- a/src/metabase/driver/postgres.clj +++ b/src/metabase/driver/postgres.clj @@ -32,6 +32,8 @@ (driver/register! :postgres, :parent :sql-jdbc) +(defmethod driver/database-supports? [:postgres :nested-field-columns] [_ _ _] true) + ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | metabase.driver impls | ;;; +----------------------------------------------------------------------------------------------------------------+ @@ -342,14 +344,27 @@ (pretty [_] (format "%s::%s" (pr-str expr) (name psql-type))))) +(defn- json-query [identifier nfc-path] + (letfn [(handle-name [x] (if (number? x) (str x) (name x)))] + (apply hsql/call [:json_extract_path_text + (hx/cast :json (keyword (first nfc-path))) + (mapv #(hx/cast :text (handle-name %)) (rest nfc-path))]))) + (defmethod sql.qp/->honeysql [:postgres :field] [driver [_ id-or-name _opts :as clause]] - (let [{database-type :database_type} (when (integer? id-or-name) - (qp.store/field id-or-name)) + (let [stored-field (when (integer? id-or-name) + (qp.store/field id-or-name)) parent-method (get-method sql.qp/->honeysql [:sql :field]) - identifier (parent-method driver clause)] - (if (= database-type "money") + identifier (parent-method driver clause) + nfc-path (:nfc_path stored-field)] + (cond + (= (:database_type stored-field) "money") (pg-conversion identifier :numeric) + + (some? nfc-path) + (json-query identifier nfc-path) + + :else identifier))) (defmethod unprepare/unprepare-value [:postgres Date] diff --git a/src/metabase/models/field.clj b/src/metabase/models/field.clj index 0569206769d..1054532fb05 100644 --- a/src/metabase/models/field.clj +++ b/src/metabase/models/field.clj @@ -172,7 +172,8 @@ :visibility_type :keyword :has_field_values :keyword :fingerprint :json-for-fingerprints - :settings :json}) + :settings :json + :nfc_path :json}) :properties (constantly {:timestamped? true}) :pre-insert pre-insert}) diff --git a/src/metabase/query_processor/middleware/add_source_metadata.clj b/src/metabase/query_processor/middleware/add_source_metadata.clj index 0083abe5f6d..2b3024964a5 100644 --- a/src/metabase/query_processor/middleware/add_source_metadata.clj +++ b/src/metabase/query_processor/middleware/add_source_metadata.clj @@ -56,7 +56,7 @@ :query (assoc-in source-query [:middleware :disable-remaps?] true)}))] (for [col cols] (select-keys col [:name :id :table_id :display_name :base_type :effective_type :coercion_strategy - :semantic_type :unit :fingerprint :settings :source_alias :field_ref :parent_id]))) + :semantic_type :unit :fingerprint :settings :source_alias :field_ref :nfc_path :parent_id]))) (catch Throwable e (log/error e (str (trs "Error determining expected columns for query: {0}" (ex-message e)))) nil))) diff --git a/src/metabase/query_processor/store.clj b/src/metabase/query_processor/store.clj index 934b3a63bea..84b4e01544c 100644 --- a/src/metabase/query_processor/store.clj +++ b/src/metabase/query_processor/store.clj @@ -96,6 +96,7 @@ :fingerprint :id :name + :nfc_path :parent_id :semantic_type :settings @@ -117,6 +118,7 @@ :semantic_type (s/maybe su/FieldSemanticOrRelationType) :fingerprint (s/maybe su/Map) :parent_id (s/maybe su/IntGreaterThanZero) + :nfc_path (s/maybe [su/NonBlankString]) s/Any s/Any})) diff --git a/src/metabase/sync/fetch_metadata.clj b/src/metabase/sync/fetch_metadata.clj index 1841b1051c5..e5f2b1eae14 100644 --- a/src/metabase/sync/fetch_metadata.clj +++ b/src/metabase/sync/fetch_metadata.clj @@ -3,6 +3,7 @@ tables, schemas, and fields, and their types. For example, with SQL databases, these functions use the JDBC DatabaseMetaData to get this information." (:require [metabase.driver :as driver] + [metabase.driver.sql-jdbc.sync :as sql-jdbc.sync] [metabase.driver.util :as driver.u] [metabase.sync.interface :as i] [schema.core :as s])) @@ -23,3 +24,10 @@ (let [driver (driver.u/database->driver database)] (when (driver/supports? driver :foreign-keys) (driver/describe-table-fks driver database table)))) + +(s/defn nfc-metadata :- (s/maybe #{i/TableMetadataField}) + "Get information about the nested field column fields within `table`." + [database :- i/DatabaseInstance, table :- i/TableInstance] + (let [driver (driver.u/database->driver database)] + (when (driver/supports? driver :nested-field-columns) + (sql-jdbc.sync/describe-nested-field-columns driver database table)))) diff --git a/src/metabase/sync/interface.clj b/src/metabase/sync/interface.clj index bfab0baeb2f..19c3f9f252e 100644 --- a/src/metabase/sync/interface.clj +++ b/src/metabase/sync/interface.clj @@ -32,6 +32,7 @@ (s/optional-key :field-comment) (s/maybe su/NonBlankString) (s/optional-key :pk?) s/Bool (s/optional-key :nested-fields) #{(s/recursive #'TableMetadataField)} + (s/optional-key :nfc-path) [s/Any] (s/optional-key :custom) {s/Any s/Any} ;; for future backwards compatability, when adding things s/Keyword s/Any}) diff --git a/src/metabase/sync/sync_metadata/fields/fetch_metadata.clj b/src/metabase/sync/sync_metadata/fields/fetch_metadata.clj index 5080a5aa2b2..fe57f568d94 100644 --- a/src/metabase/sync/sync_metadata/fields/fetch_metadata.clj +++ b/src/metabase/sync/sync_metadata/fields/fetch_metadata.clj @@ -3,7 +3,9 @@ about Fields in a Table, and for fetching the DB metadata itself. This metadata is used by the logic in other `metabase.sync.sync-metadata.fields.*` namespaces to determine what sync operations need to be performed by comparing the differences in the two sets of Metadata." - (:require [medley.core :as m] + (:require [clojure.set :as set] + [medley.core :as m] + [metabase.driver :as driver] [metabase.models.field :as field :refer [Field]] [metabase.models.table :as table] [metabase.sync.fetch-metadata :as fetch-metadata] @@ -48,17 +50,6 @@ (assoc metabase-field :nested-fields (set (for [nested-field nested-fields] (add-nested-fields nested-field parent-id->fields))))))) -(s/defn ^:private add-nested-field-columns :- common/TableMetadataFieldWithID - "Nested field columns are flattened, unlike the ordinary nested fields. - Add the pertinent nested fields to the parent column" - [mb-field :- common/TableMetadataFieldWithID, - nfc-fields :- #{common/TableMetadataFieldWithID}] - (let [column-fields (filter #(= (keyword (mb-field :name)) - (get-in % [:nfc-path 0])) nfc-fields)] - (if-not (seq column-fields) - mb-field - (assoc mb-field :nested-fields (set column-fields))))) - (s/defn fields->our-metadata :- #{common/TableMetadataFieldWithID} "Given a sequence of Metabase Fields, format them and return them in a hierachy so the format matches the one `db-metadata` comes back in." @@ -75,7 +66,7 @@ "Fetch active Fields from the Metabase application database for a given `table`." [table :- i/TableInstance] (db/select [Field :name :database_type :base_type :effective_type :coercion_strategy :semantic_type - :parent_id :id :description :database_position] + :parent_id :id :description :database_position :nfc_path] :table_id (u/the-id table) :active true {:order-by table/field-order-rule})) @@ -94,4 +85,6 @@ "Fetch metadata about Fields belonging to a given `table` directly from an external database by calling its driver's implementation of `describe-table`." [database :- i/DatabaseInstance table :- i/TableInstance] - (:fields (fetch-metadata/table-metadata database table))) + (cond-> (:fields (fetch-metadata/table-metadata database table)) + (driver/database-supports? (:engine database) :nested-field-columns database) + (set/union (fetch-metadata/nfc-metadata database table)))) diff --git a/src/metabase/sync/sync_metadata/fields/sync_instances.clj b/src/metabase/sync/sync_metadata/fields/sync_instances.clj index 7207bb706eb..8163a94ad8d 100644 --- a/src/metabase/sync/sync_metadata/fields/sync_instances.clj +++ b/src/metabase/sync/sync_metadata/fields/sync_instances.clj @@ -39,7 +39,7 @@ [table :- i/TableInstance, new-field-metadatas :- [i/TableMetadataField], parent-id :- common/ParentID] (when (seq new-field-metadatas) (db/insert-many! Field - (for [{:keys [database-type base-type effective-type coercion-strategy field-comment database-position], field-name :name :as field} new-field-metadatas] + (for [{:keys [database-type base-type effective-type coercion-strategy field-comment database-position nfc-path], field-name :name :as field} new-field-metadatas] (do (when (and effective-type base-type @@ -62,6 +62,7 @@ :coercion_strategy (when effective-type coercion-strategy) :semantic_type (common/semantic-type field) :parent_id parent-id + :nfc_path nfc-path :description field-comment :position database-position :database_position database-position}))))) @@ -174,7 +175,8 @@ (s/defn ^:private sync-nested-field-instances! :- (s/maybe su/IntGreaterThanOrEqualToZero) "Recursively sync Field instances (i.e., rows in application DB) for *all* the nested Fields of all Fields in - `db-metadata` and `our-metadata`." + `db-metadata` and `our-metadata`. + Not for the flattened nested fields for JSON columns in normal RDBMSes (nested field columns)" [table :- i/TableInstance db-metadata :- #{i/TableMetadataField} our-metadata :- #{common/TableMetadataFieldWithID}] diff --git a/test/metabase/driver/postgres_test.clj b/test/metabase/driver/postgres_test.clj index d0bf486a717..9226cbaaaba 100644 --- a/test/metabase/driver/postgres_test.clj +++ b/test/metabase/driver/postgres_test.clj @@ -285,6 +285,17 @@ (is (= :type/SerializedJSON (db/select-one-field :semantic_type Field, :id (mt/id :venues :address)))))))) +(deftest json-query-test + (testing "Transforming MBQL query with JSON in it to postgres query works" + (is (= ["json_extract_path_text(CAST(bleh AS json), (CAST(? AS text)))" "meh"] + (hsql/format (#'postgres/json-query 'mlep [:bleh :meh]))))) + (testing "What if types are weird and we have lists" + (is (= ["json_extract_path_text(CAST(bleh AS json), (CAST(? AS text), CAST(? AS text), CAST(? AS text)))" + "meh" + "foobar" + "1234"] + (hsql/format (#'postgres/json-query 'mlep '(:bleh "meh" :foobar 1234))))))) + (deftest describe-nested-field-columns-test (mt/test-driver :postgres (testing "flattened-row" diff --git a/test/metabase/query_processor/async_test.clj b/test/metabase/query_processor/async_test.clj index 3bf1ecbe5f8..88e48dbc717 100644 --- a/test/metabase/query_processor/async_test.clj +++ b/test/metabase/query_processor/async_test.clj @@ -28,6 +28,7 @@ :table_id (mt/id :venues) :settings nil :source :fields + :nfc_path nil :parent_id nil :visibility_type :normal :id (mt/id :venues :name) diff --git a/test/metabase/query_processor/middleware/add_source_metadata_test.clj b/test/metabase/query_processor/middleware/add_source_metadata_test.clj index e340b6f3cb4..99bcf3a4905 100644 --- a/test/metabase/query_processor/middleware/add_source_metadata_test.clj +++ b/test/metabase/query_processor/middleware/add_source_metadata_test.clj @@ -17,7 +17,7 @@ (select-keys col [:id :table_id :name :display_name :base_type :effective_type :coercion_strategy - :semantic_type :unit :fingerprint :settings :field_ref :parent_id]))) + :semantic_type :unit :fingerprint :settings :field_ref :nfc_path :parent_id]))) (defn- venues-source-metadata ([] diff --git a/test/metabase/query_processor/middleware/annotate_test.clj b/test/metabase/query_processor/middleware/annotate_test.clj index 94f75e96e1b..68f1f24e86d 100644 --- a/test/metabase/query_processor/middleware/annotate_test.clj +++ b/test/metabase/query_processor/middleware/annotate_test.clj @@ -220,6 +220,7 @@ :name "parent.child" :settings nil :field_ref [:field (u/the-id child) nil] + :nfc_path nil :parent_id (u/the-id parent) :id (u/the-id child) :visibility_type :normal @@ -241,6 +242,7 @@ :name "grandparent.parent.child" :settings nil :field_ref [:field (u/the-id child) nil] + :nfc_path nil :parent_id (u/the-id parent) :id (u/the-id child) :visibility_type :normal diff --git a/test/metabase/query_processor_test.clj b/test/metabase/query_processor_test.clj index 79d7bade959..b25aec98cc2 100644 --- a/test/metabase/query_processor_test.clj +++ b/test/metabase/query_processor_test.clj @@ -69,6 +69,7 @@ {:description nil :visibility_type :normal :settings nil + :nfc_path nil :parent_id nil :source :fields}) diff --git a/test/metabase/query_processor_test/nested_queries_test.clj b/test/metabase/query_processor_test/nested_queries_test.clj index 26e16985f51..e6352720ee8 100644 --- a/test/metabase/query_processor_test/nested_queries_test.clj +++ b/test/metabase/query_processor_test/nested_queries_test.clj @@ -70,7 +70,7 @@ native-source? (-> (assoc :field_ref [:field "PRICE" {:base-type :type/Integer}] :effective_type :type/Integer) - (dissoc :description :parent_id :visibility_type)) + (dissoc :description :parent_id :nfc_path :visibility_type)) (not has-source-metadata?) (dissoc :id :semantic_type :settings :fingerprint :table_id :coercion_strategy)) @@ -446,7 +446,7 @@ ;; because this field literal comes from a native query that does not include `:source-metadata` it won't have ;; the usual extra keys (dissoc :semantic_type :coercion_strategy :table_id - :id :settings :fingerprint)) + :id :settings :fingerprint :nfc_path)) (qp.test/aggregate-col :count)] (mt/cols (mt/with-temp Card [card {:dataset_query {:database (mt/id) diff --git a/test/metabase/sync/sync_metadata/fields/fetch_metadata_test.clj b/test/metabase/sync/sync_metadata/fields/fetch_metadata_test.clj index 6a9e446682d..05835d46b30 100644 --- a/test/metabase/sync/sync_metadata/fields/fetch_metadata_test.clj +++ b/test/metabase/sync/sync_metadata/fields/fetch_metadata_test.clj @@ -11,51 +11,6 @@ [metabase.util :as u] [toucan.db :as db])) -(deftest add-nested-field-columns-test - (testing "adds nested field columns for one field" - (let [our-field {:name "coherent_json_val" - :base-type :type/* - :database-type "blah", - :database-position 0, - :id 1} - nfc-fields '#{{:name "incoherent_json_val → b", - :database-type "blah", - :base-type :type/*, - :database-position 0, - :id 2, - :nfc-path [:incoherent_json_val "b"]} - {:name "coherent_json_val → a", - :database-type "blah", - :base-type :type/*, - :database-position 0, - :id 3, - :nfc-path [:coherent_json_val "a"]} - {:name "coherent_json_val → b", - :database-type "blah", - :base-type :type/*, - :database-position 0, - :id 4, - :nfc-path [:coherent_json_val "b"]}}] - (is (= (#'sync-fields.fetch-metadata/add-nested-field-columns our-field nfc-fields) - {:name "coherent_json_val", - :base-type :type/*, - :database-type "blah", - :database-position 0, - :id 1, - :nested-fields - #{{:name "coherent_json_val → b", - :database-type "blah", - :base-type :type/*, - :database-position 0, - :id 4, - :nfc-path [:coherent_json_val "b"]} - {:name "coherent_json_val → a", - :database-type "blah", - :base-type :type/*, - :database-position 0, - :id 3, - :nfc-path [:coherent_json_val "a"]}}}))))) - ;; `our-metadata` should match up with what we have in the DB (deftest does-metadata-match-test (mt/with-temp Database [db {:engine ::toucanery/toucanery}] diff --git a/test/metabase/test/mock/util.clj b/test/metabase/test/mock/util.clj index 2883f4e00fa..9e19dcd8ff3 100644 --- a/test/metabase/test/mock/util.clj +++ b/test/metabase/test/mock/util.clj @@ -24,6 +24,7 @@ :fk_target_field_id false :updated_at true :active true + :nfc_path nil :parent_id false :semantic_type nil :id true -- GitLab