From 37b50677ed0b029708d6aa3b9db797f1b6a66e64 Mon Sep 17 00:00:00 2001 From: Cal Herries <39073188+calherries@users.noreply.github.com> Date: Thu, 14 Mar 2024 13:54:29 +0200 Subject: [PATCH] Faster sync-fields (#38828) --- .clj-kondo/config.edn | 2 +- docs/developers-guide/driver-changelog.md | 10 ++ .../redshift/src/metabase/driver/redshift.clj | 62 +++++++++--- .../test/metabase/driver/redshift_test.clj | 10 +- src/metabase/driver.clj | 33 +++++-- src/metabase/driver/postgres.clj | 11 ++- src/metabase/driver/sql_jdbc.clj | 4 + src/metabase/driver/sql_jdbc/sync.clj | 2 + .../driver/sql_jdbc/sync/describe_table.clj | 98 +++++++++++++------ src/metabase/sync/fetch_metadata.clj | 18 +++- src/metabase/sync/interface.clj | 21 ++-- src/metabase/sync/sync_metadata/fields.clj | 57 ++++++++--- .../{fetch_metadata.clj => our_metadata.clj} | 10 +- .../sync_metadata/fields/sync_instances.clj | 4 +- src/metabase/sync/util.clj | 3 +- src/metabase/util/malli/registry.cljc | 5 + src/metabase/util/malli/schema.clj | 7 ++ .../sql_jdbc/sync/describe_table_test.clj | 64 ++++++++++-- test/metabase/driver/sql_jdbc_test.clj | 14 +++ ...etadata_test.clj => our_metadata_test.clj} | 6 +- 20 files changed, 328 insertions(+), 113 deletions(-) rename src/metabase/sync/sync_metadata/fields/{fetch_metadata.clj => our_metadata.clj} (94%) rename test/metabase/sync/sync_metadata/fields/{fetch_metadata_test.clj => our_metadata_test.clj} (96%) diff --git a/.clj-kondo/config.edn b/.clj-kondo/config.edn index dc496940010..22a0d858140 100644 --- a/.clj-kondo/config.edn +++ b/.clj-kondo/config.edn @@ -355,7 +355,7 @@ metabase.sync.field-values field-values metabase.sync.interface i metabase.sync.schedules sync.schedules - metabase.sync.sync-metadata.fields.fetch-metadata fetch-metadata + metabase.sync.sync-metadata.fields.our-metadata fields.our-metadata metabase.sync.util sync-util metabase.sync.util-test sync.util-test metabase.task.sync-databases task.sync-databases diff --git a/docs/developers-guide/driver-changelog.md b/docs/developers-guide/driver-changelog.md index 70c8d85ac93..798016222d5 100644 --- a/docs/developers-guide/driver-changelog.md +++ b/docs/developers-guide/driver-changelog.md @@ -4,6 +4,16 @@ title: Driver interface changelog # Driver Interface Changelog +## Metabase 0.49.1 + +- Another driver feature has been added: `describe-fields`. If a driver opts-in to supporting this feature, The + multimethod `metabase.driver/describe-fields` must be implemented, as a replacement for + `metabase.driver/describe-table`. + +- The multimethod `metabase.driver.sql-jdbc.sync.describe-table/describe-fields-sql` has been added. The method needs + to be implemented if the driver supports `describe-fields` and you want to use the default JDBC implementation of + `metabase.driver/describe-fields`. + ## Metabase 0.49.0 - The multimethod `metabase.driver/describe-table-fks` has been deprecated in favor of `metabase.driver/describe-fks`. diff --git a/modules/drivers/redshift/src/metabase/driver/redshift.clj b/modules/drivers/redshift/src/metabase/driver/redshift.clj index 9b35a9bdc2d..5d5c20274ea 100644 --- a/modules/drivers/redshift/src/metabase/driver/redshift.clj +++ b/modules/drivers/redshift/src/metabase/driver/redshift.clj @@ -13,7 +13,6 @@ [metabase.driver.sql-jdbc.execute :as sql-jdbc.execute] [metabase.driver.sql-jdbc.execute.legacy-impl :as sql-jdbc.legacy] [metabase.driver.sql-jdbc.sync :as sql-jdbc.sync] - [metabase.driver.sql-jdbc.sync.describe-table :as sql-jdbc.describe-table] [metabase.driver.sql.query-processor :as sql.qp] [metabase.lib.metadata :as lib.metadata] [metabase.mbql.util :as mbql.u] @@ -37,6 +36,7 @@ (doseq [[feature supported?] {:test/jvm-timezone-setting false :nested-field-columns false + :describe-fields true :describe-fks true :connection-impersonation true}] (defmethod driver/database-supports? [:redshift feature] [_driver _feat _db] supported?)) @@ -68,6 +68,7 @@ [:pg_namespace :pk_ns] [:= :pk_table.relnamespace :pk_ns.oid] [:pg_attribute :pk_column] [:= :c.confrelid :pk_column.attrelid]] :where [:and + [:raw "fk_ns.nspname !~ '^information_schema|catalog_history|pg_'"] [:= :c.contype [:raw "'f'::char"]] [:= :fk_column.attnum [:raw "ANY(c.conkey)"]] [:= :pk_column.attnum [:raw "ANY(c.confkey)"]] @@ -76,6 +77,41 @@ :order-by [:fk-table-schema :fk-table-name]} :dialect (sql.qp/quote-style driver))) +(defmethod sql-jdbc.sync/describe-fields-sql :redshift + ;; The implementation is based on `getColumns` in https://github.com/aws/amazon-redshift-jdbc-driver/blob/master/src/main/java/com/amazon/redshift/jdbc/RedshiftDatabaseMetaData.java + ;; The `database-is-auto-increment` and `database-required` columns are currently missing because they are only + ;; needed for actions, which redshift doesn't support yet. + [driver & {:keys [schema-names table-names]}] + (sql/format {:select [[:c.column_name :name] + [:c.data_type :database-type] + [[:- :c.ordinal_position 1] :database-position] + [:c.schema_name :table-schema] + [:c.table_name :table-name] + [[:not= :pk.column_name nil] :pk?] + [[:case [:not= :c.remarks ""] :c.remarks :else nil] :field-comment]] + :from [[:svv_all_columns :c]] + :left-join [[{:select [:tc.table_schema + :tc.table_name + :kc.column_name] + :from [[:information_schema.table_constraints :tc]] + :join [[:information_schema.key_column_usage :kc] + [:and + [:= :tc.constraint_name :kc.constraint_name] + [:= :tc.table_schema :kc.table_schema] + [:= :tc.table_name :kc.table_name]]] + :where [:= :tc.constraint_type "PRIMARY KEY"]} + :pk] + [:and + [:= :c.schema_name :pk.table_schema] + [:= :c.table_name :pk.table_name] + [:= :c.column_name :pk.column_name]]] + :where [:and + [:raw "c.schema_name !~ '^information_schema|catalog_history|pg_'"] + (when schema-names [:in :c.schema_name schema-names]) + (when table-names [:in :c.table_name table-names])] + :order-by [:table-schema :table-name :database-position]} + :dialect (sql.qp/quote-style driver))) + (defmethod driver/db-start-of-week :redshift [_] :sunday) @@ -87,9 +123,15 @@ ;; custom Redshift type handling (def ^:private database-type->base-type - (sql-jdbc.sync/pattern-based-database-type->base-type - [[#"(?i)CHARACTER VARYING" :type/Text] ; Redshift uses CHARACTER VARYING (N) as a synonym for VARCHAR(N) - [#"(?i)NUMERIC" :type/Decimal]])) ; and also has a NUMERIC(P,S) type, which is the same as DECIMAL(P,S) + (some-fn (sql-jdbc.sync/pattern-based-database-type->base-type + [[#"(?i)CHARACTER VARYING" :type/Text] ; Redshift uses CHARACTER VARYING (N) as a synonym for VARCHAR(N) + [#"(?i)NUMERIC" :type/Decimal]]) ; and also has a NUMERIC(P,S) type, which is the same as DECIMAL(P,S) + {:super :type/* ; (requested support in metabase#36642) + :varbyte :type/* ; represents variable-length binary strings + :geometry :type/* ; spatial data + :geography :type/* ; spatial data + :intervaly2m :type/* ; interval literal + :intervald2s :type/*})) ; interval literal (defmethod sql-jdbc.sync/database-type->base-type :redshift [driver column-type] @@ -359,18 +401,6 @@ schema-inclusion-patterns schema-exclusion-patterns)))) -(defmethod sql-jdbc.describe-table/describe-table-fields :redshift - [driver conn {schema :schema, table-name :name :as table} db-name-or-nil] - (let [parent-method (get-method sql-jdbc.describe-table/describe-table-fields :sql-jdbc)] - (try (parent-method driver conn table db-name-or-nil) - (catch Exception e - (log/error e (trs "Error fetching field metadata for table {0}" table-name)) - ;; Use the fallback method (a SELECT * query) if the JDBC driver throws an exception (#21215) - (into - #{} - (sql-jdbc.describe-table/describe-table-fields-xf driver table) - (sql-jdbc.describe-table/fallback-fields-metadata-from-select-query driver conn db-name-or-nil schema table-name)))))) - (defmethod sql-jdbc.execute/set-parameter [:redshift java.time.ZonedDateTime] [driver ps i t] (sql-jdbc.execute/set-parameter driver ps i (t/sql-timestamp (t/with-zone-same-instant t (t/zone-id "UTC"))))) diff --git a/modules/drivers/redshift/test/metabase/driver/redshift_test.clj b/modules/drivers/redshift/test/metabase/driver/redshift_test.clj index 2b59e04cfb3..ccd9a35906f 100644 --- a/modules/drivers/redshift/test/metabase/driver/redshift_test.clj +++ b/modules/drivers/redshift/test/metabase/driver/redshift_test.clj @@ -241,8 +241,8 @@ view-nm)) (let [table-id (t2/select-one-pk Table :db_id (u/the-id database), :name view-nm)] ;; and its columns' :base_type should have been identified correctly - (is (= [{:name "numeric_col", :database_type "numeric(10,2)", :base_type :type/Decimal} - {:name "weird_varchar", :database_type "character varying(50)", :base_type :type/Text}] + (is (= [{:name "numeric_col", :database_type "numeric", :base_type :type/Decimal} + {:name "weird_varchar", :database_type "character varying", :base_type :type/Text}] (map mt/derecordize (t2/select [Field :name :database_type :base_type] :table_id table-id {:order-by [:name]})))))))))) @@ -270,9 +270,9 @@ view-nm)) (let [table-id (t2/select-one-pk Table :db_id (u/the-id database), :name view-nm)] ;; and its columns' :base_type should have been identified correctly - (is (= [{:name "case_when_numeric_inc_nulls", :database_type "numeric", :base_type :type/Decimal} - {:name "raw_null", :database_type "varchar", :base_type :type/Text} - {:name "raw_var", :database_type "character varying(5)", :base_type :type/Text}] + (is (= [{:name "case_when_numeric_inc_nulls", :database_type "numeric", :base_type :type/Decimal} + {:name "raw_null", :database_type "character varying", :base_type :type/Text} + {:name "raw_var", :database_type "character varying", :base_type :type/Text}] (t2/select [Field :name :database_type :base_type] :table_id table-id {:order-by [:name]})))) (finally (execute! (str "DROP VIEW IF EXISTS %s;") diff --git a/src/metabase/driver.clj b/src/metabase/driver.clj index 8d75e0199e2..4b2af349a75 100644 --- a/src/metabase/driver.clj +++ b/src/metabase/driver.clj @@ -311,14 +311,29 @@ :hierarchy #'hierarchy) (defmulti describe-table - "Return a map containing information that describes the physical schema of `table` (i.e. the fields contained - therein). `database` will be an instance of the `Database` model; and `table`, an instance of the `Table` model. It - is expected that this function will be peformant and avoid draining meaningful resources of the database. Results - should match the [[metabase.sync.interface/TableMetadata]] schema." + "Return a map containing a single field `:fields` that describes the fields in a `table`. `database` will be an + instance of the `Database` model; and `table`, an instance of the `Table` model. It is expected that this function + will be peformant and avoid draining meaningful resources of the database. The value of `:fields` should be a set of + values matching the [[metabase.sync.interface/TableMetadataField]] schema." {:added "0.32.0" :arglists '([driver database table])} dispatch-on-initialized-driver :hierarchy #'hierarchy) +(defmulti describe-fields + "Returns a reducible collection of maps, each containing information about fields. It includes which keys are + primary keys, but not foreign keys. It does not include nested fields (e.g. fields within a JSON column). + + Takes keyword arguments to narrow down the results to a set of + `schema-names` or `table-names`. + + Results match [[metabase.sync.interface/FieldMetadataEntry]]. + Results are optionally filtered by `schema-names` and `table-names` provided. + Results are ordered by `table-schema`, `table-name`, and `database-position` in ascending order." + {:added "0.49.1" + :arglists '([driver database & {:keys [schema-names table-names]}])} + dispatch-on-initialized-driver + :hierarchy #'hierarchy) + (defmulti describe-table-indexes "Returns a set of map containing information about the indexes of a table. Currently we only sync single column indexes or the first column of a composite index. @@ -444,8 +459,7 @@ (def features "Set of all features a driver can support." - #{ - ;; Does this database support foreign key relationships? + #{;; Does this database support foreign key relationships? :foreign-keys ;; Does this database support nested fields for any and every field except primary key (e.g. Mongo)? @@ -567,7 +581,12 @@ :index-info ;; Does the driver support a faster `sync-fks` step by fetching all FK metadata in a single collection? - :describe-fks}) + ;; if so, `metabase.driver/describe-fks` must be implemented instead of `metabase.driver/describe-table-fks` + :describe-fks + + ;; Does the driver support a faster `sync-fields` step by fetching all FK metadata in a single collection? + ;; if so, `metabase.driver/describe-fields` must be implemented instead of `metabase.driver/describe-table` + :describe-fields}) (defmulti database-supports? "Does this driver and specific instance of a database support a certain `feature`? diff --git a/src/metabase/driver/postgres.clj b/src/metabase/driver/postgres.clj index 046dd79132d..23effc9ce06 100644 --- a/src/metabase/driver/postgres.clj +++ b/src/metabase/driver/postgres.clj @@ -222,7 +222,7 @@ ;;; +----------------------------------------------------------------------------------------------------------------+ (defn- ->timestamp [honeysql-form] - (h2x/cast-unless-type-in "timestamp" #{"timestamp" "timestamptz" "date"} honeysql-form)) + (h2x/cast-unless-type-in "timestamp" #{"timestamp" "timestamptz" "timestamp with time zone" "date"} honeysql-form)) (defn- format-interval "Generate a Postgres 'INTERVAL' literal. @@ -347,7 +347,8 @@ [driver [_ arg target-timezone source-timezone]] (let [expr (sql.qp/->honeysql driver (cond-> arg (string? arg) u.date/parse)) - timestamptz? (h2x/is-of-type? expr "timestamptz") + timestamptz? (or (h2x/is-of-type? expr "timestamptz") + (h2x/is-of-type? expr "timestamp with time zone")) _ (sql.u/validate-convert-timezone-args timestamptz? target-timezone source-timezone) expr [:timezone target-timezone (if (not timestamptz?) [:timezone source-timezone expr] @@ -575,7 +576,8 @@ (def ^:private default-base-types "Map of default Postgres column types -> Field base types. Add more mappings here as you come across them." - {:bigint :type/BigInteger + {:array :type/* + :bigint :type/BigInteger :bigserial :type/BigInteger :bit :type/* :bool :type/Boolean @@ -586,6 +588,8 @@ :cidr :type/Structured ; IPv4/IPv6 network address :circle :type/* :citext :type/Text ; case-insensitive text + :char :type/Text + :character :type/Text :date :type/Date :decimal :type/Decimal :float4 :type/Float @@ -595,6 +599,7 @@ :int :type/Integer :int2 :type/Integer :int4 :type/Integer + :integer :type/Integer :int8 :type/BigInteger :interval :type/* ; time span :json :type/JSON diff --git a/src/metabase/driver/sql_jdbc.clj b/src/metabase/driver/sql_jdbc.clj index 2295ee3f877..f377fa4028d 100644 --- a/src/metabase/driver/sql_jdbc.clj +++ b/src/metabase/driver/sql_jdbc.clj @@ -93,6 +93,10 @@ [driver database table] (sql-jdbc.sync/describe-table driver database table)) +(defmethod driver/describe-fields :sql-jdbc + [driver database & {:as args}] + (sql-jdbc.sync/describe-fields driver database args)) + #_{:clj-kondo/ignore [:deprecated-var]} (defmethod driver/describe-table-fks :sql-jdbc [driver database table] diff --git a/src/metabase/driver/sql_jdbc/sync.clj b/src/metabase/driver/sql_jdbc/sync.clj index 0083926c8da..e492afe25b0 100644 --- a/src/metabase/driver/sql_jdbc/sync.clj +++ b/src/metabase/driver/sql_jdbc/sync.clj @@ -28,6 +28,8 @@ [sql-jdbc.describe-table add-table-pks + describe-fields + describe-fields-sql describe-fks describe-fks-sql describe-table diff --git a/src/metabase/driver/sql_jdbc/sync/describe_table.clj b/src/metabase/driver/sql_jdbc/sync/describe_table.clj index 3c4037c7616..8ee116fc884 100644 --- a/src/metabase/driver/sql_jdbc/sync/describe_table.clj +++ b/src/metabase/driver/sql_jdbc/sync/describe_table.clj @@ -1,5 +1,5 @@ (ns metabase.driver.sql-jdbc.sync.describe-table - "SQL JDBC impl for `describe-table`, `describe-fks`, `describe-table-fks`, and `describe-nested-field-columns`. + "SQL JDBC impl for `describe-fields`, `describe-table`, `describe-fks`, `describe-table-fks`, and `describe-nested-field-columns`. `describe-table-fks` is deprecated and will be replaced by `describe-fks` in the future." (:require [cheshire.core :as json] @@ -77,7 +77,7 @@ honeysql (sql.qp/apply-top-level-clause driver :limit honeysql {:limit 0})] (sql.qp/format-honeysql driver honeysql))) -(defn fallback-fields-metadata-from-select-query +(defn- fallback-fields-metadata-from-select-query "In some rare cases `:column_name` is blank (eg. SQLite's views with group by) fallback to sniffing the type from a SELECT * query." [driver ^Connection conn db-name-or-nil schema table] @@ -166,24 +166,37 @@ init [jdbc-metadata fallback-metadata]))))) +(defn describe-fields-xf + "Returns a transducer for computing metadata about the fields in `db`." + [driver db] + (map (fn [col] + (let [base-type (database-type->base-type-or-warn driver (:database-type col)) + semantic-type (calculated-semantic-type driver (:name col) (:database-type col)) + json? (isa? base-type :type/JSON)] + (merge + (u/select-non-nil-keys col [:table-schema + :table-name + :pk? + :name + :database-type + :database-position + :field-comment + :database-required + :database-is-auto-increment]) + {:base-type base-type + ;; json-unfolding is true by default for JSON fields, but this can be overridden at the DB level + :json-unfolding json?} + (when semantic-type + {:semantic-type semantic-type}) + (when (and json? (driver/database-supports? driver :nested-field-columns db)) + {:visibility-type :details-only})))))) + (defn describe-table-fields-xf - "Returns a transducer for computing metadata about the fields in `table`." - [driver table] - (map-indexed (fn [i {:keys [database-type], column-name :name, :as col}] - (let [base-type (database-type->base-type-or-warn driver database-type) - semantic-type (calculated-semantic-type driver column-name database-type) - db (table/database table) - json? (isa? base-type :type/JSON)] - (merge - (u/select-non-nil-keys col [:name :database-type :field-comment :database-required :database-is-auto-increment]) - {:base-type base-type - :database-position i - ;; json-unfolding is true by default for JSON fields, but this can be overridden at the DB level - :json-unfolding json?} - (when semantic-type - {:semantic-type semantic-type}) - (when (and json? (driver/database-supports? driver :nested-field-columns db)) - {:visibility-type :details-only})))))) + "Returns a transducer for computing metadata about the fields in a table, given the database `db`." + [driver db] + (comp + (describe-fields-xf driver db) + (map-indexed (fn [i col] (assoc col :database-position i))))) (defmulti describe-table-fields "Returns a set of column metadata for `table` using JDBC Connection `conn`." @@ -196,7 +209,7 @@ [driver conn table db-name-or-nil] (into #{} - (describe-table-fields-xf driver table) + (describe-table-fields-xf driver (table/database table)) (fields-metadata driver conn table db-name-or-nil))) (defmulti get-table-pks @@ -204,7 +217,9 @@ The PKs should be ordered by column names if there are multiple PKs. Ref: https://docs.oracle.com/javase/8/docs/api/java/sql/DatabaseMetaData.html#getPrimaryKeys-java.lang.String-java.lang.String-java.lang.String- - Note: If db-name, schema, and table-name are not passed, this may return _all_ pks that the metadata's connection can access." + Note: If db-name, schema, and table-name are not passed, this may return _all_ pks that the metadata's connection can access. + + This does not need to be implemented for drivers that support the [[driver/describe-fields]] multimethod." {:changelog-test/ignore true :added "0.45.0" :arglists '([driver ^Connection conn db-name-or-nil table])} @@ -240,15 +255,31 @@ (defn describe-table "Default implementation of `driver/describe-table` for SQL JDBC drivers. Uses JDBC DatabaseMetaData." - [driver db-or-id-or-spec-or-conn table] - (if (instance? Connection db-or-id-or-spec-or-conn) - (describe-table* driver db-or-id-or-spec-or-conn table) - (sql-jdbc.execute/do-with-connection-with-options - driver - db-or-id-or-spec-or-conn - nil - (fn [^Connection conn] - (describe-table* driver conn table))))) + [driver db table] + (sql-jdbc.execute/do-with-connection-with-options + driver + db + nil + (fn [^Connection conn] + (describe-table* driver conn table)))) + +(defmulti describe-fields-sql + "Returns a SQL query ([sql & params]) for use in the default JDBC implementation of [[metabase.driver/describe-fields]], + i.e. [[describe-fields]]." + {:added "0.49.1" + :arglists '([driver & {:keys [schema-names table-names]}])} + driver/dispatch-on-initialized-driver + :hierarchy #'driver/hierarchy) + +(defn describe-fields + "Default implementation of [[metabase.driver/describe-fields]] for JDBC drivers. Uses JDBC DatabaseMetaData." + [driver db & {:keys [schema-names table-names] :as args}] + (if (or (and schema-names (empty? schema-names)) + (and table-names (empty? table-names))) + [] + (eduction + (describe-fields-xf driver db) + (sql-jdbc.execute/reducible-query db (describe-fields-sql driver args))))) (defn- describe-table-fks* [_driver ^Connection conn {^String schema :schema, ^String table-name :name} & [^String db-name-or-nil]] @@ -282,8 +313,11 @@ (defn describe-fks "Default implementation of [[metabase.driver/describe-fks]] for JDBC drivers. Uses JDBC DatabaseMetaData." - [driver db & args] - (sql-jdbc.execute/reducible-query db (describe-fks-sql driver args))) + [driver db & {:keys [schema-names table-names] :as args}] + (if (or (and schema-names (empty? schema-names)) + (and table-names (empty? table-names))) + [] + (sql-jdbc.execute/reducible-query db (describe-fks-sql driver args)))) (defn describe-table-indexes "Default implementation of [[metabase.driver/describe-table-indexes]] for SQL JDBC drivers. Uses JDBC DatabaseMetaData." diff --git a/src/metabase/sync/fetch_metadata.clj b/src/metabase/sync/fetch_metadata.clj index f6f181eba4a..b0dfd145423 100644 --- a/src/metabase/sync/fetch_metadata.clj +++ b/src/metabase/sync/fetch_metadata.clj @@ -3,7 +3,6 @@ tables, schemas, and fields, and their types. For example, with SQL databases, these functions use the JDBC DatabaseMetaData to get this information." (:require - [metabase.config :as config] [metabase.driver :as driver] [metabase.driver.sql-jdbc.sync :as sql-jdbc.sync] [metabase.driver.util :as driver.u] @@ -16,19 +15,28 @@ [database :- i/DatabaseInstance] (driver/describe-database (driver.u/database->driver database) database)) -(mu/defn table-metadata :- i/TableMetadata +(mu/defn fields-metadata + "Effectively a wrapper for [[metabase.driver/describe-fields]] that also validates the output against the schema." + [database :- i/DatabaseInstance & {:as args}] + (cond->> (driver/describe-fields (driver.u/database->driver database) database args) + ;; This is a workaround for the fact that [[mu/defn]] can't check reducible collections yet + (mu.fn/instrument-ns? *ns*) + (eduction (map #(mu.fn/validate-output {} i/FieldMetadataEntry %))))) + +(mu/defn table-fields-metadata :- [:set i/TableMetadataField] "Get more detailed information about a `table` belonging to `database`. Includes information about the Fields." [database :- i/DatabaseInstance table :- i/TableInstance] - (driver/describe-table (driver.u/database->driver database) database table)) + (if (driver/database-supports? (driver.u/database->driver database) :describe-fields database) + (set (fields-metadata database :table-names [(:name table)] :schema-names [(:schema table)])) + (:fields (driver/describe-table (driver.u/database->driver database) database table)))) (mu/defn fk-metadata "Effectively a wrapper for [[metabase.driver/describe-fks]] that also validates the output against the schema." [database :- i/DatabaseInstance & {:as args}] (cond->> (driver/describe-fks (driver.u/database->driver database) database args) - ;; Validate the output against the schema, except in prod. ;; This is a workaround for the fact that [[mu/defn]] can't check reducible collections yet - (not config/is-prod?) + (mu.fn/instrument-ns? *ns*) (eduction (map #(mu.fn/validate-output {} i/FKMetadataEntry %))))) (mu/defn table-fk-metadata :- [:maybe [:sequential i/FKMetadataEntry]] diff --git a/src/metabase/sync/interface.clj b/src/metabase/sync/interface.clj index e14f8db9515..28a7a64f69d 100644 --- a/src/metabase/sync/interface.clj +++ b/src/metabase/sync/interface.clj @@ -2,6 +2,7 @@ "Schemas and constants used by the sync code." (:require [clojure.string :as str] + [malli.util :as mut] [metabase.lib.schema.common :as lib.schema.common] [metabase.util.malli.registry :as mr] [metabase.util.malli.schema :as ms])) @@ -38,7 +39,7 @@ [:effective-type {:optional true} [:maybe ::lib.schema.common/base-type]] [:coercion-strategy {:optional true} [:maybe ms/CoercionStrategy]] [:field-comment {:optional true} [:maybe ::lib.schema.common/non-blank-string]] - [:pk? {:optional true} :boolean] + [:pk? {:optional true} :boolean] ; optional for databases that don't support PKs [:nested-fields {:optional true} [:set [:ref ::TableMetadataField]]] [:json-unfolding {:optional true} :boolean] [:nfc-path {:optional true} [:any]] @@ -65,16 +66,14 @@ "Schema for a given Table as provided in [[metabase.driver/describe-table-indexes]]." [:ref ::TableIndexMetadata]) -(mr/def ::TableMetadata - [:map - [:name ::lib.schema.common/non-blank-string] - [:schema [:maybe ::lib.schema.common/non-blank-string]] - [:fields [:set TableMetadataField]] - [:description {:optional true} [:maybe :string]]]) - -(def TableMetadata - "Schema for the expected output of [[metabase.driver/describe-table]]." - [:ref ::TableMetadata]) +(mr/def ::FieldMetadataEntry + (-> (mr/schema ::TableMetadataField) + (mut/assoc :table-schema [:maybe ::lib.schema.common/non-blank-string]) + (mut/assoc :table-name ::lib.schema.common/non-blank-string))) + +(def FieldMetadataEntry + "Schema for an item in the expected output of [[metabase.driver/describe-fields]]." + [:ref ::FieldMetadataEntry]) ;;; not actually used; leaving here for now because it serves as documentation (comment diff --git a/src/metabase/sync/sync_metadata/fields.clj b/src/metabase/sync/sync_metadata/fields.clj index 5d785251cbe..561602ad7da 100644 --- a/src/metabase/sync/sync_metadata/fields.clj +++ b/src/metabase/sync/sync_metadata/fields.clj @@ -6,7 +6,7 @@ both sets of Metadata and perform whatever steps are needed to make sure the things in the DB match the things that came back from `describe-table`. These steps are broken out into three main parts: - * Fetch Metadata - logic is in `metabase.sync.sync-metadata.fields.fetch-metadata`. Construct a map of metadata from + * Fetch Our Metadata - logic is in `metabase.sync.sync-metadata.fields.our-metadata`. Construct a map of metadata from the Metabase application database that matches the form of DB metadata about Fields in a Table. This metadata is used to next two steps to determine what sync operations need to be performed by comparing the differences in the two sets of Metadata. @@ -39,14 +39,19 @@ * In general the methods in these namespaces return the number of rows updated; these numbers are summed and used for logging purposes by higher-level sync logic." (:require + [metabase.driver :as driver] + [metabase.driver.util :as driver.u] [metabase.models.table :as table] + [metabase.sync.fetch-metadata :as fetch-metadata] [metabase.sync.interface :as i] - [metabase.sync.sync-metadata.fields.fetch-metadata :as fetch-metadata] + [metabase.sync.sync-metadata.fields.our-metadata :as fields.our-metadata] [metabase.sync.sync-metadata.fields.sync-instances :as sync-instances] [metabase.sync.sync-metadata.fields.sync-metadata :as sync-metadata] [metabase.sync.util :as sync-util] + [metabase.util.log :as log] [metabase.util.malli :as mu] - [metabase.util.malli.schema :as ms])) + [metabase.util.malli.schema :as ms] + [toucan2.core :as t2])) ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | PUTTING IT ALL TOGETHER | @@ -57,11 +62,39 @@ properties (e.g. base type and comment/remark) as needed. Returns number of Fields synced." [table :- i/TableInstance db-metadata :- [:set i/TableMetadataField]] - (+ (sync-instances/sync-instances! table db-metadata (fetch-metadata/our-metadata table)) + (+ (sync-instances/sync-instances! table db-metadata (fields.our-metadata/our-metadata table)) ;; Now that tables are synced and fields created as needed make sure field properties are in sync. ;; Re-fetch our metadata because there might be somethings that have changed after calling ;; `sync-instances` - (sync-metadata/update-metadata! table db-metadata (fetch-metadata/our-metadata table)))) + (sync-metadata/update-metadata! table db-metadata (fields.our-metadata/our-metadata table)))) + +(mu/defn sync-fields-for-db! + "Sync the Fields in the Metabase application database for a specific `table`." + [database :- i/DatabaseInstance] + (sync-util/with-error-handling (format "Error syncing Fields for Database ''%s''" (sync-util/name-for-logging database)) + (let [schema-names (sync-util/db->sync-schemas database) + fields-metadata (fetch-metadata/fields-metadata database :schema-names schema-names)] + (transduce (comp + (partition-by (juxt :table-name :table-schema)) + (map (fn [table-metadata] + (let [fst (first table-metadata) + table (t2/select-one :model/Table + :db_id (:id database) + :%lower.name (:table-name fst) + :%lower.schema (:table-schema fst) + {:where sync-util/sync-tables-clause}) + updated (if table + (try (sync-and-update! table (set table-metadata)) + (catch Exception e + (log/error e) + 0)) + 0)] + {:total-fields (count table-metadata) + :updated-fields updated})))) + (partial merge-with +) + {:total-fields 0 + :updated-fields 0} + fields-metadata)))) (mu/defn sync-fields-for-table! "Sync the Fields in the Metabase application database for a specific `table`." @@ -71,19 +104,19 @@ ([database :- i/DatabaseInstance table :- i/TableInstance] (sync-util/with-error-handling (format "Error syncing Fields for Table ''%s''" (sync-util/name-for-logging table)) - (let [db-metadata (fetch-metadata/db-metadata database table)] + (let [db-metadata (fields.our-metadata/db-metadata database table)] {:total-fields (count db-metadata) :updated-fields (sync-and-update! table db-metadata)})))) - (mu/defn sync-fields! :- [:maybe [:map [:updated-fields ms/IntGreaterThanOrEqualToZero] [:total-fields ms/IntGreaterThanOrEqualToZero]]] "Sync the Fields in the Metabase application database for all the Tables in a `database`." [database :- i/DatabaseInstance] - (->> database - sync-util/db->sync-tables - (map (partial sync-fields-for-table! database)) - (remove (partial instance? Throwable)) - (apply merge-with +))) + (if (driver/database-supports? (driver.u/database->driver database) :describe-fields database) + (sync-fields-for-db! database) + (->> (sync-util/db->sync-tables database) + (map (partial sync-fields-for-table! database)) + (remove (partial instance? Throwable)) + (apply merge-with +)))) diff --git a/src/metabase/sync/sync_metadata/fields/fetch_metadata.clj b/src/metabase/sync/sync_metadata/fields/our_metadata.clj similarity index 94% rename from src/metabase/sync/sync_metadata/fields/fetch_metadata.clj rename to src/metabase/sync/sync_metadata/fields/our_metadata.clj index a5c93192c6e..170e74c1952 100644 --- a/src/metabase/sync/sync_metadata/fields/fetch_metadata.clj +++ b/src/metabase/sync/sync_metadata/fields/our_metadata.clj @@ -1,4 +1,4 @@ -(ns metabase.sync.sync-metadata.fields.fetch-metadata +(ns metabase.sync.sync-metadata.fields.our-metadata "Logic for constructing a map of metadata from the Metabase application database that matches the form of DB metadata about Fields in a Table, and for fetching the DB metadata itself. This metadata is used by the logic in other `metabase.sync.sync-metadata.fields.*` namespaces to determine what sync operations need to be performed by @@ -89,9 +89,9 @@ (mu/defn db-metadata :- [:set i/TableMetadataField] "Fetch metadata about Fields belonging to a given `table` directly from an external database by calling its driver's - implementation of `describe-table`." + implementation of `describe-table` or `describe-fields` if supported." [database :- i/DatabaseInstance table :- i/TableInstance] - (cond-> (:fields (fetch-metadata/table-metadata database table)) - (driver/database-supports? (:engine database) :nested-field-columns database) - (set/union (fetch-metadata/nfc-metadata database table)))) + (cond-> (fetch-metadata/table-fields-metadata database table) + (driver/database-supports? (:engine database) :nested-field-columns database) + (set/union (fetch-metadata/nfc-metadata database table)))) diff --git a/src/metabase/sync/sync_metadata/fields/sync_instances.clj b/src/metabase/sync/sync_metadata/fields/sync_instances.clj index aead4998e22..e52f87a7e53 100644 --- a/src/metabase/sync/sync_metadata/fields/sync_instances.clj +++ b/src/metabase/sync/sync_metadata/fields/sync_instances.clj @@ -13,7 +13,7 @@ [metabase.models.humanization :as humanization] [metabase.sync.interface :as i] [metabase.sync.sync-metadata.fields.common :as common] - [metabase.sync.sync-metadata.fields.fetch-metadata :as fetch-metadata] + [metabase.sync.sync-metadata.fields.our-metadata :as fields.our-metadata] [metabase.sync.util :as sync-util] [metabase.util :as u] [metabase.util.log :as log] @@ -133,7 +133,7 @@ new-fields (remove known-field? db-field-chunk) new-field-instances (create-or-reactivate-fields! table new-fields parent-id)] ;; save any updates to `our-metadata` - (swap! our-metadata into (fetch-metadata/fields->our-metadata new-field-instances parent-id)) + (swap! our-metadata into (fields.our-metadata/fields->our-metadata new-field-instances parent-id)) ;; now return count of rows updated (count new-fields)))) diff --git a/src/metabase/sync/util.clj b/src/metabase/sync/util.clj index be7c5ca470d..539f885bcf8 100644 --- a/src/metabase/sync/util.clj +++ b/src/metabase/sync/util.clj @@ -330,7 +330,8 @@ ;;; | OTHER SYNC UTILITY FUNCTIONS | ;;; +----------------------------------------------------------------------------------------------------------------+ -(def ^:private sync-tables-clause +(def sync-tables-clause + "Returns a clause that can be used inside a HoneySQL :where clause to select all the Tables that should be synced" (into [:and] (for [[k v] sync-tables-kv-args] [:= k v]))) diff --git a/src/metabase/util/malli/registry.cljc b/src/metabase/util/malli/registry.cljc index 5dacfe40517..e793b68f98e 100644 --- a/src/metabase/util/malli/registry.cljc +++ b/src/metabase/util/malli/registry.cljc @@ -62,6 +62,11 @@ (reset! cache {}) nil) +(defn schema + "Get the Malli schema for `type` from the registry." + [type] + (malli.registry/schema registry type)) + #?(:clj (defmacro def "Like [[clojure.spec.alpha/def]]; add a Malli schema to our registry." diff --git a/src/metabase/util/malli/schema.clj b/src/metabase/util/malli/schema.clj index fcb6f718f3b..7b812998a9a 100644 --- a/src/metabase/util/malli/schema.clj +++ b/src/metabase/util/malli/schema.clj @@ -368,3 +368,10 @@ (mu/with-api-error-message [:re u/uuid-regex] (deferred-tru "value must be a valid UUID."))) + +(defn CollectionOf + "Helper for creating schemas to check whether something is an instance of a collection." + [item-schema] + [:fn + {:error/message (format "Collection of %s" item-schema)} + #(and (coll? %) (every? (partial mc/validate item-schema) %))]) diff --git a/test/metabase/driver/sql_jdbc/sync/describe_table_test.clj b/test/metabase/driver/sql_jdbc/sync/describe_table_test.clj index e50b8676048..23f3e545d1a 100644 --- a/test/metabase/driver/sql_jdbc/sync/describe_table_test.clj +++ b/test/metabase/driver/sql_jdbc/sync/describe_table_test.clj @@ -7,6 +7,7 @@ [clojure.test :refer :all] [metabase.db.metadata-queries :as metadata-queries] [metabase.driver :as driver] + [metabase.driver.ddl.interface :as ddl.i] [metabase.driver.mysql :as mysql] [metabase.driver.mysql-test :as mysql-test] [metabase.driver.sql-jdbc.connection :as sql-jdbc.conn] @@ -15,6 +16,7 @@ [metabase.driver.sql-jdbc.sync.describe-table :as sql-jdbc.describe-table] [metabase.driver.sql-jdbc.sync.interface :as sql-jdbc.sync.interface] + [metabase.driver.util :as driver.u] [metabase.models.table :refer [Table]] [metabase.sync :as sync] [metabase.test :as mt] @@ -23,15 +25,26 @@ [metabase.util :as u] [toucan2.core :as t2])) -(defn- sql-jdbc-drivers-with-default-describe-table-impl - "All SQL JDBC drivers that use the default SQL JDBC implementation of `describe-table`. (As far as I know, this is - all of them.)" +(defn- sql-jdbc-drivers-using-default-describe-table-or-fields-impl + "All SQL JDBC drivers that use the default SQL JDBC implementation of `describe-table`, or `describe-fields`." [] (set (filter - #(identical? (get-method driver/describe-table :sql-jdbc) (get-method driver/describe-table %)) + (fn [driver] + (let [uses-default-describe-table? (and (identical? (get-method driver/describe-table :sql-jdbc) (get-method driver/describe-table driver)) + (not (driver/database-supports? driver :describe-fields nil))) + uses-default-describe-fields? (and (identical? (get-method driver/describe-fields :sql-jdbc) (get-method driver/describe-fields driver)) + (driver/database-supports? driver :describe-fields nil))] + (or uses-default-describe-table? + uses-default-describe-fields?))) (descendants driver/hierarchy :sql-jdbc)))) +(deftest ^:parallel describe-fields-nested-field-columns-test + (testing (str "Drivers that support describe-fields should not support the nested field columns feature." + "It is possible to support both in the future but this has not been implemented yet.") + (is (empty? (filter #(driver/database-supports? % :describe-fields nil) + (mt/normal-drivers-with-feature :nested-field-columns)))))) + (deftest ^:parallel describe-table-test (is (= {:name "VENUES", :fields @@ -79,6 +92,16 @@ :json-unfolding false}}} (sql-jdbc.describe-table/describe-table :h2 (mt/id) {:name "VENUES"})))) +(defn- describe-fields-for-table [db table] + (let [driver (driver.u/database->driver db)] + (sort-by :database-position + (if (driver/database-supports? driver :describe-fields db) + (vec (sql-jdbc.describe-table/describe-fields driver db + :schema-names [(:schema table)] + :table-names [(:name table)])) + (:fields (sql-jdbc.describe-table/describe-table driver db table)))))) + + (deftest describe-auto-increment-on-non-pk-field-test (testing "a non-pk field with auto-increment should be have metabase_field.database_is_auto_increment=true" (one-off-dbs/with-blank-db @@ -126,8 +149,31 @@ {:fk-column-name "VENUE_ID", :dest-table {:name "VENUES", :schema "PUBLIC"}, :dest-column-name "ID"}} (sql-jdbc.describe-table/describe-table-fks :h2 (mt/id) {:name "CHECKINS"})))) +(deftest describe-fields-or-table-test + (testing "test `describe-fields` or `describe-table` returns some basic metadata" + (mt/test-drivers (sql-jdbc-drivers-using-default-describe-table-or-fields-impl) + (mt/dataset daily-bird-counts + (let [table (t2/select-one :model/Table :id (mt/id :bird-count)) + format-name #(ddl.i/format-name driver/*driver* %)] + (is (=? [{:name (format-name "id"), + :database-type string?, + :database-position 0, + :base-type #(isa? % :type/Number), + :json-unfolding false} + {:name (format-name "date"), + :database-type string?, + :database-position 1, + :base-type #(isa? % :type/Date), + :json-unfolding false} + {:name (format-name "count"), + :database-type string?, + :database-position 2, + :base-type #(isa? % :type/Number), + :json-unfolding false}] + (describe-fields-for-table (mt/db) table)))))))) + (deftest database-types-fallback-test - (mt/test-drivers (sql-jdbc-drivers-with-default-describe-table-impl) + (mt/test-drivers (sql-jdbc-drivers-using-default-describe-table-or-fields-impl) (let [org-result-set-seq jdbc/result-set-seq] (with-redefs [jdbc/result-set-seq (fn [& args] (map #(dissoc % :type_name) (apply org-result-set-seq args)))] @@ -137,8 +183,7 @@ {:name "latitude" :base-type :type/Float} {:name "name" :base-type :type/Text} {:name "id" :base-type :type/Integer}} - (->> (sql-jdbc.describe-table/describe-table driver/*driver* (mt/id) (t2/select-one Table :id (mt/id :venues))) - :fields + (->> (describe-fields-for-table (mt/db) (t2/select-one Table :id (mt/id :venues))) (map (fn [{:keys [name base-type]}] {:name (u/lower-case-en name) :base-type (if (or (isa? base-type :type/Integer) @@ -148,13 +193,12 @@ set))))))) (deftest calculated-semantic-type-test - (mt/test-drivers (sql-jdbc-drivers-with-default-describe-table-impl) + (mt/test-drivers (sql-jdbc-drivers-using-default-describe-table-or-fields-impl) (with-redefs [sql-jdbc.sync.interface/column->semantic-type (fn [_ _ column-name] (when (= (u/lower-case-en column-name) "longitude") :type/Longitude))] (is (= [["longitude" :type/Longitude]] - (->> (sql-jdbc.describe-table/describe-table (or driver/*driver* :h2) (mt/id) (t2/select-one Table :id (mt/id :venues))) - :fields + (->> (describe-fields-for-table (mt/db) (t2/select-one Table :id (mt/id :venues))) (filter :semantic-type) (map (juxt (comp u/lower-case-en :name) :semantic-type)))))))) diff --git a/test/metabase/driver/sql_jdbc_test.clj b/test/metabase/driver/sql_jdbc_test.clj index 2a13d0d2a33..802be5aa907 100644 --- a/test/metabase/driver/sql_jdbc_test.clj +++ b/test/metabase/driver/sql_jdbc_test.clj @@ -12,6 +12,7 @@ [metabase.query-processor :as qp] [metabase.query-processor.compile :as qp.compile] [metabase.test :as mt] + [metabase.test.data.dataset-definition-test :as dataset-definition-test] [metabase.util :as u] [toucan2.core :as t2] [toucan2.tools.with-temp :as t2.with-temp])) @@ -79,6 +80,19 @@ #_{:clj-kondo/ignore [:deprecated-var]} (driver/describe-table-fks :h2 (mt/db) (t2/select-one Table :id (mt/id :venues)))))) +(deftest describe-fields-sync-with-composite-pks-test + (testing "Make sure syncing a table that has a composite pks works" + (mt/test-driver (mt/normal-drivers-with-feature :describe-fields) + (mt/dataset dataset-definition-test/composite-pk + (let [songs (t2/select-one :model/Table (mt/id :songs)) + fk-metadata (driver/describe-fields :redshift (mt/db) + :table-names [(:name songs)] + :schema-names [(:schema songs)])] + (is (= #{{:name "song_id", :pk? true} {:name "artist_id", :pk? true}} + (into #{} + (map #(select-keys % [:name :pk?])) + fk-metadata)))))))) + (deftest ^:parallel table-rows-sample-test (mt/test-drivers (sql-jdbc.tu/sql-jdbc-drivers) (is (= [["20th Century Cafe"] diff --git a/test/metabase/sync/sync_metadata/fields/fetch_metadata_test.clj b/test/metabase/sync/sync_metadata/fields/our_metadata_test.clj similarity index 96% rename from test/metabase/sync/sync_metadata/fields/fetch_metadata_test.clj rename to test/metabase/sync/sync_metadata/fields/our_metadata_test.clj index 290349c0a0e..8f0edbb071e 100644 --- a/test/metabase/sync/sync_metadata/fields/fetch_metadata_test.clj +++ b/test/metabase/sync/sync_metadata/fields/our_metadata_test.clj @@ -1,4 +1,4 @@ -(ns metabase.sync.sync-metadata.fields.fetch-metadata-test +(ns metabase.sync.sync-metadata.fields.our-metadata-test (:require [clojure.test :refer :all] [clojure.walk :as walk] @@ -6,7 +6,7 @@ [metabase.models.database :refer [Database]] [metabase.models.table :refer [Table]] [metabase.sync.sync-metadata :as sync-metadata] - [metabase.sync.sync-metadata.fields.fetch-metadata :as fetch-metadata] + [metabase.sync.sync-metadata.fields.our-metadata :as fields.our-metadata] [metabase.test.mock.toucanery :as toucanery] [metabase.util :as u] [toucan2.core :as t2] @@ -107,4 +107,4 @@ ;; defined in sets. changing keys will change the ;; order in the set implementation. (and position depends on database-position) (m/filter-vals some? (dissoc % :id :database-position :position))))] - (remove-ids-and-nil-vals (#'fetch-metadata/our-metadata (t2/select-one Table :id transactions-table-id)))))))) + (remove-ids-and-nil-vals (#'fields.our-metadata/our-metadata (t2/select-one Table :id transactions-table-id)))))))) -- GitLab