Skip to content
Snippets Groups Projects
Unverified Commit f5370610 authored by metabase-bot[bot]'s avatar metabase-bot[bot] Committed by GitHub
Browse files

Faster sync-fields (#38828) (#40123)

parent 46c668b1
No related branches found
No related tags found
No related merge requests found
Showing
with 328 additions and 113 deletions
......@@ -356,7 +356,7 @@
metabase.sync.field-values field-values
metabase.sync.interface i
metabase.sync.schedules sync.schedules
metabase.sync.sync-metadata.fields.fetch-metadata fetch-metadata
metabase.sync.sync-metadata.fields.our-metadata fields.our-metadata
metabase.sync.util sync-util
metabase.sync.util-test sync.util-test
metabase.task.sync-databases task.sync-databases
......
......@@ -4,6 +4,16 @@ title: Driver interface changelog
# Driver Interface Changelog
## Metabase 0.49.1
- Another driver feature has been added: `describe-fields`. If a driver opts-in to supporting this feature, The
multimethod `metabase.driver/describe-fields` must be implemented, as a replacement for
`metabase.driver/describe-table`.
- The multimethod `metabase.driver.sql-jdbc.sync.describe-table/describe-fields-sql` has been added. The method needs
to be implemented if the driver supports `describe-fields` and you want to use the default JDBC implementation of
`metabase.driver/describe-fields`.
## Metabase 0.49.0
- The multimethod `metabase.driver/describe-table-fks` has been deprecated in favor of `metabase.driver/describe-fks`.
......
......@@ -13,7 +13,6 @@
[metabase.driver.sql-jdbc.execute :as sql-jdbc.execute]
[metabase.driver.sql-jdbc.execute.legacy-impl :as sql-jdbc.legacy]
[metabase.driver.sql-jdbc.sync :as sql-jdbc.sync]
[metabase.driver.sql-jdbc.sync.describe-table :as sql-jdbc.describe-table]
[metabase.driver.sql.query-processor :as sql.qp]
[metabase.lib.metadata :as lib.metadata]
[metabase.mbql.util :as mbql.u]
......@@ -37,6 +36,7 @@
(doseq [[feature supported?] {:test/jvm-timezone-setting false
:nested-field-columns false
:describe-fields true
:describe-fks true
:connection-impersonation true}]
(defmethod driver/database-supports? [:redshift feature] [_driver _feat _db] supported?))
......@@ -68,6 +68,7 @@
[:pg_namespace :pk_ns] [:= :pk_table.relnamespace :pk_ns.oid]
[:pg_attribute :pk_column] [:= :c.confrelid :pk_column.attrelid]]
:where [:and
[:raw "fk_ns.nspname !~ '^information_schema|catalog_history|pg_'"]
[:= :c.contype [:raw "'f'::char"]]
[:= :fk_column.attnum [:raw "ANY(c.conkey)"]]
[:= :pk_column.attnum [:raw "ANY(c.confkey)"]]
......@@ -76,6 +77,41 @@
:order-by [:fk-table-schema :fk-table-name]}
:dialect (sql.qp/quote-style driver)))
(defmethod sql-jdbc.sync/describe-fields-sql :redshift
;; The implementation is based on `getColumns` in https://github.com/aws/amazon-redshift-jdbc-driver/blob/master/src/main/java/com/amazon/redshift/jdbc/RedshiftDatabaseMetaData.java
;; The `database-is-auto-increment` and `database-required` columns are currently missing because they are only
;; needed for actions, which redshift doesn't support yet.
[driver & {:keys [schema-names table-names]}]
(sql/format {:select [[:c.column_name :name]
[:c.data_type :database-type]
[[:- :c.ordinal_position 1] :database-position]
[:c.schema_name :table-schema]
[:c.table_name :table-name]
[[:not= :pk.column_name nil] :pk?]
[[:case [:not= :c.remarks ""] :c.remarks :else nil] :field-comment]]
:from [[:svv_all_columns :c]]
:left-join [[{:select [:tc.table_schema
:tc.table_name
:kc.column_name]
:from [[:information_schema.table_constraints :tc]]
:join [[:information_schema.key_column_usage :kc]
[:and
[:= :tc.constraint_name :kc.constraint_name]
[:= :tc.table_schema :kc.table_schema]
[:= :tc.table_name :kc.table_name]]]
:where [:= :tc.constraint_type "PRIMARY KEY"]}
:pk]
[:and
[:= :c.schema_name :pk.table_schema]
[:= :c.table_name :pk.table_name]
[:= :c.column_name :pk.column_name]]]
:where [:and
[:raw "c.schema_name !~ '^information_schema|catalog_history|pg_'"]
(when schema-names [:in :c.schema_name schema-names])
(when table-names [:in :c.table_name table-names])]
:order-by [:table-schema :table-name :database-position]}
:dialect (sql.qp/quote-style driver)))
(defmethod driver/db-start-of-week :redshift
[_]
:sunday)
......@@ -87,9 +123,15 @@
;; custom Redshift type handling
(def ^:private database-type->base-type
(sql-jdbc.sync/pattern-based-database-type->base-type
[[#"(?i)CHARACTER VARYING" :type/Text] ; Redshift uses CHARACTER VARYING (N) as a synonym for VARCHAR(N)
[#"(?i)NUMERIC" :type/Decimal]])) ; and also has a NUMERIC(P,S) type, which is the same as DECIMAL(P,S)
(some-fn (sql-jdbc.sync/pattern-based-database-type->base-type
[[#"(?i)CHARACTER VARYING" :type/Text] ; Redshift uses CHARACTER VARYING (N) as a synonym for VARCHAR(N)
[#"(?i)NUMERIC" :type/Decimal]]) ; and also has a NUMERIC(P,S) type, which is the same as DECIMAL(P,S)
{:super :type/* ; (requested support in metabase#36642)
:varbyte :type/* ; represents variable-length binary strings
:geometry :type/* ; spatial data
:geography :type/* ; spatial data
:intervaly2m :type/* ; interval literal
:intervald2s :type/*})) ; interval literal
(defmethod sql-jdbc.sync/database-type->base-type :redshift
[driver column-type]
......@@ -359,18 +401,6 @@
schema-inclusion-patterns
schema-exclusion-patterns))))
(defmethod sql-jdbc.describe-table/describe-table-fields :redshift
[driver conn {schema :schema, table-name :name :as table} db-name-or-nil]
(let [parent-method (get-method sql-jdbc.describe-table/describe-table-fields :sql-jdbc)]
(try (parent-method driver conn table db-name-or-nil)
(catch Exception e
(log/error e (trs "Error fetching field metadata for table {0}" table-name))
;; Use the fallback method (a SELECT * query) if the JDBC driver throws an exception (#21215)
(into
#{}
(sql-jdbc.describe-table/describe-table-fields-xf driver table)
(sql-jdbc.describe-table/fallback-fields-metadata-from-select-query driver conn db-name-or-nil schema table-name))))))
(defmethod sql-jdbc.execute/set-parameter [:redshift java.time.ZonedDateTime]
[driver ps i t]
(sql-jdbc.execute/set-parameter driver ps i (t/sql-timestamp (t/with-zone-same-instant t (t/zone-id "UTC")))))
......
......@@ -241,8 +241,8 @@
view-nm))
(let [table-id (t2/select-one-pk Table :db_id (u/the-id database), :name view-nm)]
;; and its columns' :base_type should have been identified correctly
(is (= [{:name "numeric_col", :database_type "numeric(10,2)", :base_type :type/Decimal}
{:name "weird_varchar", :database_type "character varying(50)", :base_type :type/Text}]
(is (= [{:name "numeric_col", :database_type "numeric", :base_type :type/Decimal}
{:name "weird_varchar", :database_type "character varying", :base_type :type/Text}]
(map
mt/derecordize
(t2/select [Field :name :database_type :base_type] :table_id table-id {:order-by [:name]}))))))))))
......@@ -270,9 +270,9 @@
view-nm))
(let [table-id (t2/select-one-pk Table :db_id (u/the-id database), :name view-nm)]
;; and its columns' :base_type should have been identified correctly
(is (= [{:name "case_when_numeric_inc_nulls", :database_type "numeric", :base_type :type/Decimal}
{:name "raw_null", :database_type "varchar", :base_type :type/Text}
{:name "raw_var", :database_type "character varying(5)", :base_type :type/Text}]
(is (= [{:name "case_when_numeric_inc_nulls", :database_type "numeric", :base_type :type/Decimal}
{:name "raw_null", :database_type "character varying", :base_type :type/Text}
{:name "raw_var", :database_type "character varying", :base_type :type/Text}]
(t2/select [Field :name :database_type :base_type] :table_id table-id {:order-by [:name]}))))
(finally
(execute! (str "DROP VIEW IF EXISTS %s;")
......
......@@ -311,14 +311,29 @@
:hierarchy #'hierarchy)
(defmulti describe-table
"Return a map containing information that describes the physical schema of `table` (i.e. the fields contained
therein). `database` will be an instance of the `Database` model; and `table`, an instance of the `Table` model. It
is expected that this function will be peformant and avoid draining meaningful resources of the database. Results
should match the [[metabase.sync.interface/TableMetadata]] schema."
"Return a map containing a single field `:fields` that describes the fields in a `table`. `database` will be an
instance of the `Database` model; and `table`, an instance of the `Table` model. It is expected that this function
will be peformant and avoid draining meaningful resources of the database. The value of `:fields` should be a set of
values matching the [[metabase.sync.interface/TableMetadataField]] schema."
{:added "0.32.0" :arglists '([driver database table])}
dispatch-on-initialized-driver
:hierarchy #'hierarchy)
(defmulti describe-fields
"Returns a reducible collection of maps, each containing information about fields. It includes which keys are
primary keys, but not foreign keys. It does not include nested fields (e.g. fields within a JSON column).
Takes keyword arguments to narrow down the results to a set of
`schema-names` or `table-names`.
Results match [[metabase.sync.interface/FieldMetadataEntry]].
Results are optionally filtered by `schema-names` and `table-names` provided.
Results are ordered by `table-schema`, `table-name`, and `database-position` in ascending order."
{:added "0.49.1"
:arglists '([driver database & {:keys [schema-names table-names]}])}
dispatch-on-initialized-driver
:hierarchy #'hierarchy)
(defmulti describe-table-indexes
"Returns a set of map containing information about the indexes of a table.
Currently we only sync single column indexes or the first column of a composite index.
......@@ -443,8 +458,7 @@
;; TODO -- I think we should rename this to `features` since `driver/driver-features` is a bit redundant.
(def driver-features
"Set of all features a driver can support."
#{
;; Does this database support foreign key relationships?
#{;; Does this database support foreign key relationships?
:foreign-keys
;; Does this database support nested fields for any and every field except primary key (e.g. Mongo)?
......@@ -566,7 +580,12 @@
:index-info
;; Does the driver support a faster `sync-fks` step by fetching all FK metadata in a single collection?
:describe-fks})
;; if so, `metabase.driver/describe-fks` must be implemented instead of `metabase.driver/describe-table-fks`
:describe-fks
;; Does the driver support a faster `sync-fields` step by fetching all FK metadata in a single collection?
;; if so, `metabase.driver/describe-fields` must be implemented instead of `metabase.driver/describe-table`
:describe-fields})
(defmulti supports?
......
......@@ -222,7 +222,7 @@
;;; +----------------------------------------------------------------------------------------------------------------+
(defn- ->timestamp [honeysql-form]
(h2x/cast-unless-type-in "timestamp" #{"timestamp" "timestamptz" "date"} honeysql-form))
(h2x/cast-unless-type-in "timestamp" #{"timestamp" "timestamptz" "timestamp with time zone" "date"} honeysql-form))
(defn- format-interval
"Generate a Postgres 'INTERVAL' literal.
......@@ -347,7 +347,8 @@
[driver [_ arg target-timezone source-timezone]]
(let [expr (sql.qp/->honeysql driver (cond-> arg
(string? arg) u.date/parse))
timestamptz? (h2x/is-of-type? expr "timestamptz")
timestamptz? (or (h2x/is-of-type? expr "timestamptz")
(h2x/is-of-type? expr "timestamp with time zone"))
_ (sql.u/validate-convert-timezone-args timestamptz? target-timezone source-timezone)
expr [:timezone target-timezone (if (not timestamptz?)
[:timezone source-timezone expr]
......@@ -575,7 +576,8 @@
(def ^:private default-base-types
"Map of default Postgres column types -> Field base types.
Add more mappings here as you come across them."
{:bigint :type/BigInteger
{:array :type/*
:bigint :type/BigInteger
:bigserial :type/BigInteger
:bit :type/*
:bool :type/Boolean
......@@ -586,6 +588,8 @@
:cidr :type/Structured ; IPv4/IPv6 network address
:circle :type/*
:citext :type/Text ; case-insensitive text
:char :type/Text
:character :type/Text
:date :type/Date
:decimal :type/Decimal
:float4 :type/Float
......@@ -595,6 +599,7 @@
:int :type/Integer
:int2 :type/Integer
:int4 :type/Integer
:integer :type/Integer
:int8 :type/BigInteger
:interval :type/* ; time span
:json :type/JSON
......
......@@ -93,6 +93,10 @@
[driver database table]
(sql-jdbc.sync/describe-table driver database table))
(defmethod driver/describe-fields :sql-jdbc
[driver database & {:as args}]
(sql-jdbc.sync/describe-fields driver database args))
#_{:clj-kondo/ignore [:deprecated-var]}
(defmethod driver/describe-table-fks :sql-jdbc
[driver database table]
......
......@@ -27,6 +27,8 @@
[sql-jdbc.describe-table
add-table-pks
describe-fields
describe-fields-sql
describe-fks
describe-fks-sql
describe-table
......
(ns metabase.driver.sql-jdbc.sync.describe-table
"SQL JDBC impl for `describe-table`, `describe-fks`, `describe-table-fks`, and `describe-nested-field-columns`.
"SQL JDBC impl for `describe-fields`, `describe-table`, `describe-fks`, `describe-table-fks`, and `describe-nested-field-columns`.
`describe-table-fks` is deprecated and will be replaced by `describe-fks` in the future."
(:require
[cheshire.core :as json]
......@@ -77,7 +77,7 @@
honeysql (sql.qp/apply-top-level-clause driver :limit honeysql {:limit 0})]
(sql.qp/format-honeysql driver honeysql)))
(defn fallback-fields-metadata-from-select-query
(defn- fallback-fields-metadata-from-select-query
"In some rare cases `:column_name` is blank (eg. SQLite's views with group by) fallback to sniffing the type from a
SELECT * query."
[driver ^Connection conn db-name-or-nil schema table]
......@@ -166,24 +166,37 @@
init
[jdbc-metadata fallback-metadata])))))
(defn describe-fields-xf
"Returns a transducer for computing metadata about the fields in `db`."
[driver db]
(map (fn [col]
(let [base-type (database-type->base-type-or-warn driver (:database-type col))
semantic-type (calculated-semantic-type driver (:name col) (:database-type col))
json? (isa? base-type :type/JSON)]
(merge
(u/select-non-nil-keys col [:table-schema
:table-name
:pk?
:name
:database-type
:database-position
:field-comment
:database-required
:database-is-auto-increment])
{:base-type base-type
;; json-unfolding is true by default for JSON fields, but this can be overridden at the DB level
:json-unfolding json?}
(when semantic-type
{:semantic-type semantic-type})
(when (and json? (driver/database-supports? driver :nested-field-columns db))
{:visibility-type :details-only}))))))
(defn describe-table-fields-xf
"Returns a transducer for computing metadata about the fields in `table`."
[driver table]
(map-indexed (fn [i {:keys [database-type], column-name :name, :as col}]
(let [base-type (database-type->base-type-or-warn driver database-type)
semantic-type (calculated-semantic-type driver column-name database-type)
db (table/database table)
json? (isa? base-type :type/JSON)]
(merge
(u/select-non-nil-keys col [:name :database-type :field-comment :database-required :database-is-auto-increment])
{:base-type base-type
:database-position i
;; json-unfolding is true by default for JSON fields, but this can be overridden at the DB level
:json-unfolding json?}
(when semantic-type
{:semantic-type semantic-type})
(when (and json? (driver/database-supports? driver :nested-field-columns db))
{:visibility-type :details-only}))))))
"Returns a transducer for computing metadata about the fields in a table, given the database `db`."
[driver db]
(comp
(describe-fields-xf driver db)
(map-indexed (fn [i col] (assoc col :database-position i)))))
(defmulti describe-table-fields
"Returns a set of column metadata for `table` using JDBC Connection `conn`."
......@@ -196,7 +209,7 @@
[driver conn table db-name-or-nil]
(into
#{}
(describe-table-fields-xf driver table)
(describe-table-fields-xf driver (table/database table))
(fields-metadata driver conn table db-name-or-nil)))
(defmulti get-table-pks
......@@ -204,7 +217,9 @@
The PKs should be ordered by column names if there are multiple PKs.
Ref: https://docs.oracle.com/javase/8/docs/api/java/sql/DatabaseMetaData.html#getPrimaryKeys-java.lang.String-java.lang.String-java.lang.String-
Note: If db-name, schema, and table-name are not passed, this may return _all_ pks that the metadata's connection can access."
Note: If db-name, schema, and table-name are not passed, this may return _all_ pks that the metadata's connection can access.
This does not need to be implemented for drivers that support the [[driver/describe-fields]] multimethod."
{:changelog-test/ignore true
:added "0.45.0"
:arglists '([driver ^Connection conn db-name-or-nil table])}
......@@ -240,15 +255,31 @@
(defn describe-table
"Default implementation of `driver/describe-table` for SQL JDBC drivers. Uses JDBC DatabaseMetaData."
[driver db-or-id-or-spec-or-conn table]
(if (instance? Connection db-or-id-or-spec-or-conn)
(describe-table* driver db-or-id-or-spec-or-conn table)
(sql-jdbc.execute/do-with-connection-with-options
driver
db-or-id-or-spec-or-conn
nil
(fn [^Connection conn]
(describe-table* driver conn table)))))
[driver db table]
(sql-jdbc.execute/do-with-connection-with-options
driver
db
nil
(fn [^Connection conn]
(describe-table* driver conn table))))
(defmulti describe-fields-sql
"Returns a SQL query ([sql & params]) for use in the default JDBC implementation of [[metabase.driver/describe-fields]],
i.e. [[describe-fields]]."
{:added "0.49.1"
:arglists '([driver & {:keys [schema-names table-names]}])}
driver/dispatch-on-initialized-driver
:hierarchy #'driver/hierarchy)
(defn describe-fields
"Default implementation of [[metabase.driver/describe-fields]] for JDBC drivers. Uses JDBC DatabaseMetaData."
[driver db & {:keys [schema-names table-names] :as args}]
(if (or (and schema-names (empty? schema-names))
(and table-names (empty? table-names)))
[]
(eduction
(describe-fields-xf driver db)
(sql-jdbc.execute/reducible-query db (describe-fields-sql driver args)))))
(defn- describe-table-fks*
[_driver ^Connection conn {^String schema :schema, ^String table-name :name} & [^String db-name-or-nil]]
......@@ -281,8 +312,11 @@
(defn describe-fks
"Default implementation of [[metabase.driver/describe-fks]] for JDBC drivers. Uses JDBC DatabaseMetaData."
[driver db & args]
(sql-jdbc.execute/reducible-query db (describe-fks-sql driver args)))
[driver db & {:keys [schema-names table-names] :as args}]
(if (or (and schema-names (empty? schema-names))
(and table-names (empty? table-names)))
[]
(sql-jdbc.execute/reducible-query db (describe-fks-sql driver args))))
(defn describe-table-indexes
"Default implementation of [[metabase.driver/describe-table-indexes]] for SQL JDBC drivers. Uses JDBC DatabaseMetaData."
......
......@@ -3,7 +3,6 @@
tables, schemas, and fields, and their types. For example, with SQL databases, these functions use the JDBC
DatabaseMetaData to get this information."
(:require
[metabase.config :as config]
[metabase.driver :as driver]
[metabase.driver.sql-jdbc.sync :as sql-jdbc.sync]
[metabase.driver.util :as driver.u]
......@@ -16,19 +15,28 @@
[database :- i/DatabaseInstance]
(driver/describe-database (driver.u/database->driver database) database))
(mu/defn table-metadata :- i/TableMetadata
(mu/defn fields-metadata
"Effectively a wrapper for [[metabase.driver/describe-fields]] that also validates the output against the schema."
[database :- i/DatabaseInstance & {:as args}]
(cond->> (driver/describe-fields (driver.u/database->driver database) database args)
;; This is a workaround for the fact that [[mu/defn]] can't check reducible collections yet
(mu.fn/instrument-ns? *ns*)
(eduction (map #(mu.fn/validate-output {} i/FieldMetadataEntry %)))))
(mu/defn table-fields-metadata :- [:set i/TableMetadataField]
"Get more detailed information about a `table` belonging to `database`. Includes information about the Fields."
[database :- i/DatabaseInstance
table :- i/TableInstance]
(driver/describe-table (driver.u/database->driver database) database table))
(if (driver/database-supports? (driver.u/database->driver database) :describe-fields database)
(set (fields-metadata database :table-names [(:name table)] :schema-names [(:schema table)]))
(:fields (driver/describe-table (driver.u/database->driver database) database table))))
(mu/defn fk-metadata
"Effectively a wrapper for [[metabase.driver/describe-fks]] that also validates the output against the schema."
[database :- i/DatabaseInstance & {:as args}]
(cond->> (driver/describe-fks (driver.u/database->driver database) database args)
;; Validate the output against the schema, except in prod.
;; This is a workaround for the fact that [[mu/defn]] can't check reducible collections yet
(not config/is-prod?)
(mu.fn/instrument-ns? *ns*)
(eduction (map #(mu.fn/validate-output {} i/FKMetadataEntry %)))))
(mu/defn table-fk-metadata :- [:maybe [:sequential i/FKMetadataEntry]]
......
......@@ -2,6 +2,7 @@
"Schemas and constants used by the sync code."
(:require
[clojure.string :as str]
[malli.util :as mut]
[metabase.lib.schema.common :as lib.schema.common]
[metabase.util.malli.registry :as mr]
[metabase.util.malli.schema :as ms]))
......@@ -38,7 +39,7 @@
[:effective-type {:optional true} [:maybe ::lib.schema.common/base-type]]
[:coercion-strategy {:optional true} [:maybe ms/CoercionStrategy]]
[:field-comment {:optional true} [:maybe ::lib.schema.common/non-blank-string]]
[:pk? {:optional true} :boolean]
[:pk? {:optional true} :boolean] ; optional for databases that don't support PKs
[:nested-fields {:optional true} [:set [:ref ::TableMetadataField]]]
[:json-unfolding {:optional true} :boolean]
[:nfc-path {:optional true} [:any]]
......@@ -65,16 +66,14 @@
"Schema for a given Table as provided in [[metabase.driver/describe-table-indexes]]."
[:ref ::TableIndexMetadata])
(mr/def ::TableMetadata
[:map
[:name ::lib.schema.common/non-blank-string]
[:schema [:maybe ::lib.schema.common/non-blank-string]]
[:fields [:set TableMetadataField]]
[:description {:optional true} [:maybe :string]]])
(def TableMetadata
"Schema for the expected output of [[metabase.driver/describe-table]]."
[:ref ::TableMetadata])
(mr/def ::FieldMetadataEntry
(-> (mr/schema ::TableMetadataField)
(mut/assoc :table-schema [:maybe ::lib.schema.common/non-blank-string])
(mut/assoc :table-name ::lib.schema.common/non-blank-string)))
(def FieldMetadataEntry
"Schema for an item in the expected output of [[metabase.driver/describe-fields]]."
[:ref ::FieldMetadataEntry])
;;; not actually used; leaving here for now because it serves as documentation
(comment
......
......@@ -6,7 +6,7 @@
both sets of Metadata and perform whatever steps are needed to make sure the things in the DB match the things that
came back from `describe-table`. These steps are broken out into three main parts:
* Fetch Metadata - logic is in `metabase.sync.sync-metadata.fields.fetch-metadata`. Construct a map of metadata from
* Fetch Our Metadata - logic is in `metabase.sync.sync-metadata.fields.our-metadata`. Construct a map of metadata from
the Metabase application database that matches the form of DB metadata about Fields in a Table. This metadata is
used to next two steps to determine what sync operations need to be performed by comparing the differences in the
two sets of Metadata.
......@@ -39,14 +39,19 @@
* In general the methods in these namespaces return the number of rows updated; these numbers are summed and used
for logging purposes by higher-level sync logic."
(:require
[metabase.driver :as driver]
[metabase.driver.util :as driver.u]
[metabase.models.table :as table]
[metabase.sync.fetch-metadata :as fetch-metadata]
[metabase.sync.interface :as i]
[metabase.sync.sync-metadata.fields.fetch-metadata :as fetch-metadata]
[metabase.sync.sync-metadata.fields.our-metadata :as fields.our-metadata]
[metabase.sync.sync-metadata.fields.sync-instances :as sync-instances]
[metabase.sync.sync-metadata.fields.sync-metadata :as sync-metadata]
[metabase.sync.util :as sync-util]
[metabase.util.log :as log]
[metabase.util.malli :as mu]
[metabase.util.malli.schema :as ms]))
[metabase.util.malli.schema :as ms]
[toucan2.core :as t2]))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | PUTTING IT ALL TOGETHER |
......@@ -57,11 +62,39 @@
properties (e.g. base type and comment/remark) as needed. Returns number of Fields synced."
[table :- i/TableInstance
db-metadata :- [:set i/TableMetadataField]]
(+ (sync-instances/sync-instances! table db-metadata (fetch-metadata/our-metadata table))
(+ (sync-instances/sync-instances! table db-metadata (fields.our-metadata/our-metadata table))
;; Now that tables are synced and fields created as needed make sure field properties are in sync.
;; Re-fetch our metadata because there might be somethings that have changed after calling
;; `sync-instances`
(sync-metadata/update-metadata! table db-metadata (fetch-metadata/our-metadata table))))
(sync-metadata/update-metadata! table db-metadata (fields.our-metadata/our-metadata table))))
(mu/defn sync-fields-for-db!
"Sync the Fields in the Metabase application database for a specific `table`."
[database :- i/DatabaseInstance]
(sync-util/with-error-handling (format "Error syncing Fields for Database ''%s''" (sync-util/name-for-logging database))
(let [schema-names (sync-util/db->sync-schemas database)
fields-metadata (fetch-metadata/fields-metadata database :schema-names schema-names)]
(transduce (comp
(partition-by (juxt :table-name :table-schema))
(map (fn [table-metadata]
(let [fst (first table-metadata)
table (t2/select-one :model/Table
:db_id (:id database)
:%lower.name (:table-name fst)
:%lower.schema (:table-schema fst)
{:where sync-util/sync-tables-clause})
updated (if table
(try (sync-and-update! table (set table-metadata))
(catch Exception e
(log/error e)
0))
0)]
{:total-fields (count table-metadata)
:updated-fields updated}))))
(partial merge-with +)
{:total-fields 0
:updated-fields 0}
fields-metadata))))
(mu/defn sync-fields-for-table!
"Sync the Fields in the Metabase application database for a specific `table`."
......@@ -71,19 +104,19 @@
([database :- i/DatabaseInstance
table :- i/TableInstance]
(sync-util/with-error-handling (format "Error syncing Fields for Table ''%s''" (sync-util/name-for-logging table))
(let [db-metadata (fetch-metadata/db-metadata database table)]
(let [db-metadata (fields.our-metadata/db-metadata database table)]
{:total-fields (count db-metadata)
:updated-fields (sync-and-update! table db-metadata)}))))
(mu/defn sync-fields! :- [:maybe
[:map
[:updated-fields ms/IntGreaterThanOrEqualToZero]
[:total-fields ms/IntGreaterThanOrEqualToZero]]]
"Sync the Fields in the Metabase application database for all the Tables in a `database`."
[database :- i/DatabaseInstance]
(->> database
sync-util/db->sync-tables
(map (partial sync-fields-for-table! database))
(remove (partial instance? Throwable))
(apply merge-with +)))
(if (driver/database-supports? (driver.u/database->driver database) :describe-fields database)
(sync-fields-for-db! database)
(->> (sync-util/db->sync-tables database)
(map (partial sync-fields-for-table! database))
(remove (partial instance? Throwable))
(apply merge-with +))))
(ns metabase.sync.sync-metadata.fields.fetch-metadata
(ns metabase.sync.sync-metadata.fields.our-metadata
"Logic for constructing a map of metadata from the Metabase application database that matches the form of DB metadata
about Fields in a Table, and for fetching the DB metadata itself. This metadata is used by the logic in other
`metabase.sync.sync-metadata.fields.*` namespaces to determine what sync operations need to be performed by
......@@ -89,9 +89,9 @@
(mu/defn db-metadata :- [:set i/TableMetadataField]
"Fetch metadata about Fields belonging to a given `table` directly from an external database by calling its driver's
implementation of `describe-table`."
implementation of `describe-table` or `describe-fields` if supported."
[database :- i/DatabaseInstance
table :- i/TableInstance]
(cond-> (:fields (fetch-metadata/table-metadata database table))
(driver/database-supports? (:engine database) :nested-field-columns database)
(set/union (fetch-metadata/nfc-metadata database table))))
(cond-> (fetch-metadata/table-fields-metadata database table)
(driver/database-supports? (:engine database) :nested-field-columns database)
(set/union (fetch-metadata/nfc-metadata database table))))
......@@ -13,7 +13,7 @@
[metabase.models.humanization :as humanization]
[metabase.sync.interface :as i]
[metabase.sync.sync-metadata.fields.common :as common]
[metabase.sync.sync-metadata.fields.fetch-metadata :as fetch-metadata]
[metabase.sync.sync-metadata.fields.our-metadata :as fields.our-metadata]
[metabase.sync.util :as sync-util]
[metabase.util :as u]
[metabase.util.log :as log]
......@@ -133,7 +133,7 @@
new-fields (remove known-field? db-field-chunk)
new-field-instances (create-or-reactivate-fields! table new-fields parent-id)]
;; save any updates to `our-metadata`
(swap! our-metadata into (fetch-metadata/fields->our-metadata new-field-instances parent-id))
(swap! our-metadata into (fields.our-metadata/fields->our-metadata new-field-instances parent-id))
;; now return count of rows updated
(count new-fields))))
......
......@@ -330,7 +330,8 @@
;;; | OTHER SYNC UTILITY FUNCTIONS |
;;; +----------------------------------------------------------------------------------------------------------------+
(def ^:private sync-tables-clause
(def sync-tables-clause
"Returns a clause that can be used inside a HoneySQL :where clause to select all the Tables that should be synced"
(into [:and] (for [[k v] sync-tables-kv-args]
[:= k v])))
......
......@@ -62,6 +62,11 @@
(reset! cache {})
nil)
(defn schema
"Get the Malli schema for `type` from the registry."
[type]
(malli.registry/schema registry type))
#?(:clj
(defmacro def
"Like [[clojure.spec.alpha/def]]; add a Malli schema to our registry."
......
......@@ -369,3 +369,10 @@
(mu/with-api-error-message
[:re u/uuid-regex]
(deferred-tru "value must be a valid UUID.")))
(defn CollectionOf
"Helper for creating schemas to check whether something is an instance of a collection."
[item-schema]
[:fn
{:error/message (format "Collection of %s" item-schema)}
#(and (coll? %) (every? (partial mc/validate item-schema) %))])
......@@ -7,6 +7,7 @@
[clojure.test :refer :all]
[metabase.db.metadata-queries :as metadata-queries]
[metabase.driver :as driver]
[metabase.driver.ddl.interface :as ddl.i]
[metabase.driver.mysql :as mysql]
[metabase.driver.mysql-test :as mysql-test]
[metabase.driver.sql-jdbc.connection :as sql-jdbc.conn]
......@@ -15,6 +16,7 @@
[metabase.driver.sql-jdbc.sync.describe-table
:as sql-jdbc.describe-table]
[metabase.driver.sql-jdbc.sync.interface :as sql-jdbc.sync.interface]
[metabase.driver.util :as driver.u]
[metabase.models.table :refer [Table]]
[metabase.sync :as sync]
[metabase.test :as mt]
......@@ -23,15 +25,26 @@
[metabase.util :as u]
[toucan2.core :as t2]))
(defn- sql-jdbc-drivers-with-default-describe-table-impl
"All SQL JDBC drivers that use the default SQL JDBC implementation of `describe-table`. (As far as I know, this is
all of them.)"
(defn- sql-jdbc-drivers-using-default-describe-table-or-fields-impl
"All SQL JDBC drivers that use the default SQL JDBC implementation of `describe-table`, or `describe-fields`."
[]
(set
(filter
#(identical? (get-method driver/describe-table :sql-jdbc) (get-method driver/describe-table %))
(fn [driver]
(let [uses-default-describe-table? (and (identical? (get-method driver/describe-table :sql-jdbc) (get-method driver/describe-table driver))
(not (driver/database-supports? driver :describe-fields nil)))
uses-default-describe-fields? (and (identical? (get-method driver/describe-fields :sql-jdbc) (get-method driver/describe-fields driver))
(driver/database-supports? driver :describe-fields nil))]
(or uses-default-describe-table?
uses-default-describe-fields?)))
(descendants driver/hierarchy :sql-jdbc))))
(deftest ^:parallel describe-fields-nested-field-columns-test
(testing (str "Drivers that support describe-fields should not support the nested field columns feature."
"It is possible to support both in the future but this has not been implemented yet.")
(is (empty? (filter #(driver/database-supports? % :describe-fields nil)
(mt/normal-drivers-with-feature :nested-field-columns))))))
(deftest ^:parallel describe-table-test
(is (= {:name "VENUES",
:fields
......@@ -79,6 +92,16 @@
:json-unfolding false}}}
(sql-jdbc.describe-table/describe-table :h2 (mt/id) {:name "VENUES"}))))
(defn- describe-fields-for-table [db table]
(let [driver (driver.u/database->driver db)]
(sort-by :database-position
(if (driver/database-supports? driver :describe-fields db)
(vec (sql-jdbc.describe-table/describe-fields driver db
:schema-names [(:schema table)]
:table-names [(:name table)]))
(:fields (sql-jdbc.describe-table/describe-table driver db table))))))
(deftest describe-auto-increment-on-non-pk-field-test
(testing "a non-pk field with auto-increment should be have metabase_field.database_is_auto_increment=true"
(one-off-dbs/with-blank-db
......@@ -126,8 +149,31 @@
{:fk-column-name "VENUE_ID", :dest-table {:name "VENUES", :schema "PUBLIC"}, :dest-column-name "ID"}}
(sql-jdbc.describe-table/describe-table-fks :h2 (mt/id) {:name "CHECKINS"}))))
(deftest describe-fields-or-table-test
(testing "test `describe-fields` or `describe-table` returns some basic metadata"
(mt/test-drivers (sql-jdbc-drivers-using-default-describe-table-or-fields-impl)
(mt/dataset daily-bird-counts
(let [table (t2/select-one :model/Table :id (mt/id :bird-count))
format-name #(ddl.i/format-name driver/*driver* %)]
(is (=? [{:name (format-name "id"),
:database-type string?,
:database-position 0,
:base-type #(isa? % :type/Number),
:json-unfolding false}
{:name (format-name "date"),
:database-type string?,
:database-position 1,
:base-type #(isa? % :type/Date),
:json-unfolding false}
{:name (format-name "count"),
:database-type string?,
:database-position 2,
:base-type #(isa? % :type/Number),
:json-unfolding false}]
(describe-fields-for-table (mt/db) table))))))))
(deftest database-types-fallback-test
(mt/test-drivers (sql-jdbc-drivers-with-default-describe-table-impl)
(mt/test-drivers (sql-jdbc-drivers-using-default-describe-table-or-fields-impl)
(let [org-result-set-seq jdbc/result-set-seq]
(with-redefs [jdbc/result-set-seq (fn [& args]
(map #(dissoc % :type_name) (apply org-result-set-seq args)))]
......@@ -137,8 +183,7 @@
{:name "latitude" :base-type :type/Float}
{:name "name" :base-type :type/Text}
{:name "id" :base-type :type/Integer}}
(->> (sql-jdbc.describe-table/describe-table driver/*driver* (mt/id) (t2/select-one Table :id (mt/id :venues)))
:fields
(->> (describe-fields-for-table (mt/db) (t2/select-one Table :id (mt/id :venues)))
(map (fn [{:keys [name base-type]}]
{:name (u/lower-case-en name)
:base-type (if (or (isa? base-type :type/Integer)
......@@ -148,13 +193,12 @@
set)))))))
(deftest calculated-semantic-type-test
(mt/test-drivers (sql-jdbc-drivers-with-default-describe-table-impl)
(mt/test-drivers (sql-jdbc-drivers-using-default-describe-table-or-fields-impl)
(with-redefs [sql-jdbc.sync.interface/column->semantic-type (fn [_ _ column-name]
(when (= (u/lower-case-en column-name) "longitude")
:type/Longitude))]
(is (= [["longitude" :type/Longitude]]
(->> (sql-jdbc.describe-table/describe-table (or driver/*driver* :h2) (mt/id) (t2/select-one Table :id (mt/id :venues)))
:fields
(->> (describe-fields-for-table (mt/db) (t2/select-one Table :id (mt/id :venues)))
(filter :semantic-type)
(map (juxt (comp u/lower-case-en :name) :semantic-type))))))))
......
......@@ -10,6 +10,7 @@
[metabase.models :refer [Database Field Table]]
[metabase.query-processor :as qp]
[metabase.test :as mt]
[metabase.test.data.dataset-definition-test :as dataset-definition-test]
[metabase.util :as u]
[toucan2.core :as t2]
[toucan2.tools.with-temp :as t2.with-temp]))
......@@ -77,6 +78,19 @@
#_{:clj-kondo/ignore [:deprecated-var]}
(driver/describe-table-fks :h2 (mt/db) (t2/select-one Table :id (mt/id :venues))))))
(deftest describe-fields-sync-with-composite-pks-test
(testing "Make sure syncing a table that has a composite pks works"
(mt/test-driver (mt/normal-drivers-with-feature :describe-fields)
(mt/dataset dataset-definition-test/composite-pk
(let [songs (t2/select-one :model/Table (mt/id :songs))
fk-metadata (driver/describe-fields :redshift (mt/db)
:table-names [(:name songs)]
:schema-names [(:schema songs)])]
(is (= #{{:name "song_id", :pk? true} {:name "artist_id", :pk? true}}
(into #{}
(map #(select-keys % [:name :pk?]))
fk-metadata))))))))
(deftest ^:parallel table-rows-sample-test
(mt/test-drivers (sql-jdbc.tu/sql-jdbc-drivers)
(is (= [["20th Century Cafe"]
......
(ns metabase.sync.sync-metadata.fields.fetch-metadata-test
(ns metabase.sync.sync-metadata.fields.our-metadata-test
(:require
[clojure.test :refer :all]
[clojure.walk :as walk]
......@@ -6,7 +6,7 @@
[metabase.models.database :refer [Database]]
[metabase.models.table :refer [Table]]
[metabase.sync.sync-metadata :as sync-metadata]
[metabase.sync.sync-metadata.fields.fetch-metadata :as fetch-metadata]
[metabase.sync.sync-metadata.fields.our-metadata :as fields.our-metadata]
[metabase.test.mock.toucanery :as toucanery]
[metabase.util :as u]
[toucan2.core :as t2]
......@@ -107,4 +107,4 @@
;; defined in sets. changing keys will change the
;; order in the set implementation. (and position depends on database-position)
(m/filter-vals some? (dissoc % :id :database-position :position))))]
(remove-ids-and-nil-vals (#'fetch-metadata/our-metadata (t2/select-one Table :id transactions-table-id))))))))
(remove-ids-and-nil-vals (#'fields.our-metadata/our-metadata (t2/select-one Table :id transactions-table-id))))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment