From e8e77d643eb6784839982bd4b43b15a731f4b9b7 Mon Sep 17 00:00:00 2001 From: Case Nelson <case@metabase.com> Date: Tue, 5 Nov 2024 09:26:00 -0700 Subject: [PATCH] perf: Implement faster sync methods for mysql (#49011) * perf: Implement faster sync methods for postgres Fixes #48575 Pulls work from redshift into the common postgres driver. * Fix tests and formatting * Move nested-field-column sync to sync functions so describe-fields will also get them * Fix test * Fix test * Remove fixed safety test * Add test specific database-supports feature for pk metadata * Fix test * perf: faster mysql sync with describe-fields Fixes: #49010 * Adrress PR feedback * Fix tests * Fix test * Add nil table-schema * Don't use subselect for field-comment * Fix quoting weird identifiers * Make format string inline * Update src/metabase/driver/mysql.clj Co-authored-by: metamben <103100869+metamben@users.noreply.github.com> * Update src/metabase/driver/mysql.clj Co-authored-by: metamben <103100869+metamben@users.noreply.github.com> * Fix tests * Fix database-type * Fix tests * Fix test * Fix tests * Exclude mysql table_schema * Handle tinyInt1IsBit * Fix test * Only get one db at a time --------- Co-authored-by: metamben <103100869+metamben@users.noreply.github.com> --- src/metabase/driver/mysql.clj | 56 +++++++++++++++++++ .../driver/sql_jdbc/sync/describe_table.clj | 12 ++-- .../sql_jdbc/sync/describe_table_test.clj | 15 +++-- test/metabase/driver_test.clj | 7 ++- 4 files changed, 76 insertions(+), 14 deletions(-) diff --git a/src/metabase/driver/mysql.clj b/src/metabase/driver/mysql.clj index 9a467fec0eb..90ce321632d 100644 --- a/src/metabase/driver/mysql.clj +++ b/src/metabase/driver/mysql.clj @@ -58,6 +58,8 @@ ;; server and the columns themselves. Since this isn't something we can really change in ;; the query itself don't present the option to the users in the UI :case-sensitivity-string-filter-options false + :describe-fields true + :describe-fks true :convert-timezone true :datetime-diff true :full-join false @@ -917,3 +919,57 @@ (when-let [privileges (not-empty (set/union all-table-privileges (get table-privileges table-name)))] [table-name privileges]))) table-names))) + +;; Describe the Fields present in a `table`. This just hands off to the normal SQL driver implementation of the same +;; name, but coerces boolean fields since mysql returns them as 0/1 integers +(defmethod driver/describe-fields :mysql + [driver database & args] + (eduction + (map (fn [col] + (-> col + (update :pk? pos?) + (update :database-position int) ;; Comes in as biginteger + (update :database-required pos?) + (update :database-is-auto-increment pos?)))) + (apply (get-method driver/describe-fields :sql-jdbc) driver database args))) + +(defmethod sql-jdbc.sync/describe-fields-sql :mysql + [driver & {:keys [table-names details]}] + (sql/format {:select [[:c.column_name :name] + [[:- :c.ordinal_position 1] :database-position] + [nil :table-schema] + [:c.table_name :table-name] + (if (some-> details :additional-options (str/includes? "tinyInt1isBit=false")) + [[:upper :c.data_type] :database-type] + [[:if [:= :column_type [:inline "tinyint(1)"]] [:inline "BIT"] [:upper :c.data_type]] :database-type]) + [[:= :c.extra [:inline "auto_increment"]] :database-is-auto-increment] + [[:and + [:or [:= :column_default nil] [:= [:lower :column_default] [:inline "null"]]] + [:= :is_nullable [:inline "NO"]] + [:not [:= :c.extra [:inline "auto_increment"]]]] + :database-required] + [[:= :c.column_key [:inline "PRI"]] :pk?] + [[:nullif :c.column_comment [:inline ""]] :field-comment]] + :from [[:information_schema.columns :c]] + :where + [:and [:raw "c.table_schema not in ('information_schema','performance_schema','sys','mysql')"] + [:raw "c.table_name not in ('innodb_table_stats', 'innodb_index_stats')"] + (when (:db details) [:= :c.table_schema (:db details)]) + (when (seq table-names) [:in [:lower :c.table_name] (map u/lower-case-en table-names)])]} + :dialect (sql.qp/quote-style driver))) + +(defmethod sql-jdbc.sync/describe-fks-sql :mysql + [driver & {:keys [table-names]}] + (sql/format {:select [[nil :pk-table-schema] + [:a.referenced_table_name :pk-table-name] + [:a.referenced_column_name :pk-column-name] + [nil :fk-table-schema] + [:a.table_name :fk-table-name] + [:a.column_name :fk-column-name]] + :from [[:information_schema.key_column_usage :a]] + :join [[:information_schema.table_constraints :b] [:using :constraint_schema :constraint_name :table_name]] + :where [:and [:= :b.constraint_type [:inline "FOREIGN KEY"]] + [:!= :a.referenced_table_schema nil] + (when (seq table-names) [:in :a.table_name table-names])] + :order-by [:a.table_name]} + :dialect (sql.qp/quote-style driver))) diff --git a/src/metabase/driver/sql_jdbc/sync/describe_table.clj b/src/metabase/driver/sql_jdbc/sync/describe_table.clj index 371cb540ae0..6eb6c9b95ff 100644 --- a/src/metabase/driver/sql_jdbc/sync/describe_table.clj +++ b/src/metabase/driver/sql_jdbc/sync/describe_table.clj @@ -179,8 +179,7 @@ semantic-type (calculated-semantic-type driver (:name col) (:database-type col)) json? (isa? base-type :type/JSON)] (merge - (u/select-non-nil-keys col [:table-schema - :table-name + (u/select-non-nil-keys col [:table-name :pk? :name :database-type @@ -188,7 +187,8 @@ :field-comment :database-required :database-is-auto-increment]) - {:base-type base-type + {:table-schema (:table-schema col) ;; can be nil + :base-type base-type ;; json-unfolding is true by default for JSON fields, but this can be overridden at the DB level :json-unfolding json?} (when semantic-type @@ -201,7 +201,7 @@ [driver db] (comp (describe-fields-xf driver db) - (map-indexed (fn [i col] (assoc col :database-position i))))) + (map-indexed (fn [i col] (dissoc (assoc col :database-position i) :table-schema))))) (defmulti describe-table-fields "Returns a set of column metadata for `table` using JDBC Connection `conn`." @@ -298,7 +298,7 @@ "Returns a SQL query ([sql & params]) for use in the default JDBC implementation of [[metabase.driver/describe-fields]], i.e. [[describe-fields]]." {:added "0.49.1" - :arglists '([driver & {:keys [schema-names table-names]}])} + :arglists '([driver & {:keys [schema-names table-names details]}])} driver/dispatch-on-initialized-driver :hierarchy #'driver/hierarchy) @@ -310,7 +310,7 @@ [] (eduction (describe-fields-xf driver db) - (sql-jdbc.execute/reducible-query db (describe-fields-sql driver args))))) + (sql-jdbc.execute/reducible-query db (describe-fields-sql driver (assoc args :details (:details db))))))) (defn- describe-table-fks* [_driver ^Connection conn {^String schema :schema, ^String table-name :name} & [^String db-name-or-nil]] diff --git a/test/metabase/driver/sql_jdbc/sync/describe_table_test.clj b/test/metabase/driver/sql_jdbc/sync/describe_table_test.clj index 1f2dbdf8ace..41419f813d0 100644 --- a/test/metabase/driver/sql_jdbc/sync/describe_table_test.clj +++ b/test/metabase/driver/sql_jdbc/sync/describe_table_test.clj @@ -5,6 +5,7 @@ [clojure.set :as set] [clojure.string :as str] [clojure.test :refer :all] + [medley.core :as m] [metabase.db.metadata-queries :as metadata-queries] [metabase.driver :as driver] [metabase.driver.mysql :as mysql] @@ -137,10 +138,11 @@ (let [driver (driver.u/database->driver db)] (sort-by :database-position (if (driver.u/supports? driver :describe-fields db) - (vec (driver/describe-fields driver - db - :schema-names [(:schema table)] - :table-names [(:name table)])) + (vec (m/mapply driver/describe-fields + driver + db + (cond-> {:table-names [(:name table)]} + (:schema table) (assoc :schema-names [(:schema table)])))) (:fields (driver/describe-table driver db table)))))) (defmethod driver/database-supports? [::driver/driver ::describe-pks] @@ -166,7 +168,10 @@ [5 false false false]] (sort-by :first - (map (juxt :database-position :database-required :database-is-auto-increment (comp boolean :pk?)) + (map (juxt :database-position + :database-required + :database-is-auto-increment + (comp boolean :pk?)) (describe-fields-for-table (mt/db) (t2/select-one Table :id (mt/id :venues)))))))) (mt/test-drivers (mt/normal-drivers-without-feature :actions) (is (=? diff --git a/test/metabase/driver_test.clj b/test/metabase/driver_test.clj index 13e4d4be9c8..823944f5a9f 100644 --- a/test/metabase/driver_test.clj +++ b/test/metabase/driver_test.clj @@ -211,9 +211,10 @@ (defn- describe-fields-for-table [db table] (sort-by :database-position (if (driver/database-supports? driver/*driver* :describe-fields db) - (vec (driver/describe-fields driver/*driver* db - :schema-names [(:schema table)] - :table-names [(:name table)])) + (mapv #(dissoc % :table-name :table-schema) + (driver/describe-fields driver/*driver* db + :schema-names [(:schema table)] + :table-names [(:name table)])) (:fields (driver/describe-table driver/*driver* db table))))) (deftest ^:parallel describe-fields-or-table-test -- GitLab