From ccfca0f431dafd695c2fa82d3be03cb01a3c842a Mon Sep 17 00:00:00 2001 From: Tim Macdonald <tim@metabase.com> Date: Tue, 30 May 2023 15:27:51 +0100 Subject: [PATCH] Use faster inserts in MySQL (#30820) * Use faster inserts in MySQL * Add local_infile check * PR feedback * Fix boolean handling for MySQL * Fix tests; fix MySQL fallback if local_infile is turned off * Turn local_infile on for MySQL tests * Another way of doing MySQL local_infile :/ * sanitize-file-name was a misstep. Not needed for security and could cause problems * Mistaken docstring --- src/metabase/driver/mysql.clj | 65 ++++++++ test/metabase/upload_test.clj | 271 +++++++++++++++++++++------------- 2 files changed, 237 insertions(+), 99 deletions(-) diff --git a/src/metabase/driver/mysql.clj b/src/metabase/driver/mysql.clj index 72e366e2826..b158d85e4b2 100644 --- a/src/metabase/driver/mysql.clj +++ b/src/metabase/driver/mysql.clj @@ -5,6 +5,7 @@ [clojure.set :as set] [clojure.string :as str] [clojure.walk :as walk] + [honey.sql :as sql] [java-time :as t] [metabase.config :as config] [metabase.db.spec :as mdb.spec] @@ -26,12 +27,14 @@ [metabase.query-processor.store :as qp.store] [metabase.query-processor.timezone :as qp.timezone] [metabase.query-processor.util.add-alias-info :as add] + [metabase.query-processor.writeback :as qp.writeback] [metabase.upload :as upload] [metabase.util :as u] [metabase.util.honey-sql-2 :as h2x] [metabase.util.i18n :refer [deferred-tru trs]] [metabase.util.log :as log]) (:import + (java.io File) (java.sql DatabaseMetaData ResultSet ResultSetMetaData Types) (java.time LocalDateTime OffsetDateTime OffsetTime ZonedDateTime))) @@ -617,3 +620,65 @@ [_driver] ;; https://dev.mysql.com/doc/refman/8.0/en/identifier-length.html 64) + +(defn- format-load + [_clause [file-path table-name]] + [(format "LOAD DATA LOCAL INFILE '%s' INTO TABLE %s" file-path (sql/format-entity table-name))]) + +(sql/register-clause! ::load format-load :insert-into) + +(defn- sanitize-value + ;; Per https://dev.mysql.com/doc/refman/8.0/en/load-data.html#load-data-field-line-handling + ;; Backslash is the MySQL escape character within strings in SQL statements. Thus, to specify a literal backslash, + ;; you must specify two backslashes for the value to be interpreted as a single backslash. The escape sequences + ;; '\t' and '\n' specify tab and newline characters, respectively. + [v] + (cond + (string? v) + (str/replace v #"\\|\n|\r|\t" {"\\" "\\\\" + "\n" "\\n" + "\r" "\\r" + "\t" "\\t"}) + (boolean? v) + (if v 1 0) + :else + v)) + +(defn- row->tsv + [column-count row] + (when (not= column-count (count row)) + (throw (Exception. (format "ERROR: missing data in row \"%s\"" (str/join "," row))))) + (->> row + (map sanitize-value) + (str/join "\t"))) + +(defn- get-global-variable + "The value of the given global variable in the DB. Does not do any type coercion, so, e.g., booleans come back as + \"ON\" and \"OFF\"." + [db-id var-name] + (:value + (first + (jdbc/query (sql-jdbc.conn/db->pooled-connection-spec db-id) + ["show global variables like ?" var-name])))) + +(defmethod driver/insert-into :mysql + [driver db-id ^String table-name column-names values] + ;; `local_infile` must be turned on per + ;; https://dev.mysql.com/doc/refman/8.0/en/load-data.html#load-data-local + (if (not= (get-global-variable db-id "local_infile") "ON") + ;; If it isn't turned on, fall back to the generic "INSERT INTO ..." way + ((get-method driver/insert-into :sql-jdbc) driver db-id table-name column-names values) + (let [temp-file (File/createTempFile table-name ".tsv") + file-path (.getAbsolutePath temp-file)] + (try + (let [tsv (->> values + (map (partial row->tsv (count column-names))) + (str/join "\n")) + sql (sql/format {::load [file-path (keyword table-name)] + :columns (map keyword column-names)} + :quoted true + :dialect (sql.qp/quote-style driver))] + (spit file-path tsv) + (qp.writeback/execute-write-sql! db-id sql)) + (finally + (.delete temp-file)))))) diff --git a/test/metabase/upload_test.clj b/test/metabase/upload_test.clj index 71bdb77a92b..1a579fc7992 100644 --- a/test/metabase/upload_test.clj +++ b/test/metabase/upload_test.clj @@ -2,9 +2,12 @@ (:require [clj-bom.core :as bom] [clojure.java.io :as io] + [clojure.java.jdbc :as jdbc] [clojure.string :as str] [clojure.test :refer :all] [metabase.driver :as driver] + [metabase.driver.mysql :as mysql] + [metabase.driver.sql-jdbc.connection :as sql-jdbc.conn] [metabase.models :refer [Field Table]] [metabase.query-processor :as qp] [metabase.sync :as sync] @@ -25,6 +28,31 @@ (def datetime-type :metabase.upload/datetime) (def text-type :metabase.upload/text) +(defn- do-with-mysql-local-infile-activated + "Helper for [[with-mysql-local-infile-activated]]" + [thunk] + (if (or + (not= :mysql driver/*driver*) + (= "ON" (-> (sql-jdbc.conn/db->pooled-connection-spec (mt/db)) + (jdbc/query + ["show global variables like 'local_infile'"]) + first + :value))) + (thunk) + (let [conn-spec (sql-jdbc.conn/db->pooled-connection-spec (mt/db))] + (try + (jdbc/query conn-spec + "set global local_infile = 1") + (thunk) + (finally + (jdbc/query conn-spec + "set global local_infile = 0")))))) + +(defmacro with-mysql-local-infile-activated + "Turn on local_infile for MySQL" + [& body] + `(do-with-mysql-local-infile-activated (fn [] ~@body))) + (deftest type-detection-and-parse-test (doseq [[string-value expected-value expected-type seps] [["0.0" 0 float-type "."] @@ -254,13 +282,14 @@ (testing "Upload a CSV file" (mt/test-drivers (mt/normal-drivers-with-feature :uploads) (mt/with-empty-db - (upload/load-from-csv - driver/*driver* - (mt/id) - "upload_test" - (csv-file-with ["id ,nulls,string ,bool ,number ,date ,datetime" - "2\t ,, a ,true ,1.1\t ,2022-01-01,2022-01-01T00:00:00" - "\" 3\",, b,false,\"$ 1,000.1\",2022-02-01,2022-02-01T00:00:00"])) + (with-mysql-local-infile-activated + (upload/load-from-csv + driver/*driver* + (mt/id) + "upload_test" + (csv-file-with ["id ,nulls,string ,bool ,number ,date ,datetime" + "2\t ,, a ,true ,1.1\t ,2022-01-01,2022-01-01T00:00:00" + "\" 3\",, b,false,\"$ 1,000.1\",2022-02-01,2022-02-01T00:00:00"]))) (testing "Table and Fields exist after sync" (sync/sync-database! (mt/db)) (let [table (t2/select-one Table :db_id (mt/id))] @@ -297,13 +326,14 @@ (testing "Upload a CSV file with a datetime column" (mt/test-drivers (mt/normal-drivers-with-feature :uploads) (mt/with-empty-db - (upload/load-from-csv - driver/*driver* - (mt/id) - "upload_test" - (csv-file-with ["datetime" - "2022-01-01" - "2022-01-01T00:00:00"])) + (with-mysql-local-infile-activated + (upload/load-from-csv + driver/*driver* + (mt/id) + "upload_test" + (csv-file-with ["datetime" + "2022-01-01" + "2022-01-01T00:00:00"]))) (testing "Fields exists after sync" (sync/sync-database! (mt/db)) (let [table (t2/select-one Table :db_id (mt/id))] @@ -318,29 +348,30 @@ (testing "Upload a CSV file" (mt/test-drivers (mt/normal-drivers-with-feature :uploads) (mt/with-empty-db - (upload/load-from-csv - driver/*driver* - (mt/id) - "upload_test" - (csv-file-with ["id,bool" - "1,true" - "2,false" - "3,TRUE" - "4,FALSE" - "5,t " - "6, f" - "7,\tT" - "8,F\t" - "9,y" - "10,n" - "11,Y" - "12,N" - "13,yes" - "14,no" - "15,YES" - "16,NO" - "17,1" - "18,0"])) + (with-mysql-local-infile-activated + (upload/load-from-csv + driver/*driver* + (mt/id) + "upload_test" + (csv-file-with ["id,bool" + "1,true" + "2,false" + "3,TRUE" + "4,FALSE" + "5,t " + "6, f" + "7,\tT" + "8,F\t" + "9,y" + "10,n" + "11,Y" + "12,N" + "13,yes" + "14,no" + "15,YES" + "16,NO" + "17,1" + "18,0"]))) (testing "Table and Fields exist after sync" (sync/sync-database! (mt/db)) (let [table (t2/select-one Table :db_id (mt/id))] @@ -362,13 +393,14 @@ short-name (subs long-name 0 (- length-limit (count "_yyyyMMddHHmmss")))] (is (pos? length-limit) "driver/table-name-length-limit has been set") (mt/with-empty-db - (upload/load-from-csv - driver/*driver* - (mt/id) - (upload/unique-table-name driver/*driver* long-name) - (csv-file-with ["id,bool" - "1,true" - "2,false"])) + (with-mysql-local-infile-activated + (upload/load-from-csv + driver/*driver* + (mt/id) + (upload/unique-table-name driver/*driver* long-name) + (csv-file-with ["id,bool" + "1,true" + "2,false"]))) (testing "It truncates it to the right number of characters, allowing for the timestamp" (sync/sync-database! (mt/db)) (let [table (t2/select-one Table :db_id (mt/id) :%lower.name [:like (str short-name "%")]) @@ -400,13 +432,14 @@ (testing "Upload a CSV file with duplicate column names" (mt/test-drivers (mt/normal-drivers-with-feature :uploads) (mt/with-empty-db - (upload/load-from-csv - driver/*driver* - (mt/id) - "upload_test" - (csv-file-with ["unknown,unknown,unknown,unknown_2" - "1,Serenity,Malcolm Reynolds,Pistol" - "2,Millennium Falcon, Han Solo,Blaster"])) + (with-mysql-local-infile-activated + (upload/load-from-csv + driver/*driver* + (mt/id) + "upload_test" + (csv-file-with ["unknown,unknown,unknown,unknown_2" + "1,Serenity,Malcolm Reynolds,Pistol" + "2,Millennium Falcon, Han Solo,Blaster"]))) (testing "Table and Fields exist after sync" (sync/sync-database! (mt/db)) (let [table (t2/select-one Table :db_id (mt/id))] @@ -419,13 +452,14 @@ (testing "Upload a CSV file with column names that are reserved by the DB" (mt/test-drivers (mt/normal-drivers-with-feature :uploads) (mt/with-empty-db - (upload/load-from-csv - driver/*driver* - (mt/id) - "upload_test" - (csv-file-with ["id,ship,captain" - "1,Serenity,Malcolm Reynolds" - "2,Millennium Falcon, Han Solo"])) + (with-mysql-local-infile-activated + (upload/load-from-csv + driver/*driver* + (mt/id) + "upload_test" + (csv-file-with ["id,ship,captain" + "1,Serenity,Malcolm Reynolds" + "2,Millennium Falcon, Han Solo"]))) (testing "Table and Fields exist after sync" (sync/sync-database! (mt/db)) (let [table (t2/select-one Table :db_id (mt/id))] @@ -437,16 +471,18 @@ (deftest load-from-csv-failed-test (mt/test-drivers (mt/normal-drivers-with-feature :uploads) (mt/with-empty-db - (testing "Can't upload a CSV with missing values" - (is (thrown-with-msg? - clojure.lang.ExceptionInfo (if (= driver/*driver* :postgres) - #"ERROR: missing data for column \"column_that_doesnt_have_a_value\"" - #"Error executing write query: ") - (upload/load-from-csv - driver/*driver* - (mt/id) - "upload_test" - (csv-file-with ["id,column_that_doesnt_have_a_value" "2"]))))) + (with-mysql-local-infile-activated + (testing "Can't upload a CSV with missing values" + (is (thrown-with-msg? + clojure.lang.ExceptionInfo (condp = driver/*driver* + :postgres #"ERROR: missing data for column \"column_that_doesnt_have_a_value\"" + :mysql #"ERROR: missing data in row \".*\"" + #"Error executing write query: ") + (upload/load-from-csv + driver/*driver* + (mt/id) + "upload_test" + (csv-file-with ["id,column_that_doesnt_have_a_value" "2"])))))) (testing "Check that the table isn't created if the upload fails" (sync/sync-database! (mt/db)) (is (nil? (t2/select-one Table :db_id (mt/id)))))))) @@ -455,13 +491,14 @@ (testing "Upload a CSV file with tabs in the values" (mt/test-drivers (mt/normal-drivers-with-feature :uploads) (mt/with-empty-db - (upload/load-from-csv - driver/*driver* - (mt/id) - "upload_test" - (csv-file-with ["id,ship,captain" - "1,Serenity,Malcolm\tReynolds" - "2,Millennium\tFalcon,Han\tSolo"])) + (with-mysql-local-infile-activated + (upload/load-from-csv + driver/*driver* + (mt/id) + "upload_test" + (csv-file-with ["id,ship,captain" + "1,Serenity,Malcolm\tReynolds" + "2,Millennium\tFalcon,Han\tSolo"]))) (testing "Table and Fields exist after sync" (sync/sync-database! (mt/db)) (let [table (t2/select-one Table :db_id (mt/id))] @@ -477,13 +514,14 @@ (testing "Upload a CSV file with carriage returns in the values" (mt/test-drivers (mt/normal-drivers-with-feature :uploads) (mt/with-empty-db - (upload/load-from-csv - driver/*driver* - (mt/id) - "upload_test" - (csv-file-with ["id,ship,captain" - "1,Serenity,\"Malcolm\rReynolds\"" - "2,\"Millennium\rFalcon\",\"Han\rSolo\""])) + (with-mysql-local-infile-activated + (upload/load-from-csv + driver/*driver* + (mt/id) + "upload_test" + (csv-file-with ["id,ship,captain" + "1,Serenity,\"Malcolm\rReynolds\"" + "2,\"Millennium\rFalcon\",\"Han\rSolo\""]))) (testing "Table and Fields exist after sync" (sync/sync-database! (mt/db)) (let [table (t2/select-one Table :db_id (mt/id))] @@ -499,15 +537,16 @@ (testing "Upload a CSV file with a byte-order mark (BOM)" (mt/test-drivers (mt/normal-drivers-with-feature :uploads) (mt/with-empty-db - (upload/load-from-csv - driver/*driver* - (mt/id) - "upload_test" - (csv-file-with ["id,ship,captain" - "1,Serenity,Malcolm Reynolds" - "2,Millennium Falcon, Han Solo"] - "star-wars" - (partial bom/bom-writer "UTF-8"))) + (with-mysql-local-infile-activated + (upload/load-from-csv + driver/*driver* + (mt/id) + "upload_test" + (csv-file-with ["id,ship,captain" + "1,Serenity,Malcolm Reynolds" + "2,Millennium Falcon, Han Solo"] + "star-wars" + (partial bom/bom-writer "UTF-8")))) (testing "Table and Fields exist after sync" (sync/sync-database! (mt/db)) (let [table (t2/select-one Table :db_id (mt/id))] @@ -520,14 +559,15 @@ (testing "Upload a CSV file with very rude values" (mt/test-drivers (mt/normal-drivers-with-feature :uploads) (mt/with-empty-db - (upload/load-from-csv - driver/*driver* - (mt/id) - "upload_test" - (csv-file-with ["id integer); --,ship,captain" - "1,Serenity,--Malcolm Reynolds" - "2,;Millennium Falcon,Han Solo\""] - "\"; -- Very rude filename")) + (with-mysql-local-infile-activated + (upload/load-from-csv + driver/*driver* + (mt/id) + "upload_test" + (csv-file-with ["id integer); --,ship,captain" + "1,Serenity,--Malcolm Reynolds" + "2,;Millennium Falcon,Han Solo\""] + "\"; -- Very rude filename"))) (testing "Table and Fields exist after sync" (sync/sync-database! (mt/db)) (let [table (t2/select-one Table :db_id (mt/id))] @@ -558,3 +598,36 @@ (testing "Check the data was uploaded into the table correctly" (is (= [["Malcolm"] ["\\."] ["Han"]] (rows-for-table table)))))))))) + +(deftest mysql-settings-test + (testing "Ensure that local_infile is set to true for better MySQL testing" + (mt/test-drivers [:mysql] + (with-mysql-local-infile-activated + (is (= "ON" (-> (sql-jdbc.conn/db->pooled-connection-spec (mt/db)) + (jdbc/query + ["show global variables like 'local_infile'"]) + first + :value))))))) + +(deftest load-from-csv-mysql-slow-way-test + (testing "MySQL upload should work fine with local_infile disabled" + (mt/test-drivers [:mysql] + (mt/with-empty-db + (with-redefs [mysql/get-global-variable (constantly "OFF")] + (upload/load-from-csv + driver/*driver* + (mt/id) + "upload_test" + (csv-file-with ["id,ship,captain" + "1,Serenity,Malcolm Reynolds" + "2,Millennium Falcon,Han Solo"]))) + (testing "Table and Fields exist after sync" + (sync/sync-database! (mt/db)) + (let [table (t2/select-one Table :db_id (mt/id))] + (is (=? {:name #"(?i)upload_test"} table)) + (testing "Check the data was uploaded into the table correctly" + (is (= ["id", "ship", "captain"] + (column-names-for-table table))) + (is (= [[1 "Serenity" "Malcolm Reynolds"] + [2 "Millennium Falcon" "Han Solo"]] + (rows-for-table table)))))))))) -- GitLab