Skip to content
Snippets Groups Projects
Unverified Commit ccfca0f4 authored by Tim Macdonald's avatar Tim Macdonald Committed by GitHub
Browse files

Use faster inserts in MySQL (#30820)

* Use faster inserts in MySQL

* Add local_infile check

* PR feedback

* Fix boolean handling for MySQL

* Fix tests; fix MySQL fallback if local_infile is turned off

* Turn local_infile on for MySQL tests

* Another way of doing MySQL local_infile :/

* sanitize-file-name was a misstep. Not needed for security and could cause problems

* Mistaken docstring
parent 239e9992
No related branches found
No related tags found
No related merge requests found
......@@ -5,6 +5,7 @@
[clojure.set :as set]
[clojure.string :as str]
[clojure.walk :as walk]
[honey.sql :as sql]
[java-time :as t]
[metabase.config :as config]
[metabase.db.spec :as mdb.spec]
......@@ -26,12 +27,14 @@
[metabase.query-processor.store :as qp.store]
[metabase.query-processor.timezone :as qp.timezone]
[metabase.query-processor.util.add-alias-info :as add]
[metabase.query-processor.writeback :as qp.writeback]
[metabase.upload :as upload]
[metabase.util :as u]
[metabase.util.honey-sql-2 :as h2x]
[metabase.util.i18n :refer [deferred-tru trs]]
[metabase.util.log :as log])
(:import
(java.io File)
(java.sql DatabaseMetaData ResultSet ResultSetMetaData Types)
(java.time LocalDateTime OffsetDateTime OffsetTime ZonedDateTime)))
......@@ -617,3 +620,65 @@
[_driver]
;; https://dev.mysql.com/doc/refman/8.0/en/identifier-length.html
64)
(defn- format-load
[_clause [file-path table-name]]
[(format "LOAD DATA LOCAL INFILE '%s' INTO TABLE %s" file-path (sql/format-entity table-name))])
(sql/register-clause! ::load format-load :insert-into)
(defn- sanitize-value
;; Per https://dev.mysql.com/doc/refman/8.0/en/load-data.html#load-data-field-line-handling
;; Backslash is the MySQL escape character within strings in SQL statements. Thus, to specify a literal backslash,
;; you must specify two backslashes for the value to be interpreted as a single backslash. The escape sequences
;; '\t' and '\n' specify tab and newline characters, respectively.
[v]
(cond
(string? v)
(str/replace v #"\\|\n|\r|\t" {"\\" "\\\\"
"\n" "\\n"
"\r" "\\r"
"\t" "\\t"})
(boolean? v)
(if v 1 0)
:else
v))
(defn- row->tsv
[column-count row]
(when (not= column-count (count row))
(throw (Exception. (format "ERROR: missing data in row \"%s\"" (str/join "," row)))))
(->> row
(map sanitize-value)
(str/join "\t")))
(defn- get-global-variable
"The value of the given global variable in the DB. Does not do any type coercion, so, e.g., booleans come back as
\"ON\" and \"OFF\"."
[db-id var-name]
(:value
(first
(jdbc/query (sql-jdbc.conn/db->pooled-connection-spec db-id)
["show global variables like ?" var-name]))))
(defmethod driver/insert-into :mysql
[driver db-id ^String table-name column-names values]
;; `local_infile` must be turned on per
;; https://dev.mysql.com/doc/refman/8.0/en/load-data.html#load-data-local
(if (not= (get-global-variable db-id "local_infile") "ON")
;; If it isn't turned on, fall back to the generic "INSERT INTO ..." way
((get-method driver/insert-into :sql-jdbc) driver db-id table-name column-names values)
(let [temp-file (File/createTempFile table-name ".tsv")
file-path (.getAbsolutePath temp-file)]
(try
(let [tsv (->> values
(map (partial row->tsv (count column-names)))
(str/join "\n"))
sql (sql/format {::load [file-path (keyword table-name)]
:columns (map keyword column-names)}
:quoted true
:dialect (sql.qp/quote-style driver))]
(spit file-path tsv)
(qp.writeback/execute-write-sql! db-id sql))
(finally
(.delete temp-file))))))
......@@ -2,9 +2,12 @@
(:require
[clj-bom.core :as bom]
[clojure.java.io :as io]
[clojure.java.jdbc :as jdbc]
[clojure.string :as str]
[clojure.test :refer :all]
[metabase.driver :as driver]
[metabase.driver.mysql :as mysql]
[metabase.driver.sql-jdbc.connection :as sql-jdbc.conn]
[metabase.models :refer [Field Table]]
[metabase.query-processor :as qp]
[metabase.sync :as sync]
......@@ -25,6 +28,31 @@
(def datetime-type :metabase.upload/datetime)
(def text-type :metabase.upload/text)
(defn- do-with-mysql-local-infile-activated
"Helper for [[with-mysql-local-infile-activated]]"
[thunk]
(if (or
(not= :mysql driver/*driver*)
(= "ON" (-> (sql-jdbc.conn/db->pooled-connection-spec (mt/db))
(jdbc/query
["show global variables like 'local_infile'"])
first
:value)))
(thunk)
(let [conn-spec (sql-jdbc.conn/db->pooled-connection-spec (mt/db))]
(try
(jdbc/query conn-spec
"set global local_infile = 1")
(thunk)
(finally
(jdbc/query conn-spec
"set global local_infile = 0"))))))
(defmacro with-mysql-local-infile-activated
"Turn on local_infile for MySQL"
[& body]
`(do-with-mysql-local-infile-activated (fn [] ~@body)))
(deftest type-detection-and-parse-test
(doseq [[string-value expected-value expected-type seps]
[["0.0" 0 float-type "."]
......@@ -254,13 +282,14 @@
(testing "Upload a CSV file"
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
(mt/with-empty-db
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id ,nulls,string ,bool ,number ,date ,datetime"
"2\t ,, a ,true ,1.1\t ,2022-01-01,2022-01-01T00:00:00"
"\" 3\",, b,false,\"$ 1,000.1\",2022-02-01,2022-02-01T00:00:00"]))
(with-mysql-local-infile-activated
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id ,nulls,string ,bool ,number ,date ,datetime"
"2\t ,, a ,true ,1.1\t ,2022-01-01,2022-01-01T00:00:00"
"\" 3\",, b,false,\"$ 1,000.1\",2022-02-01,2022-02-01T00:00:00"])))
(testing "Table and Fields exist after sync"
(sync/sync-database! (mt/db))
(let [table (t2/select-one Table :db_id (mt/id))]
......@@ -297,13 +326,14 @@
(testing "Upload a CSV file with a datetime column"
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
(mt/with-empty-db
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["datetime"
"2022-01-01"
"2022-01-01T00:00:00"]))
(with-mysql-local-infile-activated
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["datetime"
"2022-01-01"
"2022-01-01T00:00:00"])))
(testing "Fields exists after sync"
(sync/sync-database! (mt/db))
(let [table (t2/select-one Table :db_id (mt/id))]
......@@ -318,29 +348,30 @@
(testing "Upload a CSV file"
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
(mt/with-empty-db
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id,bool"
"1,true"
"2,false"
"3,TRUE"
"4,FALSE"
"5,t "
"6, f"
"7,\tT"
"8,F\t"
"9,y"
"10,n"
"11,Y"
"12,N"
"13,yes"
"14,no"
"15,YES"
"16,NO"
"17,1"
"18,0"]))
(with-mysql-local-infile-activated
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id,bool"
"1,true"
"2,false"
"3,TRUE"
"4,FALSE"
"5,t "
"6, f"
"7,\tT"
"8,F\t"
"9,y"
"10,n"
"11,Y"
"12,N"
"13,yes"
"14,no"
"15,YES"
"16,NO"
"17,1"
"18,0"])))
(testing "Table and Fields exist after sync"
(sync/sync-database! (mt/db))
(let [table (t2/select-one Table :db_id (mt/id))]
......@@ -362,13 +393,14 @@
short-name (subs long-name 0 (- length-limit (count "_yyyyMMddHHmmss")))]
(is (pos? length-limit) "driver/table-name-length-limit has been set")
(mt/with-empty-db
(upload/load-from-csv
driver/*driver*
(mt/id)
(upload/unique-table-name driver/*driver* long-name)
(csv-file-with ["id,bool"
"1,true"
"2,false"]))
(with-mysql-local-infile-activated
(upload/load-from-csv
driver/*driver*
(mt/id)
(upload/unique-table-name driver/*driver* long-name)
(csv-file-with ["id,bool"
"1,true"
"2,false"])))
(testing "It truncates it to the right number of characters, allowing for the timestamp"
(sync/sync-database! (mt/db))
(let [table (t2/select-one Table :db_id (mt/id) :%lower.name [:like (str short-name "%")])
......@@ -400,13 +432,14 @@
(testing "Upload a CSV file with duplicate column names"
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
(mt/with-empty-db
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["unknown,unknown,unknown,unknown_2"
"1,Serenity,Malcolm Reynolds,Pistol"
"2,Millennium Falcon, Han Solo,Blaster"]))
(with-mysql-local-infile-activated
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["unknown,unknown,unknown,unknown_2"
"1,Serenity,Malcolm Reynolds,Pistol"
"2,Millennium Falcon, Han Solo,Blaster"])))
(testing "Table and Fields exist after sync"
(sync/sync-database! (mt/db))
(let [table (t2/select-one Table :db_id (mt/id))]
......@@ -419,13 +452,14 @@
(testing "Upload a CSV file with column names that are reserved by the DB"
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
(mt/with-empty-db
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id,ship,captain"
"1,Serenity,Malcolm Reynolds"
"2,Millennium Falcon, Han Solo"]))
(with-mysql-local-infile-activated
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id,ship,captain"
"1,Serenity,Malcolm Reynolds"
"2,Millennium Falcon, Han Solo"])))
(testing "Table and Fields exist after sync"
(sync/sync-database! (mt/db))
(let [table (t2/select-one Table :db_id (mt/id))]
......@@ -437,16 +471,18 @@
(deftest load-from-csv-failed-test
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
(mt/with-empty-db
(testing "Can't upload a CSV with missing values"
(is (thrown-with-msg?
clojure.lang.ExceptionInfo (if (= driver/*driver* :postgres)
#"ERROR: missing data for column \"column_that_doesnt_have_a_value\""
#"Error executing write query: ")
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id,column_that_doesnt_have_a_value" "2"])))))
(with-mysql-local-infile-activated
(testing "Can't upload a CSV with missing values"
(is (thrown-with-msg?
clojure.lang.ExceptionInfo (condp = driver/*driver*
:postgres #"ERROR: missing data for column \"column_that_doesnt_have_a_value\""
:mysql #"ERROR: missing data in row \".*\""
#"Error executing write query: ")
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id,column_that_doesnt_have_a_value" "2"]))))))
(testing "Check that the table isn't created if the upload fails"
(sync/sync-database! (mt/db))
(is (nil? (t2/select-one Table :db_id (mt/id))))))))
......@@ -455,13 +491,14 @@
(testing "Upload a CSV file with tabs in the values"
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
(mt/with-empty-db
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id,ship,captain"
"1,Serenity,Malcolm\tReynolds"
"2,Millennium\tFalcon,Han\tSolo"]))
(with-mysql-local-infile-activated
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id,ship,captain"
"1,Serenity,Malcolm\tReynolds"
"2,Millennium\tFalcon,Han\tSolo"])))
(testing "Table and Fields exist after sync"
(sync/sync-database! (mt/db))
(let [table (t2/select-one Table :db_id (mt/id))]
......@@ -477,13 +514,14 @@
(testing "Upload a CSV file with carriage returns in the values"
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
(mt/with-empty-db
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id,ship,captain"
"1,Serenity,\"Malcolm\rReynolds\""
"2,\"Millennium\rFalcon\",\"Han\rSolo\""]))
(with-mysql-local-infile-activated
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id,ship,captain"
"1,Serenity,\"Malcolm\rReynolds\""
"2,\"Millennium\rFalcon\",\"Han\rSolo\""])))
(testing "Table and Fields exist after sync"
(sync/sync-database! (mt/db))
(let [table (t2/select-one Table :db_id (mt/id))]
......@@ -499,15 +537,16 @@
(testing "Upload a CSV file with a byte-order mark (BOM)"
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
(mt/with-empty-db
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id,ship,captain"
"1,Serenity,Malcolm Reynolds"
"2,Millennium Falcon, Han Solo"]
"star-wars"
(partial bom/bom-writer "UTF-8")))
(with-mysql-local-infile-activated
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id,ship,captain"
"1,Serenity,Malcolm Reynolds"
"2,Millennium Falcon, Han Solo"]
"star-wars"
(partial bom/bom-writer "UTF-8"))))
(testing "Table and Fields exist after sync"
(sync/sync-database! (mt/db))
(let [table (t2/select-one Table :db_id (mt/id))]
......@@ -520,14 +559,15 @@
(testing "Upload a CSV file with very rude values"
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
(mt/with-empty-db
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id integer); --,ship,captain"
"1,Serenity,--Malcolm Reynolds"
"2,;Millennium Falcon,Han Solo\""]
"\"; -- Very rude filename"))
(with-mysql-local-infile-activated
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id integer); --,ship,captain"
"1,Serenity,--Malcolm Reynolds"
"2,;Millennium Falcon,Han Solo\""]
"\"; -- Very rude filename")))
(testing "Table and Fields exist after sync"
(sync/sync-database! (mt/db))
(let [table (t2/select-one Table :db_id (mt/id))]
......@@ -558,3 +598,36 @@
(testing "Check the data was uploaded into the table correctly"
(is (= [["Malcolm"] ["\\."] ["Han"]]
(rows-for-table table))))))))))
(deftest mysql-settings-test
(testing "Ensure that local_infile is set to true for better MySQL testing"
(mt/test-drivers [:mysql]
(with-mysql-local-infile-activated
(is (= "ON" (-> (sql-jdbc.conn/db->pooled-connection-spec (mt/db))
(jdbc/query
["show global variables like 'local_infile'"])
first
:value)))))))
(deftest load-from-csv-mysql-slow-way-test
(testing "MySQL upload should work fine with local_infile disabled"
(mt/test-drivers [:mysql]
(mt/with-empty-db
(with-redefs [mysql/get-global-variable (constantly "OFF")]
(upload/load-from-csv
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id,ship,captain"
"1,Serenity,Malcolm Reynolds"
"2,Millennium Falcon,Han Solo"])))
(testing "Table and Fields exist after sync"
(sync/sync-database! (mt/db))
(let [table (t2/select-one Table :db_id (mt/id))]
(is (=? {:name #"(?i)upload_test"} table))
(testing "Check the data was uploaded into the table correctly"
(is (= ["id", "ship", "captain"]
(column-names-for-table table)))
(is (= [[1 "Serenity" "Malcolm Reynolds"]
[2 "Millennium Falcon" "Han Solo"]]
(rows-for-table table))))))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment