Skip to content
Snippets Groups Projects
Unverified Commit d36c0633 authored by Tim Macdonald, committed by GitHub
Browse files

Tune MySQL and Postgres CSV uploads to use less memory (#33167)

parent 4431d9d4
No related branches found
No related tags found
No related merge requests found
(ns metabase.driver.mysql
"MySQL driver. Builds off of the SQL-JDBC driver."
(:require
[clojure.java.io :as jio]
[clojure.java.jdbc :as jdbc]
[clojure.set :as set]
[clojure.string :as str]
......@@ -656,14 +657,14 @@
(let [temp-file (File/createTempFile table-name ".tsv")
file-path (.getAbsolutePath temp-file)]
(try
(let [tsv (->> values
(map (partial row->tsv (count column-names)))
(str/join "\n"))
sql (sql/format {::load [file-path (keyword table-name)]
:columns (map keyword column-names)}
:quoted true
:dialect (sql.qp/quote-style driver))]
(spit file-path tsv)
(let [tsvs (map (partial row->tsv (count column-names)) values)
sql (sql/format {::load [file-path (keyword table-name)]
:columns (map keyword column-names)}
:quoted true
:dialect (sql.qp/quote-style driver))]
(with-open [^java.io.Writer writer (jio/writer file-path)]
(doseq [value (interpose \newline tsvs)]
(.write writer (str value))))
(qp.writeback/execute-write-sql! db-id sql))
(finally
(.delete temp-file))))))
......@@ -804,12 +804,14 @@
:columns (map keyword column-names)
::from-stdin "''"}
:quoted true
:dialect (sql.qp/quote-style driver))
tsvs (->> values
(map row->tsv)
(str/join "\n")
(StringReader.))]
(.copyIn copy-manager ^String sql tsvs)))))
:dialect (sql.qp/quote-style driver))]
;; There's nothing magic about 100, but it felt good in testing. There could well be a better number.
(doseq [slice-of-values (partition-all 100 values)]
(let [tsvs (->> slice-of-values
(map row->tsv)
(str/join "\n")
(StringReader.))]
(.copyIn copy-manager ^String sql tsvs)))))))
;;; ------------------------------------------------- User Impersonation --------------------------------------------------
......
......@@ -252,17 +252,16 @@
::datetime #(parse-datetime (str/trim %))))
(defn- parsed-rows
"Returns a vector of parsed rows from a `csv-file`.
"Returns a lazy seq of parsed rows from the `reader`.
Replaces empty strings with nil."
[col->upload-type csv-file]
(with-open [reader (io/reader csv-file)]
(let [[header & rows] (csv/read-csv reader)
column-count (count header)
parsers (map upload-type->parser (vals col->upload-type))]
(vec (for [row rows]
(for [[value parser] (map vector (pad column-count row) parsers)]
(when (not (str/blank? value))
(parser value))))))))
[col->upload-type reader]
(let [[header & rows] (csv/read-csv reader)
column-count (count header)
parsers (map upload-type->parser (vals col->upload-type))]
(for [row rows]
(for [[value parser] (map vector (pad column-count row) parsers)]
(when (not (str/blank? value))
(parser value))))))
;;;; +------------------+
;;;; | Public Functions |
......@@ -317,11 +316,12 @@
column-names (keys col->upload-type)]
(driver/create-table! driver db-id table-name col->database-type)
(try
(let [rows (parsed-rows col->upload-type csv-file)]
(driver/insert-into! driver db-id table-name column-names rows)
{:num-rows (count rows)
:num-columns (count column-names)
:size-mb (/ (.length csv-file) 1048576.0)})
(with-open [reader (io/reader csv-file)]
(let [rows (parsed-rows col->upload-type reader)]
(driver/insert-into! driver db-id table-name column-names rows)
{:num-rows (count rows)
:num-columns (count column-names)
:size-mb (/ (.length csv-file) 1048576.0)}))
(catch Throwable e
(driver/drop-table! driver db-id table-name)
(throw (ex-info (ex-message e) {:status-code 400}))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment