Skip to content
Snippets Groups Projects
Unverified Commit 7fdacb7e authored by Cam Saul's avatar Cam Saul Committed by GitHub
Browse files

Very fast vertica data loading (#14484)

* Remove some commas in arglists

* Clarify dox and add some arglists metadata

* EXTREMELY FAST VERTICA DATA LOADING

* Improved `u/profile` macro

* Test fixes :wrench:

* Test fix :wrench:

* Fix occasional test failures in metabase.api.dataset-test
parent 02c7621e
No related branches found
No related tags found
No related merge requests found
......@@ -28,24 +28,24 @@
;; Map a Vertica-specific column type (as reported by JDBC metadata, as a keyword) to a generic
;; Metabase base type. Lookup in a plain map, so unknown types fall through to nil.
(defmethod sql-jdbc.sync/database-type->base-type :vertica
[_ database-type]
;; multi-word Vertica type names can't be written as keyword literals, hence the (keyword "...") calls.
;; NOTE(review): :Double maps to :type/Decimal rather than :type/Float -- confirm this is intentional.
({:Boolean :type/Boolean
:Integer :type/Integer
:Bigint :type/BigInteger
:Varbinary :type/*
:Binary :type/*
:Char :type/Text
:Varchar :type/Text
:Money :type/Decimal
:Numeric :type/Decimal
:Double :type/Decimal
:Float :type/Float
:Date :type/Date
:Time :type/Time
:TimeTz :type/TimeWithLocalTZ
:Timestamp :type/DateTime
:TimestampTz :type/DateTimeWithLocalTZ
:AUTO_INCREMENT :type/Integer
(keyword "Long Varchar") :type/Text
(keyword "Long Varbinary") :type/*} database-type))
:Integer :type/Integer
:Bigint :type/BigInteger
:Varbinary :type/*
:Binary :type/*
:Char :type/Text
:Varchar :type/Text
:Money :type/Decimal
:Numeric :type/Decimal
:Double :type/Decimal
:Float :type/Float
:Date :type/Date
:Time :type/Time
:TimeTz :type/TimeWithLocalTZ
:Timestamp :type/DateTime
:TimestampTz :type/DateTimeWithLocalTZ
:AUTO_INCREMENT :type/Integer
(keyword "Long Varchar") :type/Text
(keyword "Long Varbinary") :type/*} database-type))
(defmethod sql-jdbc.conn/connection-details->spec :vertica
[_ {:keys [host port db dbname]
......
(ns metabase.test.data.vertica
"Code for creating / destroying a Vertica database from a `DatabaseDefinition`."
(:require [clojure.java.jdbc :as jdbc]
[colorize.core :as colorize]
(:require [clojure.data.csv :as csv]
[clojure.java.jdbc :as jdbc]
[clojure.string :as str]
[java-time :as t]
[medley.core :as m]
[metabase.driver.sql-jdbc.connection :as sql-jdbc.conn]
[metabase.test.data.interface :as tx]
[metabase.test.data.sql :as sql.tx]
[metabase.test.data.sql-jdbc :as sql-jdbc.tx]
[metabase.test.data.sql-jdbc.execute :as execute]
[metabase.test.data.sql-jdbc.load-data :as load-data]))
[metabase.test.data.sql-jdbc.load-data :as load-data]
[metabase.util :as u]
[metabase.util.files :as files]))
;; register the shared SQL-JDBC test-extension implementations for the :vertica driver
(sql-jdbc.tx/add-test-extensions! :vertica)
......@@ -54,28 +59,94 @@
(defn- dbspec []
  ;; build a fresh clojure.java.jdbc spec from `db-connection-details` (defined above, not visible
  ;; here -- presumably a delay or atom, since it is deref'd)
  (sql-jdbc.conn/connection-details->spec :vertica @db-connection-details))
(defn- do-with-retries
  "Call `thunk`; if it throws, run `on-fail` and try again, up to `num-retries` additional times.
  Once the retries are exhausted the final call's exception propagates to the caller."
  [thunk on-fail num-retries]
  (if (pos? num-retries)
    (try
      (thunk)
      (catch Throwable _
        (on-fail)
        (do-with-retries thunk on-fail (dec num-retries))))
    (thunk)))
;; `value->csv` renders a single row value as the string written to the CSV file that gets fed to
;; Vertica's `COPY ... FROM LOCAL` (see `dump-table-rows-to-csv!` / `load-rows-from-csv!` below).
;; Dispatches on the value's class.
(defmulti ^:private value->csv
  {:arglists '([value])}
  class)
(defmethod value->csv :default
  [v]
  ;; default: the value's ordinary string representation (nil renders as the empty string)
  (str v))
(defmethod value->csv java.time.ZonedDateTime
  [t]
  ;; convert to an OffsetDateTime first, then render via the default method -- presumably because
  ;; the zone-ID form isn't something Vertica's COPY can parse; TODO confirm
  (value->csv (t/offset-date-time t)))
(defmethod value->csv String
  [s]
  ;; escape commas with a backslash: the COPY statement in `load-rows-from-csv!` uses DELIMITER ','
  (str/escape s {\, "\\,"}))
;; HoneySQL calls (SQL function invocations) have no literal CSV representation, so fail fast
;; rather than writing something Vertica would load as a plain string.
(defmethod value->csv honeysql.types.SqlCall
  [call]
  (throw (ex-info "Cannot insert rows containing HoneySQL calls: insert the appropriate raw value instead"
                  {:call call})))
(defn- dump-table-rows-to-csv!
  "Write the rows of a table definition to a CSV file at `filename`, prepending a generated 1-based
  `id` column. Each value is rendered with `value->csv`. Throws an ex-info wrapping the underlying
  error (with a sample of the rows attached) if anything goes wrong."
  [{:keys [field-definitions rows]} ^String filename]
  (try
    (let [header    (cons "id" (mapv :field-name field-definitions))
          data-rows (map-indexed (fn [i row]
                                   ;; (inc i) => ids start at 1
                                   (cons (inc i) (map value->csv row)))
                                 rows)
          csv-rows  (cons header data-rows)]
      (try
        (with-open [writer (java.io.FileWriter. (java.io.File. filename))]
          (csv/write-csv writer csv-rows))
        (catch Throwable e
          (throw (ex-info "Error writing rows to CSV" {:rows (take 10 csv-rows)} e)))))
    (catch Throwable e
      (throw (ex-info "Error dumping rows to CSV" {:filename filename} e)))))
(defn- load-rows-from-csv!
  "Load rows from a CSV file into a Table using Vertica's bulk `COPY ... FROM LOCAL`, then
  sanity-check that the expected number of rows made it in (both via COPY's reported row count and
  via `SELECT count(*)`). Returns `:ok` on success; on failure throws an ex-info wrapping the
  underlying error with the first few lines of the CSV file attached for debugging."
  [driver {:keys [database-name], :as dbdef} {:keys [table-name rows], :as tabledef} filename]
  (let [table-identifier (sql.tx/qualify-and-quote :vertica database-name table-name)]
    (with-open [conn (jdbc/get-connection (dbspec))]
      (letfn [(execute! [sql]
                (try
                  (jdbc/execute! {:connection conn} sql)
                  (catch Throwable e
                    (throw (ex-info "Error executing SQL" {:sql sql, :spec (dbspec)} e)))))
              ;; best-effort fetch of whatever actually got inserted, for error messages only
              (actual-rows []
                (u/ignore-exceptions
                  (jdbc/query {:connection conn}
                              (format "SELECT * FROM %s ORDER BY id ASC;" table-identifier))))]
        (try
          ;; make sure the Table is empty
          (execute! (format "TRUNCATE TABLE %s" table-identifier))
          ;; load the rows from the CSV file
          (let [[num-rows-inserted] (execute! (format "COPY %s FROM LOCAL '%s' DELIMITER ','"
                                                      table-identifier
                                                      filename))]
            ;; COPY returns the number of rows inserted; make sure this matches what we expected
            (when-not (= num-rows-inserted (count rows))
              (throw (ex-info (format "Expected %d rows to be inserted, but only %d were" (count rows) num-rows-inserted)
                              {:inserted-rows (take 100 (actual-rows))}))))
          ;; make sure SELECT COUNT(*) matches as well
          (let [[{actual-num-rows :count}] (jdbc/query {:connection conn}
                                                       (format "SELECT count(*) FROM %s;" table-identifier))]
            (when-not (= actual-num-rows (count rows))
              ;; BUGFIX: the format string was missing the second %d, so the actual row count was
              ;; silently dropped from the error message (Formatter ignores extra args)
              (throw (ex-info (format "Expected count(*) to return %d, but only got %d" (count rows) actual-num-rows)
                              {:inserted-rows (take 100 (actual-rows))}))))
          ;; success!
          :ok
          (catch Throwable e
            (throw (ex-info "Error loading rows from CSV file"
                            {:filename filename
                             :rows (take 10 (str/split-lines (slurp filename)))}
                            e))))))))
(defmethod load-data/load-data! :vertica
  [driver {:keys [database-name], :as dbdef} {:keys [table-name], :as tabledef}]
  ;; try a few times to load the data, Vertica is very fussy and it doesn't always work the first time
  (do-with-retries
   ;; attempt: insert rows one at a time, generating IDs client-side
   #(load-data/load-data-one-at-a-time-add-ids! driver dbdef tabledef)
   ;; on failure: warn, then TRUNCATE the table so the next attempt starts from a clean slate
   (fn []
     (println (colorize/red "\n\nVertica failed to load data, let's try again...\n\n"))
     (let [sql (format "TRUNCATE TABLE %s" (sql.tx/qualify-and-quote :vertica database-name table-name))]
       (jdbc/execute! (dbspec) sql)))
   ;; up to 5 retries
   5))
[driver dbdef {:keys [rows], :as tabledef}]
(try
(let [filename (str (files/get-path (System/getProperty "java.io.tmpdir") "vertica-rows.csv"))]
(dump-table-rows-to-csv! tabledef filename)
(load-rows-from-csv! driver dbdef tabledef filename))
(catch Throwable e
(throw (ex-info "Error loading rows" {:rows (take 10 rows)} e)))))
;; SQL type for test-data primary-key columns; IDs are generated client-side (see `dump-table-rows-to-csv!`)
(defmethod sql.tx/pk-sql-type :vertica [& _] "INTEGER")
......@@ -84,7 +155,6 @@
;; flag Vertica's timezone handling as unreliable so timezone-sensitive tests can account for it
(defmethod tx/has-questionable-timezone-support? :vertica [_] true)
(defmethod tx/before-run :vertica
[_]
;; Close all existing sessions connected to our test DB
......@@ -92,17 +162,6 @@
;; Increase the connection limit; the default is 5 or so which causes tests to fail when too many connections are made
(jdbc/execute! (dbspec) (format "ALTER DATABASE \"%s\" SET MaxClientSessions = 1000;" (db-name))))
(defmethod tx/create-db! :vertica
  [driver dbdef & options]
  ;; try a few times to create the DB. Vertica is very fussy and sometimes you need to try a few times to get it to
  ;; work correctly.
  (do-with-retries
   ;; attempt: defer to the default :sql-jdbc test-extensions implementation
   #(apply (get-method tx/create-db! :sql-jdbc/test-extensions) driver dbdef options)
   ;; on failure: warn and close all open sessions, which is presumably what blocks DB creation
   (fn []
     (println (colorize/red "\n\nVertica failed to create a DB, again. Let's try again...\n\n"))
     (jdbc/query (dbspec) "SELECT CLOSE_ALL_SESSIONS();"))
   ;; up to 5 retries
   5))
(defmethod tx/aggregate-column-info :vertica
([driver ag-type]
(merge
......
......@@ -795,18 +795,45 @@
^String [seconds]
(format-milliseconds (* 1000.0 seconds)))
(def ^:dynamic *profile-level*
  "Impl for `profile` macro -- don't use this directly. Nesting-level for the `profile` macro e.g. 0 for a top-level
  `profile` form or 1 for a form inside that. Rebound (incremented) around the body of each nested `profile` form."
  0)
(defn profile-print-time
  "Impl for `profile` macro -- don't use this directly. Prints the `___ took ___` message at the conclusion of a
  `profile`d form. `start-time` is a `System/nanoTime` value captured when the form began."
  [message start-time]
  ;; indent the message according to `*profile-level*` and add a little down-left arrow so it (hopefully) points to
  ;; the parent form
  (println (format-color :green "%s%s took %s"
             (if (pos? *profile-level*)
               (str (str/join (repeat (dec *profile-level*) " ")) " ↙ ")
               "")
             message
             ;; elapsed wall-clock time, pretty-printed by `format-nanoseconds`
             (format-nanoseconds (- (System/nanoTime) start-time)))))
(defmacro profile
"Like `clojure.core/time`, but lets you specify a `message` that gets printed with the total time, and formats the
time nicely using `format-nanoseconds`."
"Like `clojure.core/time`, but lets you specify a `message` that gets printed with the total time, formats the
time nicely using `format-nanoseconds`, and indents nested calls to `profile`.
(profile \"top-level\"
(Thread/sleep 500)
(profile \"nested\"
(Thread/sleep 100)))
;; ->
↙ nested took 100.1 ms
top-level took 602.8 ms"
{:style/indent 1}
([form]
`(profile ~(str form) ~form))
([message & body]
`(let [start-time# (System/nanoTime)]
(prog1 (do ~@body)
(println (format-color '~'green "%s took %s"
~message
(format-nanoseconds (- (System/nanoTime) start-time#))))))))
`(let [message# ~message
start-time# (System/nanoTime)
result# (binding [*profile-level* (inc *profile-level*)]
~@body)]
(profile-print-time message# start-time#)
result#)))
(defn seconds->ms
"Convert `seconds` to milliseconds. More readable than doing this math inline."
......
......@@ -18,7 +18,7 @@
;;; --------------------------------------------------- Path Utils ---------------------------------------------------
(defn- get-path-in-filesystem ^Path [^FileSystem filesystem, ^String path-component & more-components]
;; Resolve one or more path components against `filesystem`, returning a java.nio.file.Path.
;; Thin wrapper over FileSystem.getPath; the rest components are passed as a String varargs array.
(defn- get-path-in-filesystem ^Path [^FileSystem filesystem ^String path-component & more-components]
  (.getPath filesystem path-component (u/varargs String more-components)))
(defn get-path
......
......@@ -16,11 +16,11 @@
[metabase.models.query-execution :refer [QueryExecution]]
[metabase.query-processor-test :as qp.test]
[metabase.query-processor.middleware.constraints :as constraints]
[metabase.query-processor.util :as qp-util]
[metabase.test :as mt]
[metabase.test.data.dataset-definitions :as defs]
[metabase.test.data.users :as test-users]
[metabase.test.fixtures :as fixtures]
[metabase.test.util :as tu]
[metabase.util :as u]
[schema.core :as s]
[toucan.db :as db])
......@@ -47,8 +47,10 @@
:else
[k v]))))
(defn- most-recent-query-execution []
  ;; most recently started QueryExecution row, regardless of which query produced it
  (db/select-one QueryExecution {:order-by [[:started_at :desc]]}))
(defn- most-recent-query-execution-for-query [query]
  ;; look the row up by the query's hash so executions of OTHER queries (e.g. from tests running
  ;; concurrently) can't be returned by mistake
  (db/select-one QueryExecution
    :hash (qp-util/query-hash query)
    {:order-by [[:started_at :desc]]}))
(def ^:private query-defaults
{:middleware {:add-default-userland-constraints? true
......@@ -57,11 +59,12 @@
(deftest basic-query-test
(testing "POST /api/dataset"
(testing "\nJust a basic sanity check to make sure Query Processor endpoint is still working correctly."
(let [result ((mt/user->client :rasta) :post 202 "dataset" (mt/mbql-query checkins
{:aggregation [[:count]]}))]
(let [query (mt/mbql-query checkins
{:aggregation [[:count]]})
result (mt/user-http-request :rasta :post 202 "dataset" query)]
(testing "\nAPI Response"
(is (= {:data {:rows [[1000]]
:cols [(tu/obj->json->obj (qp.test/aggregate-col :count))]
:cols [(mt/obj->json->obj (qp.test/aggregate-col :count))]
:native_form true
:results_timezone "UTC"}
:row_count 1
......@@ -92,7 +95,7 @@
:database_id (mt/id)
:started_at true
:running_time true}
(format-response (most-recent-query-execution)))))))))
(format-response (most-recent-query-execution-for-query query)))))))))
(deftest failure-test
;; clear out recent query executions!
......@@ -107,10 +110,11 @@
error-message
(re-find #"Syntax error in SQL statement")
boolean))))
query {:database (mt/id)
:type "native"
:native {:query "foobar"}}
result (mt/suppress-output
((mt/user->client :rasta) :post 202 "dataset" {:database (mt/id)
:type "native"
:native {:query "foobar"}}))]
(mt/user-http-request :rasta :post 202 "dataset" query))]
(testing "\nAPI Response"
(is (= {:data {:rows []
:cols []}
......@@ -144,7 +148,7 @@
:pulse_id nil
:card_id nil
:dashboard_id nil}
(check-error-message (format-response (most-recent-query-execution))))))))))
(check-error-message (format-response (most-recent-query-execution-for-query query))))))))))
;;; Make sure that we're piggybacking off of the JSON encoding logic when encoding strange values in XLSX (#5145,
......@@ -184,8 +188,8 @@
["3" "2014-09-15" "8" "56"]
["4" "2014-03-11" "5" "4"]
["5" "2013-05-05" "3" "49"]]
(let [result ((mt/user->client :rasta) :post 200 "dataset/csv" :query
(json/generate-string (mt/mbql-query checkins)))]
(let [result (mt/user-http-request :rasta :post 200 "dataset/csv" :query
(json/generate-string (mt/mbql-query checkins)))]
(take 5 (parse-and-sort-csv result))))))
(deftest download-response-headers-test
......@@ -205,8 +209,8 @@
(deftest check-an-empty-date-column
(mt/dataset defs/test-data-with-null-date-checkins
(let [result ((mt/user->client :rasta) :post 200 "dataset/csv" :query
(json/generate-string (mt/mbql-query checkins)))]
(let [result (mt/user-http-request :rasta :post 200 "dataset/csv" :query
(json/generate-string (mt/mbql-query checkins)))]
(is (= [["1" "2014-04-07" "" "5" "12"]
["2" "2014-09-18" "" "1" "31"]
["3" "2014-09-15" "" "8" "56"]
......@@ -217,8 +221,8 @@
(deftest sqlite-datetime-test
(mt/test-driver :sqlite
(testing "SQLite doesn't return proper date objects but strings, they just pass through the qp untouched"
(let [result ((mt/user->client :rasta) :post 200 "dataset/csv" :query
(json/generate-string (mt/mbql-query checkins {:order-by [[:asc $id]], :limit 5})))]
(let [result (mt/user-http-request :rasta :post 200 "dataset/csv" :query
(json/generate-string (mt/mbql-query checkins {:order-by [[:asc $id]], :limit 5})))]
(is (= [["1" "2014-04-07" "5" "12"]
["2" "2014-09-18" "1" "31"]
["3" "2014-09-15" "8" "56"]
......@@ -227,8 +231,8 @@
(parse-and-sort-csv result)))))))
(deftest datetime-fields-are-untouched-when-exported
(let [result ((mt/user->client :rasta) :post 200 "dataset/csv" :query
(json/generate-string (mt/mbql-query users {:order-by [[:asc $id]], :limit 5})))]
(let [result (mt/user-http-request :rasta :post 200 "dataset/csv" :query
(json/generate-string (mt/mbql-query users {:order-by [[:asc $id]], :limit 5})))]
(is (= [["1" "Plato Yeshua" "2014-04-01T08:30:00"]
["2" "Felipinho Asklepios" "2014-12-05T15:15:00"]
["3" "Kaneonuskatew Eiran" "2014-11-06T16:15:00"]
......@@ -240,11 +244,11 @@
(mt/with-temp Card [card {:dataset_query {:database (mt/id)
:type :native
:native {:query "SELECT * FROM USERS;"}}}]
(let [result ((mt/user->client :rasta) :post 200 "dataset/csv"
:query (json/generate-string
{:database mbql.s/saved-questions-virtual-database-id
:type :query
:query {:source-table (str "card__" (u/get-id card))}}))]
(let [result (mt/user-http-request :rasta :post 200 "dataset/csv"
:query (json/generate-string
{:database mbql.s/saved-questions-virtual-database-id
:type :query
:query {:source-table (str "card__" (u/get-id card))}}))]
(is (some? result))
(when (some? result)
(is (= 16
......@@ -257,14 +261,14 @@
;; from one that had it -- see #9831)
(deftest formatted-results-ignore-query-constraints
(with-redefs [constraints/default-query-constraints {:max-results 10, :max-results-bare-rows 10}]
(let [result ((mt/user->client :rasta) :post 200 "dataset/csv"
:query (json/generate-string
{:database (mt/id)
:type :query
:query {:source-table (mt/id :venues)}
:middleware
{:add-default-userland-constraints? true
:userland-query? true}}))]
(let [result (mt/user-http-request :rasta :post 200 "dataset/csv"
:query (json/generate-string
{:database (mt/id)
:type :query
:query {:source-table (mt/id :venues)}
:middleware
{:add-default-userland-constraints? true
:userland-query? true}}))]
(is (some? result))
(when (some? result)
(is (= 101
......@@ -275,10 +279,10 @@
(deftest non--download--queries-should-still-get-the-default-constraints
(with-redefs [constraints/default-query-constraints {:max-results 10, :max-results-bare-rows 10}]
(let [{row-count :row_count, :as result}
((mt/user->client :rasta) :post 202 "dataset"
{:database (mt/id)
:type :query
:query {:source-table (mt/id :venues)}})]
(mt/user-http-request :rasta :post 202 "dataset"
{:database (mt/id)
:type :query
:query {:source-table (mt/id :venues)}})]
(is (= 10
(or row-count result))))))
......@@ -293,8 +297,8 @@
:error (s/eq "You do not have permissions to run this query.")
s/Keyword s/Any}
(mt/suppress-output
((mt/user->client :rasta) :post "dataset"
(mt/mbql-query venues {:limit 1}))))))))
(mt/user-http-request :rasta :post "dataset"
(mt/mbql-query venues {:limit 1}))))))))
(deftest query->native-test
(testing "POST /api/dataset/native"
......@@ -303,9 +307,9 @@
"FROM \"PUBLIC\".\"VENUES\" "
"LIMIT 1048576")
:params nil}
((mt/user->client :rasta) :post 200 "dataset/native"
(mt/mbql-query venues
{:fields [$id $name]}))))
(mt/user-http-request :rasta :post 200 "dataset/native"
(mt/mbql-query venues
{:fields [$id $name]}))))
(testing "\nMake sure parameters are spliced correctly"
(is (= {:query (str "SELECT \"PUBLIC\".\"CHECKINS\".\"ID\" AS \"ID\" FROM \"PUBLIC\".\"CHECKINS\" "
......@@ -313,10 +317,10 @@
" AND \"PUBLIC\".\"CHECKINS\".\"DATE\" < timestamp with time zone '2015-11-14 00:00:00.000Z') "
"LIMIT 1048576")
:params nil}
((mt/user->client :rasta) :post 200 "dataset/native"
(mt/mbql-query checkins
{:fields [$id]
:filter [:= $date "2015-11-13"]})))))
(mt/user-http-request :rasta :post 200 "dataset/native"
(mt/mbql-query checkins
{:fields [$id]
:filter [:= $date "2015-11-13"]})))))
(testing "\nshould require that the user have ad-hoc native perms for the DB"
(mt/suppress-output
......@@ -327,16 +331,16 @@
(is (schema= {:permissions-error? (s/eq true)
:message (s/eq "You do not have permissions to run this query.")
s/Any s/Any}
((mt/user->client :rasta) :post "dataset/native"
(mt/mbql-query venues
{:fields [$id $name]}))))))))))
(mt/user-http-request :rasta :post "dataset/native"
(mt/mbql-query venues
{:fields [$id $name]}))))))))))
(deftest report-timezone-test
(mt/test-driver :postgres
(testing "expected (desired) and actual timezone should be returned as part of query results"
(mt/with-temporary-setting-values [report-timezone "US/Pacific"]
(let [results ((mt/user->client :rasta) :post 202 "dataset" (mt/mbql-query checkins
{:aggregation [[:count]]}))]
(let [results (mt/user-http-request :rasta :post 202 "dataset" (mt/mbql-query checkins
{:aggregation [[:count]]}))]
(is (= {:requested_timezone "US/Pacific"
:results_timezone "US/Pacific"}
(-> results
......
......@@ -843,8 +843,11 @@
;; generating Java classes here so they'll be in the DB's native timezone. Some DBs refuse to use
;; the same timezone we're running the tests from *cough* SQL Server *cough*
[(u/prog1 (if (and (isa? driver/hierarchy driver/*driver* :sql)
;; BigQuery doesn't insert rows using SQL statements
(not= driver/*driver* :bigquery))
;; BigQuery/Vertica don't insert rows using SQL statements
;;
;; TODO -- make 'insert-rows-using-statements?` a multimethod so we don't need to
;; hardcode the whitelist here.
(not (#{:vertica :bigquery} driver/*driver*)))
(sql.qp/add-interval-honeysql-form driver/*driver*
(sql.qp/current-datetime-honeysql-form driver/*driver*)
(* i interval-seconds)
......@@ -865,7 +868,7 @@
;; when true, a stale timestamp dataset DB is deleted and recreated before use (see `count-of-grouping` below)
(def ^:private ^:dynamic *recreate-db-if-stale?* true)
(defn- count-of-grouping [^TimestampDatasetDef dataset, field-grouping & relative-datetime-args]
(defn- count-of-grouping [^TimestampDatasetDef dataset field-grouping & relative-datetime-args]
(-> (mt/dataset dataset
;; DB has values in the range of now() - (interval-seconds * 15) and now() + (interval-seconds * 15). So if it
;; was created more than (interval-seconds * 5) seconds ago, delete the Database and recreate it to make sure
......
......@@ -191,7 +191,7 @@
"Get the ID of the current database or one of its Tables or Fields. Relies on the dynamic variable `*get-db*`, which
can be rebound with `with-db`."
([]
(u/get-id (db)))
(u/the-id (db)))
([table-name]
(impl/the-table-id (id) (format-name table-name)))
......
......@@ -93,7 +93,7 @@
`table-identifier`. Default implementation simply converts SQL generated by `insert-rows-honeysql-form` into SQL
with `hsql/format`; in most cases you should only need to override that method. Override this instead if you do not
want to use HoneySQL to generate the `INSERT` statement."
{:arglists '([driver, ^metabase.util.honeysql_extensions.Identifier table-identifier, row-or-rows])}
{:arglists '([driver ^metabase.util.honeysql_extensions.Identifier table-identifier row-or-rows])}
tx/dispatch-on-driver-with-test-extensions
:hierarchy #'driver/hierarchy)
......
......@@ -17,10 +17,10 @@
(:import java.sql.SQLException))
(defmulti load-data!
  "Load the rows for a specific table (which has already been created) into a DB. `load-data-chunked!` is the default
  implementation (see below); several other implementations like `load-data-all-at-once!` and
  `load-data-one-at-a-time!` are already defined; see below. It will likely take some experimentation to see which
  implementation works correctly and performs best with your driver."
  {:arglists '([driver dbdef tabledef])}
  tx/dispatch-on-driver-with-test-extensions
  :hierarchy #'driver/hierarchy)
......@@ -129,35 +129,35 @@
;; You can use one of these alternative implementations instead of `load-data-chunked!` if that doesn't work with your
;; DB or one of these other ones performs faster
;; duplicated `def` header (diff artifact) removed; keep the version with :arglists metadata
(def ^{:arglists '([driver dbdef tabledef])} load-data-all-at-once!
  "Implementation of `load-data!`. Insert all rows at once."
  (make-load-data-fn))
;; duplicated `def` header (diff artifact) removed; keep the version with :arglists metadata
(def ^{:arglists '([driver dbdef tabledef])} load-data-chunked!
  "Implementation of `load-data!`. Insert rows in chunks of 200 at a time."
  (make-load-data-fn load-data-chunked))
;; duplicated `def` header (diff artifact) removed; keep the version with :arglists metadata
(def ^{:arglists '([driver dbdef tabledef])} load-data-one-at-a-time!
  "Implementation of `load-data!`. Insert rows one at a time."
  (make-load-data-fn load-data-one-at-a-time))
;; duplicated `def` header (diff artifact) removed; keep the version with :arglists metadata
(def ^{:arglists '([driver dbdef tabledef])} load-data-add-ids!
  "Implementation of `load-data!`. Insert all rows at once; add IDs."
  (make-load-data-fn load-data-add-ids))
;; duplicated `def` header (diff artifact) removed; keep the version with :arglists metadata
(def ^{:arglists '([driver dbdef tabledef])} load-data-add-ids-chunked!
  "Implementation of `load-data!`. Insert rows in chunks of 200 at a time; add IDs."
  (make-load-data-fn load-data-add-ids load-data-chunked))
;; duplicated `def` header (diff artifact) removed; keep the version with :arglists metadata
(def ^{:arglists '([driver dbdef tabledef])} load-data-one-at-a-time-add-ids!
  "Implementation of `load-data!` that inserts rows one at a time, but adds IDs."
  (make-load-data-fn load-data-add-ids load-data-one-at-a-time))
;; duplicated `def` header (diff artifact) removed; keep the version with :arglists metadata
(def ^{:arglists '([driver dbdef tabledef])} load-data-chunked-parallel!
  "Implementation of `load-data!`. Insert rows in chunks of 200 at a time, in parallel."
  (make-load-data-fn load-data-add-ids (partial load-data-chunked pmap)))
;; duplicated `def` header (diff artifact) removed; keep the version with :arglists metadata
(def ^{:arglists '([driver dbdef tabledef])} load-data-one-at-a-time-parallel!
  "Implementation of `load-data!`. Insert rows one at a time, in parallel."
  (make-load-data-fn load-data-add-ids (partial load-data-one-at-a-time pmap)))
;; ^ the parallel versions aren't neccesarily faster than the sequential versions for all drivers so make sure to do
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment