Skip to content
Snippets Groups Projects
Unverified Commit 8e9bcc42 authored by Cam Saul's avatar Cam Saul Committed by GitHub
Browse files

Tests: do "quick" sync for non-test-data datasets; shave MINUTES off of test runs (#14691)

* Quick sync

* Try reducing -Xmx for :ci profile a bit to prevent random test failures

* Update quick sync parameter name

* Update docstring for dataset

* Fix sync-database! schema

* Fix schema again

* Test fix :wrench:
parent 33888708
No related branches found
No related tags found
No related merge requests found
......@@ -148,7 +148,7 @@ executors:
- image: circleci/clojure:lein-2.8.1
- image: metabase/presto-mb-ci
environment:
JAVA_TOOL_OPTIONS: "-Xmx2g"
JAVA_TOOL_OPTIONS: "-Xmx1g"
sparksql:
working_directory: /home/circleci/metabase/metabase/
......@@ -691,7 +691,7 @@ jobs:
- run:
name: Test << parameters.driver >> driver << parameters.description >>
environment:
DRIVERS: h2,<< parameters.driver >>
DRIVERS: << parameters.driver >>
command: lein with-profile +ci,+junit,+ee test
no_output_timeout: << parameters.timeout >>
- store_test_results:
......
......@@ -11,6 +11,7 @@
[metabase.models.table :as table :refer [Table]]
[metabase.query-processor :as qp]
[metabase.query-processor-test :as qp.t :refer [rows]]
[metabase.sync :as sync]
[metabase.test :as mt]
[metabase.test.data.interface :as tx]
[taoensso.nippy :as nippy]
......@@ -148,6 +149,8 @@
(deftest all-num-columns-test
(mt/test-driver :mongo
(mt/dataset all-null-columns
;; do a full sync on the DB to get the correct special type info
(sync/sync-database! (mt/db))
(is (= [{:name "_id", :database_type "java.lang.Long", :base_type :type/Integer, :special_type :type/PK}
{:name "favorite_snack", :database_type "NULL", :base_type :type/*, :special_type nil}
{:name "name", :database_type "java.lang.String", :base_type :type/Text, :special_type :type/Name}]
......
......@@ -245,7 +245,7 @@
:format-result metabase.junit/format-result}}
:ci
{:jvm-opts ["-Xmx2500m"]}
{:jvm-opts ["-Xmx2000m"]}
:install
{}
......
......@@ -26,20 +26,30 @@
(s/defn sync-database! :- SyncDatabaseResults
"Perform all the different sync operations synchronously for `database`.
This is considered a 'full sync' in that all the different sync operations are performed at consecutively. Please
note that this function is *not* what is called by the scheduled tasks; those call different steps independently.
This function is called when a Database is first added."
By default, does a `:full` sync that performs all the different sync operations consecutively. You may instead
specify only a `:schema` sync that will sync just the schema but skip analysis.
Please note that this function is *not* what is called by the scheduled tasks; those call different steps
independently. This function is called when a Database is first added."
{:style/indent 1}
[database :- i/DatabaseInstance]
(sync-util/sync-operation :sync database (format "Sync %s" (sync-util/name-for-logging database))
(mapv (fn [[f step-name]] (assoc (f database) :name step-name))
[
;; First make sure Tables, Fields, and FK information is up-to-date
[sync-metadata/sync-db-metadata! "metadata"]
;; Next, run the 'analysis' step where we do things like scan values of fields and update special types accordingly
[analyze/analyze-db! "analyze"]
;; Finally, update cached FieldValues
[field-values/update-field-values! "field-values"]])))
([database]
(sync-database! database nil))
([database :- i/DatabaseInstance
{:keys [scan], :or {scan :full}} :- (s/maybe {(s/optional-key :scan) (s/maybe (s/enum :schema :full))})]
(sync-util/sync-operation :sync database (format "Sync %s" (sync-util/name-for-logging database))
(mapv (fn [[f step-name]] (assoc (f database) :name step-name))
(filter
some?
[;; First make sure Tables, Fields, and FK information is up-to-date
[sync-metadata/sync-db-metadata! "metadata"]
;; Next, run the 'analysis' step where we do things like scan values of fields and update special types
;; accordingly
(when (= scan :full)
[analyze/analyze-db! "analyze"])
;; Finally, update cached FieldValues
(when (= scan :full)
[field-values/update-field-values! "field-values"])])))))
(s/defn sync-table!
"Perform all the different sync operations synchronously for a given `table`."
......
......@@ -55,16 +55,16 @@
{:style/indent 2}
[operation database-or-id f]
(fn []
(when-not (contains? (@operation->db-ids operation) (u/get-id database-or-id))
(when-not (contains? (@operation->db-ids operation) (u/the-id database-or-id))
(try
;; mark this database as currently syncing so we can prevent duplicate sync attempts (#2337)
(swap! operation->db-ids update operation #(conj (or % #{}) (u/get-id database-or-id)))
(swap! operation->db-ids update operation #(conj (or % #{}) (u/the-id database-or-id)))
(log/debug "Sync operations in flight:" (m/filter-vals seq @operation->db-ids))
;; do our work
(f)
;; always take the ID out of the set when we are through
(finally
(swap! operation->db-ids update operation #(disj % (u/get-id database-or-id))))))))
(swap! operation->db-ids update operation #(disj % (u/the-id database-or-id))))))))
(defn- with-sync-events
......@@ -81,11 +81,11 @@
(fn []
(let [start-time (System/nanoTime)
tracking-hash (str (java.util.UUID/randomUUID))]
(events/publish-event! begin-event-name {:database_id (u/get-id database-or-id), :custom_id tracking-hash})
(events/publish-event! begin-event-name {:database_id (u/the-id database-or-id), :custom_id tracking-hash})
(let [return (f)
total-time-ms (int (/ (- (System/nanoTime) start-time)
1000000.0))]
(events/publish-event! end-event-name {:database_id (u/get-id database-or-id)
(events/publish-event! end-event-name {:database_id (u/the-id database-or-id)
:custom_id tracking-hash
:running_time total-time-ms})
return)))))
......@@ -240,9 +240,9 @@
;;; +----------------------------------------------------------------------------------------------------------------+
(defn db->sync-tables
"Return all the Tables that should go through the sync processes for DATABASE-OR-ID."
"Return all the Tables that should go through the sync processes for `database-or-id`."
[database-or-id]
(db/select Table, :db_id (u/get-id database-or-id), :active true, :visibility_type nil))
(db/select Table, :db_id (u/the-id database-or-id), :active true, :visibility_type nil))
;; The `name-for-logging` function is used all over the sync code to make sure we have easy access to consistently
......@@ -250,8 +250,8 @@
(defprotocol ^:private NameForLogging
(name-for-logging [this]
"Return an appropriate string for logging an object in sync logging messages.
Should be something like \"postgres Database 'test-data'\""))
"Return an appropriate string for logging an object in sync logging messages. Should be something like \"postgres
Database 'test-data'\""))
(extend-protocol NameForLogging
i/DatabaseInstance
......@@ -398,7 +398,7 @@
database :- i/DatabaseInstance
{:keys [start-time end-time]} :- SyncOperationOrStepRunMetadata]
{:task task-name
:db_id (u/get-id database)
:db_id (u/the-id database)
:started_at start-time
:ended_at end-time
:duration (.toMillis (t/duration start-time end-time))})
......
......@@ -60,12 +60,14 @@
["Empty Vending Machine" 0]]]])
(defn- db->fields [db]
(let [table-ids (db/select-ids 'Table :db_id (u/get-id db))]
(let [table-ids (db/select-ids 'Table :db_id (u/the-id db))]
(set (map (partial into {}) (db/select [Field :name :base_type :special_type] :table_id [:in table-ids])))))
(deftest tiny-int-1-test
(mt/test-driver :mysql
(mt/dataset tiny-int-ones
;; trigger a full sync on this database so fields are categorized correctly
(sync/sync-database! (mt/db))
(testing "By default TINYINT(1) should be a boolean"
(is (= #{{:name "number-of-cans", :base_type :type/Boolean, :special_type :type/Category}
{:name "id", :base_type :type/Integer, :special_type :type/PK}
......
......@@ -200,8 +200,8 @@
(apply impl/the-field-id (id table-name) (map format-name (cons field-name nested-field-names)))))
(defmacro dataset
"Load and sync a temporary Database defined by `dataset`, make it the current DB (for `metabase.test.data` functions
like `id` and `db`), and execute `body`.
"Create a database and load it with the data defined by `dataset`, then do a quick metadata-only sync; make it the
current DB (for `metabase.test.data` functions like `id` and `db`), and execute `body`.
`dataset` can be one of the following:
......
(ns metabase.test.data.impl
"Internal implementation of various helper functions in `metabase.test.data`."
(:require [clojure.tools.logging :as log]
[clojure.tools.reader.edn :as edn]
[metabase.config :as config]
[metabase.driver :as driver]
[metabase.models.database :refer [Database]]
......@@ -81,6 +82,9 @@
"Max amount of time to wait for sync to complete."
(u/minutes->ms 5)) ; five minutes
(defonce ^:private reference-sync-durations
(delay (edn/read-string (slurp "test_resources/sync-durations.edn"))))
(defn- create-database! [driver {:keys [database-name table-definitions], :as database-definition}]
{:pre [(seq database-name)]}
(try
......@@ -98,16 +102,20 @@
(try
;; sync newly added DB
(u/with-timeout sync-timeout-ms
(u/profile (format "Sync %s Database %s" driver database-name)
(sync/sync-database! db)
(verify-data-loaded-correctly driver database-definition db)
;; add extra metadata for fields
(try
(add-extra-metadata! database-definition db)
(catch Throwable e
(println "Error adding extra metadata:" e)))))
(let [reference-duration (or (some-> (get @reference-sync-durations database-name) u/format-nanoseconds)
"NONE")
quick-sync? (not= database-name "test-data")]
(u/profile (format "%s %s Database %s (reference H2 duration: %s)"
(if quick-sync? "QUICK sync" "Sync") driver database-name reference-duration)
;; only do "quick sync" for non `test-data` datasets, because it can take literally MINUTES on CI.
(sync/sync-database! db (when quick-sync? {:scan :schema}))
;; add extra metadata for fields
(try
(add-extra-metadata! database-definition db)
(catch Throwable e
(println "Error adding extra metadata:" e))))))
;; make sure we're returing an up-to-date copy of the DB
(Database (u/get-id db))
(Database (u/the-id db))
(catch Throwable e
(let [e (ex-info "Failed to create test database"
{:driver driver
......@@ -115,7 +123,7 @@
:connection-details connection-details}
e)]
(println (u/pprint-to-str 'red (Throwable->map e)))
(db/delete! Database :id (u/get-id db))
(db/delete! Database :id (u/the-id db))
(throw e)))))
(catch Throwable e
(let [message (format "Failed to create %s '%s' test database" driver database-name)]
......@@ -289,7 +297,7 @@
(binding [db/*disable-db-logging* true]
(let [db (get-or-create-database! driver dbdef)]
(assert db)
(assert (db/exists? Database :id (u/get-id db)))
(assert (db/exists? Database :id (u/the-id db)))
db))))]
(binding [*get-db* #(get-db-for-driver (tx/driver))]
(f))))
......@@ -2,6 +2,7 @@
(:require [clojure.java.jdbc :as jdbc]
[clojure.string :as str]
[clojure.tools.logging :as log]
[clojure.tools.reader.edn :as edn]
[medley.core :as m]
[metabase.driver :as driver]
[metabase.driver.sql-jdbc.execute :as sql-jdbc.execute]
......@@ -195,6 +196,9 @@
(jdbc/print-sql-exception-chain e)
(throw e))))))
(defonce ^:private reference-load-durations
(delay (edn/read-string (slurp "test_resources/load-durations.edn"))))
(defn create-db!
"Default implementation of `create-db!` for SQL drivers."
{:arglists '([driver dbdef & {:keys [skip-drop-db?]}])}
......@@ -211,8 +215,12 @@
;; DB rather than on `:server` (no DB in particular)
(execute/execute-sql! driver :db dbdef (str/join ";\n" statements)))
;; Now load the data for each Table
(doseq [tabledef table-definitions]
(u/profile (format "load-data for %s %s %s" (name driver) (:database-name dbdef) (:table-name tabledef))
(doseq [tabledef table-definitions
:let [reference-duration (or (some-> (get @reference-load-durations [(:database-name dbdef) (:table-name tabledef)])
u/format-nanoseconds)
"NONE")]]
(u/profile (format "load-data for %s %s %s (reference H2 duration: %s)"
(name driver) (:database-name dbdef) (:table-name tabledef) reference-duration)
(load-data! driver dbdef tabledef))))
(defn destroy-db!
......
;; this is a map of how long it takes to load the test data for the dataset (in nanoseconds) for an in-memory H2
;; database on Cam's local computer (Ryzen 3950x/32 GB of RAM/Java 14)
{["airports" "airport"] 131568400
["airports" "continent"] 2131300
["airports" "country"] 30031300
["airports" "municipality"] 61022600
["airports" "region"] 79085800
["attempted-murders" "attempts"] 29658300
["avian-singles" "messages"] 11632000
["avian-singles" "users"] 1475300
["basic-field-comments" "basic_field_comments"] 876400
["bird-flocks" "bird"] 2902800
["bird-flocks" "flock"] 1267600
["checkins_interval_15" "checkins"] 1353500
["checkins_interval_86400" "checkins"] 2494900
["checkins_interval_900" "checkins"] 1477200
["comment-after-sync" "comment_after_sync"] 1265800
["daily-bird-counts" "bird-count"] 4581800
["db-with-some-cruft" "acquired_toucans"] 1267800
["db-with-some-cruft" "south_migrationhistory"] 951300
["office-checkins" "checkins"] 5350200
["places-cam-likes" "places"] 1194400
["sad-toucan-incidents" "incidents"] 7324200
["sample-dataset" "orders"] 3439047900
["sample-dataset" "people"] 1922302400
["sample-dataset" "products"] 95636200
["sample-dataset" "reviews"] 292738400
["string-times" "times"] 2629800
["table_with_comment_after_sync_db" "table_with_comment_after_sync"] 997500
["table_with_comment_db" "table_with_comment"] 871300
["table_with_updated_desc_db" "table_with_updated_desc"] 818600
["test-data" "categories"] 10092000
["test-data" "checkins"] 130413900
["test-data" "users"] 23762400
["test-data" "venues"] 29380500
["test-data-self-referencing-user" "users"] 5118200
["test-data-with-null-date-checkins" "categories"] 8328000
["test-data-with-null-date-checkins" "checkins"] 132575300
["test-data-with-null-date-checkins" "users"] 6756300
["test-data-with-null-date-checkins" "venues"] 16336800
["test-data-with-time" "users"] 15280900
["test-data-with-timezones" "categories"] 11324400
["test-data-with-timezones" "checkins"] 143596000
["test-data-with-timezones" "users"] 7185000
["test-data-with-timezones" "venues"] 18375000
["toucan-microsecond-incidents" "incidents"] 1388100
["tupac-sightings" "categories"] 2228800
["tupac-sightings" "cities"] 17286900
["tupac-sightings" "sightings"] 46284700
["update-desc" "update_desc"] 794500}
;; this is a map of how long it takes to sync the dataset (in nanoseconds) for an in-memory H2 database on Cam's local
;; computer (Ryzen 3950x/32 GB of RAM/Java 14)
{"airports" 72271700
"attempted-murders" 41250600
"avian-singles" 44766800
"basic-field-comments" 36389700
"bird-flocks" 41835800
"checkins_interval_15" 45435500
"checkins_interval_86400" 43530100
"checkins_interval_900" 41998300
"comment-after-sync" 37872400
"daily-bird-counts" 37459400
"db-with-some-cruft" 49419200
"office-checkins" 41066800
"places-cam-likes" 40787900
"sad-toucan-incidents" 48648000
"sample-dataset" 96251000
"string-times" 39466300
"table_with_comment_after_sync_db" 34791000
"table_with_comment_db" 35052200
"table_with_updated_desc_db" 35456200
"test-data" 1138090600
"test-data-self-referencing-user" 37713700
"test-data-with-null-date-checkins" 84524900
"test-data-with-time" 40431800
"test-data-with-timezones" 60335800
"toucan-microsecond-incidents" 51240600
"tupac-sightings" 62742200
"update-desc" 32664800}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment