Skip to content
Snippets Groups Projects
Unverified Commit f401d1cc authored by Cam Saul's avatar Cam Saul Committed by GitHub
Browse files

Adapt unique schema logic in CI from Snowflake to Redshift (#29613)

* Adapt unique schema logic in CI from Snowflake to Redshift

* Ok don't stomp on all the old schemas just yet.

* Fix missing vars

* Test fixes :wrench:

* Test fixes :wrench:

* Revert `^:parallel` on tests that shouldn't be allowed to use it
parent 7b617a4d
No related branches found
No related tags found
No related merge requests found
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
[metabase.util.honey-sql-2 :as h2x] [metabase.util.honey-sql-2 :as h2x]
#_{:clj-kondo/ignore [:discouraged-namespace]} #_{:clj-kondo/ignore [:discouraged-namespace]}
[metabase.util.honeysql-extensions :as hx] [metabase.util.honeysql-extensions :as hx]
[metabase.util.log :as log]
[toucan2.core :as t2]) [toucan2.core :as t2])
(:import (:import
(metabase.plugins.jdbc_proxy ProxyDriver))) (metabase.plugins.jdbc_proxy ProxyDriver)))
...@@ -42,7 +43,7 @@ ...@@ -42,7 +43,7 @@
(binding [hx/*honey-sql-version* 2] (binding [hx/*honey-sql-version* 2]
(thunk)))) (thunk))))
(deftest correct-driver-test (deftest ^:parallel correct-driver-test
(mt/test-driver :redshift (mt/test-driver :redshift
(testing "Make sure we're using the correct driver for Redshift" (testing "Make sure we're using the correct driver for Redshift"
(let [driver (java.sql.DriverManager/getDriver "jdbc:redshift://host:5432/testdb") (let [driver (java.sql.DriverManager/getDriver "jdbc:redshift://host:5432/testdb")
...@@ -85,14 +86,20 @@ ...@@ -85,14 +86,20 @@
"FROM" "FROM"
" \"{{schema}}\".\"test_data_users\"" " \"{{schema}}\".\"test_data_users\""
"WHERE" "WHERE"
" (\"{{schema}}\".\"test_data_users\".\"id\" = 1)" " ("
" OR (\"{{schema}}\".\"test_data_users\".\"id\" = 2)" " \"{{schema}}\".\"test_data_users\".\"id\" = 1"
" OR (\"{{schema}}\".\"test_data_users\".\"id\" = 3)" " )"
" OR ("
" \"{{schema}}\".\"test_data_users\".\"id\" = 2"
" )"
" OR ("
" \"{{schema}}\".\"test_data_users\".\"id\" = 3"
" )"
"LIMIT" "LIMIT"
" 2000"]] " 2000"]]
(-> line (-> line
(str/replace #"\Q{{site-uuid}}\E" (public-settings/site-uuid)) (str/replace #"\Q{{site-uuid}}\E" (public-settings/site-uuid))
(str/replace #"\Q{{schema}}\E" redshift.test/session-schema-name)))] (str/replace #"\Q{{schema}}\E" (redshift.test/unique-session-schema))))]
(is (= expected (is (= expected
(sql->lines (sql->lines
(query->native (query->native
...@@ -192,7 +199,7 @@ ...@@ -192,7 +199,7 @@
{:database (mt/id) {:database (mt/id)
:type :native :type :native
:native {:query (str "select * " :native {:query (str "select * "
(format "from \"%s\".test_data_checkins " redshift.test/session-schema-name) (format "from \"%s\".test_data_checkins " (redshift.test/unique-session-schema))
"where {{date}} " "where {{date}} "
"order by date desc " "order by date desc "
"limit 1;") "limit 1;")
...@@ -205,41 +212,53 @@ ...@@ -205,41 +212,53 @@
:target [:dimension [:template-tag "date"]] :target [:dimension [:template-tag "date"]]
:value "past30years"}]}))))))) :value "past30years"}]})))))))
(defn- execute! [format-string & args]
(let [sql (apply format format-string args)
spec (sql-jdbc.conn/connection-details->spec :redshift @redshift.test/db-connection-details)]
(log/info (u/format-color 'blue "[redshift] %s" sql))
(try
(jdbc/execute! spec sql)
(catch Throwable e
(throw (ex-info (format "Error executing SQL: %s" (ex-message e))
{:sql sql}
e)))))
(log/info (u/format-color 'blue "[ok]")))
(deftest redshift-types-test (deftest redshift-types-test
(mt/test-driver (mt/test-driver
:redshift :redshift
(testing "Redshift specific types should be synced correctly" (testing "Redshift specific types should be synced correctly"
(let [db-details (tx/dbdef->connection-details :redshift nil nil) (let [db-details (tx/dbdef->connection-details :redshift nil nil)
tbl-nm "redshift_specific_types" tbl-nm "redshift_specific_types"
qual-tbl-nm (str redshift.test/session-schema-name "." tbl-nm) qual-tbl-nm (format "\"%s\".\"%s\"" (redshift.test/unique-session-schema) tbl-nm)
view-nm "late_binding_view" view-nm "late_binding_view"
qual-view-nm (str redshift.test/session-schema-name "." view-nm)] qual-view-nm (format "\"%s\".\"%s\"" (redshift.test/unique-session-schema) view-nm)]
(mt/with-temp Database [database {:engine :redshift, :details db-details}] (mt/with-temp Database [database {:engine :redshift, :details db-details}]
(try (try
;; create a table with a CHARACTER VARYING and a NUMERIC column, and a late bound view that selects from it ;; create a table with a CHARACTER VARYING and a NUMERIC column, and a late bound view that selects from it
(redshift.test/execute! (execute!
(str "DROP TABLE IF EXISTS %1$s;%n" (str "DROP TABLE IF EXISTS %1$s;%n"
"CREATE TABLE %1$s(weird_varchar CHARACTER VARYING(50), numeric_col NUMERIC(10,2));%n" "CREATE TABLE %1$s(weird_varchar CHARACTER VARYING(50), numeric_col NUMERIC(10,2));%n"
"CREATE OR REPLACE VIEW %2$s AS SELECT * FROM %1$s WITH NO SCHEMA BINDING;") "CREATE OR REPLACE VIEW %2$s AS SELECT * FROM %1$s WITH NO SCHEMA BINDING;")
qual-tbl-nm qual-tbl-nm
qual-view-nm) qual-view-nm)
;; sync the schema again to pick up the new view (and table, though we aren't checking that) ;; sync the schema again to pick up the new view (and table, though we aren't checking that)
(sync/sync-database! database) (sync/sync-database! database)
(is (contains? (is (contains?
(t2/select-fn-set :name Table :db_id (u/the-id database)) ; the new view should have been synced (t2/select-fn-set :name Table :db_id (u/the-id database)) ; the new view should have been synced
view-nm)) view-nm))
(let [table-id (t2/select-one-pk Table :db_id (u/the-id database), :name view-nm)] (let [table-id (t2/select-one-pk Table :db_id (u/the-id database), :name view-nm)]
;; and its columns' :base_type should have been identified correctly ;; and its columns' :base_type should have been identified correctly
(is (= [{:name "numeric_col", :database_type "numeric(10,2)", :base_type :type/Decimal} (is (= [{:name "numeric_col", :database_type "numeric(10,2)", :base_type :type/Decimal}
{:name "weird_varchar", :database_type "character varying(50)", :base_type :type/Text}] {:name "weird_varchar", :database_type "character varying(50)", :base_type :type/Text}]
(map (map
mt/derecordize mt/derecordize
(t2/select [Field :name :database_type :base_type] :table_id table-id {:order-by [:name]}))))) (t2/select [Field :name :database_type :base_type] :table_id table-id {:order-by [:name]})))))
(finally (finally
(redshift.test/execute! (str "DROP TABLE IF EXISTS %s;%n" (execute! (str "DROP TABLE IF EXISTS %s;%n"
"DROP VIEW IF EXISTS %s;") "DROP VIEW IF EXISTS %s;")
qual-tbl-nm qual-tbl-nm
qual-view-nm)))))))) qual-view-nm))))))))
(deftest redshift-lbv-sync-error-test (deftest redshift-lbv-sync-error-test
(mt/test-driver (mt/test-driver
...@@ -247,30 +266,30 @@ ...@@ -247,30 +266,30 @@
(testing "Late-binding view with with data types that cause a JDBC error can still be synced successfully (#21215)" (testing "Late-binding view with with data types that cause a JDBC error can still be synced successfully (#21215)"
(let [db-details (tx/dbdef->connection-details :redshift nil nil) (let [db-details (tx/dbdef->connection-details :redshift nil nil)
view-nm "weird_late_binding_view" view-nm "weird_late_binding_view"
qual-view-nm (str redshift.test/session-schema-name "." view-nm)] qual-view-nm (format "\"%s\".\"%s\"" (redshift.test/unique-session-schema) view-nm)]
(mt/with-temp Database [database {:engine :redshift, :details db-details}] (mt/with-temp Database [database {:engine :redshift, :details db-details}]
(try (try
(redshift.test/execute! (execute!
(str "CREATE OR REPLACE VIEW %1$s AS (" (str "CREATE OR REPLACE VIEW %1$s AS ("
"WITH test_data AS (SELECT 'open' AS shop_status UNION ALL SELECT 'closed' AS shop_status) " "WITH test_data AS (SELECT 'open' AS shop_status UNION ALL SELECT 'closed' AS shop_status) "
"SELECT NULL as raw_null, " "SELECT NULL as raw_null, "
"'hello' as raw_var, " "'hello' as raw_var, "
"CASE WHEN shop_status = 'open' THEN 11387.133 END AS case_when_numeric_inc_nulls " "CASE WHEN shop_status = 'open' THEN 11387.133 END AS case_when_numeric_inc_nulls "
"FROM test_data) WITH NO SCHEMA BINDING;") "FROM test_data) WITH NO SCHEMA BINDING;")
qual-view-nm) qual-view-nm)
(sync/sync-database! database) (sync/sync-database! database)
(is (contains? (is (contains?
(t2/select-fn-set :name Table :db_id (u/the-id database)) ; the new view should have been synced without errors (t2/select-fn-set :name Table :db_id (u/the-id database)) ; the new view should have been synced without errors
view-nm)) view-nm))
(let [table-id (t2/select-one-pk Table :db_id (u/the-id database), :name view-nm)] (let [table-id (t2/select-one-pk Table :db_id (u/the-id database), :name view-nm)]
;; and its columns' :base_type should have been identified correctly ;; and its columns' :base_type should have been identified correctly
(is (= [{:name "case_when_numeric_inc_nulls", :database_type "numeric", :base_type :type/Decimal} (is (= [{:name "case_when_numeric_inc_nulls", :database_type "numeric", :base_type :type/Decimal}
{:name "raw_null", :database_type "varchar", :base_type :type/Text} {:name "raw_null", :database_type "varchar", :base_type :type/Text}
{:name "raw_var", :database_type "character varying(5)", :base_type :type/Text}] {:name "raw_var", :database_type "character varying(5)", :base_type :type/Text}]
(t2/select [Field :name :database_type :base_type] :table_id table-id {:order-by [:name]})))) (t2/select [Field :name :database_type :base_type] :table_id table-id {:order-by [:name]}))))
(finally (finally
(redshift.test/execute! (str "DROP VIEW IF EXISTS %s;") (execute! (str "DROP VIEW IF EXISTS %s;")
qual-view-nm)))))))) qual-view-nm))))))))
(deftest filtered-syncable-schemas-test (deftest filtered-syncable-schemas-test
(mt/test-driver :redshift (mt/test-driver :redshift
...@@ -280,14 +299,14 @@ ...@@ -280,14 +299,14 @@
random-schema (u/lower-case-en (tu.random/random-name)) random-schema (u/lower-case-en (tu.random/random-name))
user-pw "Password1234" user-pw "Password1234"
db-det (:details (mt/db))] db-det (:details (mt/db))]
(redshift.test/execute! (str "CREATE SCHEMA %s;" (execute! (str "CREATE SCHEMA %s;"
"CREATE USER %s PASSWORD '%s';%n" "CREATE USER %s PASSWORD '%s';%n"
"GRANT USAGE ON SCHEMA %s TO %s;%n") "GRANT USAGE ON SCHEMA %s TO %s;%n")
random-schema random-schema
temp-username temp-username
user-pw user-pw
random-schema random-schema
temp-username) temp-username)
(try (try
(binding [redshift.test/*use-original-filtered-syncable-schemas-impl?* true] (binding [redshift.test/*use-original-filtered-syncable-schemas-impl?* true]
(mt/with-temp Database [db {:engine :redshift, :details (assoc db-det :user temp-username :password user-pw)}] (mt/with-temp Database [db {:engine :redshift, :details (assoc db-det :user temp-username :password user-pw)}]
...@@ -302,15 +321,15 @@ ...@@ -302,15 +321,15 @@
(testing "filtered-syncable-schemas for the user should contain the newly created random schema" (testing "filtered-syncable-schemas for the user should contain the newly created random schema"
(is (contains? schemas random-schema))) (is (contains? schemas random-schema)))
(testing "should not contain the current session-schema name (since that was never granted)" (testing "should not contain the current session-schema name (since that was never granted)"
(is (not (contains? schemas redshift.test/session-schema-name)))))))) (is (not (contains? schemas (redshift.test/unique-session-schema)))))))))
(finally (finally
(redshift.test/execute! (str "REVOKE USAGE ON SCHEMA %s FROM %s;%n" (execute! (str "REVOKE USAGE ON SCHEMA %s FROM %s;%n"
"DROP USER IF EXISTS %s;%n" "DROP USER IF EXISTS %s;%n"
"DROP SCHEMA IF EXISTS %s;%n") "DROP SCHEMA IF EXISTS %s;%n")
random-schema random-schema
temp-username temp-username
temp-username temp-username
random-schema))))) random-schema)))))
(testing "Should filter out non-existent schemas (for which nobody has permissions)" (testing "Should filter out non-existent schemas (for which nobody has permissions)"
(let [fake-schema-name (u/qualified-name ::fake-schema)] (let [fake-schema-name (u/qualified-name ::fake-schema)]
...@@ -319,15 +338,15 @@ ...@@ -319,15 +338,15 @@
(with-redefs [sql-jdbc.describe-database/all-schemas (let [orig sql-jdbc.describe-database/all-schemas] (with-redefs [sql-jdbc.describe-database/all-schemas (let [orig sql-jdbc.describe-database/all-schemas]
(fn [metadata] (fn [metadata]
(eduction (eduction
cat cat
[(orig metadata) [fake-schema-name]])))] [(orig metadata) [fake-schema-name]])))]
(let [jdbc-spec (sql-jdbc.conn/db->pooled-connection-spec (mt/db))] (let [jdbc-spec (sql-jdbc.conn/db->pooled-connection-spec (mt/db))]
(with-open [conn (jdbc/get-connection jdbc-spec)] (with-open [conn (jdbc/get-connection jdbc-spec)]
(letfn [(schemas [] (letfn [(schemas []
(reduce (reduce
conj conj
#{} #{}
(sql-jdbc.sync/filtered-syncable-schemas :redshift conn (.getMetaData conn) nil nil)))] (sql-jdbc.sync/filtered-syncable-schemas :redshift conn (.getMetaData conn) nil nil)))]
(testing "if schemas-with-usage-permissions is disabled, the ::fake-schema should come back" (testing "if schemas-with-usage-permissions is disabled, the ::fake-schema should come back"
(with-redefs [redshift/reducible-schemas-with-usage-permissions (fn [_ reducible] (with-redefs [redshift/reducible-schemas-with-usage-permissions (fn [_ reducible]
reducible)] reducible)]
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
[clojure.java.jdbc :as jdbc] [clojure.java.jdbc :as jdbc]
[metabase.driver.sql-jdbc.connection :as sql-jdbc.conn] [metabase.driver.sql-jdbc.connection :as sql-jdbc.conn]
[metabase.driver.sql-jdbc.sync :as sql-jdbc.sync] [metabase.driver.sql-jdbc.sync :as sql-jdbc.sync]
[metabase.driver.sql.test-util.unique-prefix :as sql.tu.unique-prefix]
[metabase.test.data.interface :as tx] [metabase.test.data.interface :as tx]
[metabase.test.data.sql :as sql.tx] [metabase.test.data.sql :as sql.tx]
[metabase.test.data.sql.ddl :as ddl] [metabase.test.data.sql.ddl :as ddl]
...@@ -36,7 +37,7 @@ ...@@ -36,7 +37,7 @@
[_ _] [_ _]
(throw (UnsupportedOperationException. "Redshift does not have a TIME data type."))) (throw (UnsupportedOperationException. "Redshift does not have a TIME data type.")))
(def ^:private db-connection-details (def db-connection-details
(delay {:host (tx/db-test-env-var-or-throw :redshift :host) (delay {:host (tx/db-test-env-var-or-throw :redshift :host)
:port (Integer/parseInt (tx/db-test-env-var-or-throw :redshift :port "5439")) :port (Integer/parseInt (tx/db-test-env-var-or-throw :redshift :port "5439"))
:db (tx/db-test-env-var-or-throw :redshift :db) :db (tx/db-test-env-var-or-throw :redshift :db)
...@@ -47,15 +48,8 @@ ...@@ -47,15 +48,8 @@
[& _] [& _]
@db-connection-details) @db-connection-details)
;; Redshift is tested remotely, which means we need to support multiple tests happening against the same remote host (defn unique-session-schema []
;; at the same time. Since Redshift doesn't let us create and destroy databases (we must re-use the same database (str (sql.tu.unique-prefix/unique-prefix) "schema"))
;; throughout the tests) we'll just fake it by creating a new schema when tests start running and re-use the same
;; schema for each test
(defonce ^:private session-schema-number
(rand-int 240)) ; there's a maximum of 256 schemas per DB so make sure we don't go over that limit
(defonce session-schema-name
(str "schema_" session-schema-number))
(defmethod sql.tx/create-db-sql :redshift [& _] nil) (defmethod sql.tx/create-db-sql :redshift [& _] nil)
(defmethod sql.tx/drop-db-if-exists-sql :redshift [& _] nil) (defmethod sql.tx/drop-db-if-exists-sql :redshift [& _] nil)
...@@ -63,7 +57,7 @@ ...@@ -63,7 +57,7 @@
(defmethod sql.tx/pk-sql-type :redshift [_] "INTEGER IDENTITY(1,1)") (defmethod sql.tx/pk-sql-type :redshift [_] "INTEGER IDENTITY(1,1)")
(defmethod sql.tx/qualified-name-components :redshift [& args] (defmethod sql.tx/qualified-name-components :redshift [& args]
(apply tx/single-db-qualified-name-components session-schema-name args)) (apply tx/single-db-qualified-name-components (unique-session-schema) args))
;; don't use the Postgres implementation of `drop-db-ddl-statements` because it adds an extra statment to kill all ;; don't use the Postgres implementation of `drop-db-ddl-statements` because it adds an extra statment to kill all
;; open connections to that DB, which doesn't work with Redshift ;; open connections to that DB, which doesn't work with Redshift
...@@ -77,16 +71,29 @@ ...@@ -77,16 +71,29 @@
;;; Create + destroy the schema used for this test session ;;; Create + destroy the schema used for this test session
(defn execute! [format-string & args] (defn- delete-old-schemas! [^java.sql.Connection conn]
(let [sql (apply format format-string args) (with-open [rset (.. conn getMetaData getSchemas)
spec (sql-jdbc.conn/connection-details->spec :redshift @db-connection-details)] stmt (.createStatement conn)]
(log/info (u/format-color 'blue "[redshift] %s" sql)) (while (.next rset)
(jdbc/execute! spec sql)) (let [schema (.getString rset "TABLE_SCHEM")
(log/info (u/format-color 'blue "[ok]"))) sql (format "DROP SCHEMA IF EXISTS \"%s\" CASCADE;" schema)]
(when (sql.tu.unique-prefix/old-dataset-name? schema)
(log/info (u/format-color 'blue "[redshift] %s" sql))
(.execute stmt sql))))))
(defn- create-session-schema! [^java.sql.Connection conn]
(with-open [stmt (.createStatement conn)]
(doseq [^String sql [(format "DROP SCHEMA IF EXISTS \"%s\" CASCADE;" (unique-session-schema))
(format "CREATE SCHEMA \"%s\";" (unique-session-schema))]]
(log/info (u/format-color 'blue "[redshift] %s" sql))
(.execute stmt sql))))
(defmethod tx/before-run :redshift (defmethod tx/before-run :redshift
[_] [_driver]
(execute! "DROP SCHEMA IF EXISTS %s CASCADE; CREATE SCHEMA %s;" session-schema-name session-schema-name)) (with-open [conn (jdbc/get-connection
(sql-jdbc.conn/connection-details->spec :redshift @db-connection-details))]
(delete-old-schemas! conn)
(create-session-schema! conn)))
(defonce ^:private ^{:arglists '([driver connection metadata _ _])} (defonce ^:private ^{:arglists '([driver connection metadata _ _])}
original-filtered-syncable-schemas original-filtered-syncable-schemas
...@@ -103,4 +110,4 @@ ...@@ -103,4 +110,4 @@
[driver conn metadata schema-inclusion-filters schema-exclusion-filters] [driver conn metadata schema-inclusion-filters schema-exclusion-filters]
(if *use-original-filtered-syncable-schemas-impl?* (if *use-original-filtered-syncable-schemas-impl?*
(original-filtered-syncable-schemas driver conn metadata schema-inclusion-filters schema-exclusion-filters) (original-filtered-syncable-schemas driver conn metadata schema-inclusion-filters schema-exclusion-filters)
#{session-schema-name "spectrum"})) #{(unique-session-schema) "spectrum"}))
...@@ -2,11 +2,9 @@ ...@@ -2,11 +2,9 @@
(:require (:require
[clojure.java.jdbc :as jdbc] [clojure.java.jdbc :as jdbc]
[clojure.string :as str] [clojure.string :as str]
[clojure.test :refer :all]
[java-time :as t]
[metabase.driver.sql-jdbc.connection :as sql-jdbc.conn] [metabase.driver.sql-jdbc.connection :as sql-jdbc.conn]
[metabase.driver.sql.test-util.unique-prefix :as sql.tu.unique-prefix]
[metabase.driver.sql.util.unprepare :as unprepare] [metabase.driver.sql.util.unprepare :as unprepare]
[metabase.public-settings :as public-settings]
[metabase.test.data.interface :as tx] [metabase.test.data.interface :as tx]
[metabase.test.data.sql :as sql.tx] [metabase.test.data.sql :as sql.tx]
[metabase.test.data.sql-jdbc :as sql-jdbc.tx] [metabase.test.data.sql-jdbc :as sql-jdbc.tx]
...@@ -14,7 +12,6 @@ ...@@ -14,7 +12,6 @@
[metabase.test.data.sql-jdbc.load-data :as load-data] [metabase.test.data.sql-jdbc.load-data :as load-data]
[metabase.test.data.sql.ddl :as ddl] [metabase.test.data.sql.ddl :as ddl]
[metabase.util :as u] [metabase.util :as u]
[metabase.util.date-2 :as u.date]
[metabase.util.log :as log])) [metabase.util.log :as log]))
(set! *warn-on-reflection* true) (set! *warn-on-reflection* true)
...@@ -35,44 +32,6 @@ ...@@ -35,44 +32,6 @@
:type/Time "TIME"}] :type/Time "TIME"}]
(defmethod sql.tx/field-base-type->sql-type [:snowflake base-type] [_ _] sql-type)) (defmethod sql.tx/field-base-type->sql-type [:snowflake base-type] [_ _] sql-type))
;;; In the past we had one shared prefix for everybody, and everybody was expected to play nice and not screw with it.
;;; This eventually led to BIG problems when one CI job would think see that a dataset did not exist, and try to
;;; recreate it, and then another CI job would do the same thing at the same time, and eventually we'd end up with a
;;; half-created dataset that was missing a bunch of rows. So here is the new strategy going forward:
;;;
;;; 1. Every instance of Metabase gets their own prefix like `<current-date-utc>_<site-uuid>_` e.g. `test-data` becomes
;;; something like
;;;
;;; 2023_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data
;;;
;;; This will prevent jobs from running at the same time from stomping on each other's work.
;;;
;;; 2. To avoid filling our Snowflake account up with ephemeral data that never gets deleted, we will delete datasets
;;; following this pattern when they are two days old or older. E.g. if it is currently `2023-02-17` in UTC then we
;;; can delete anything dataset starts with `2023_02_15` or older. This is done once the first time we create a
;;; Snowflake test dataset in this process. See [[delete-old-datasets-if-needed!]] below.
;;;
;;; See this Slack thread for more info
;;; https://metaboat.slack.com/archives/CKZEMT1MJ/p1676659086280609?thread_ts=1676656964.624609&cid=CKZEMT1MJ or ask
;;; me (Cam) if you have any questions.
(defn- utc-date
"`LocalDate` in UTC time."
[]
(t/local-date (u.date/with-time-zone-same-instant (t/zoned-date-time) "UTC")))
(defn- unique-prefix
"Unique prefix for test datasets for this instance. Format is `<current-date-utc>_<site-uuid>_`. See comments above.
Example:
2023_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_"
([]
(unique-prefix (utc-date)))
([local-date]
{:pre [(instance? java.time.LocalDate local-date)]}
(-> (format "%s_%s_" local-date (public-settings/site-uuid))
(str/replace #"-" "_"))))
(def ^:dynamic *database-prefix-fn* (def ^:dynamic *database-prefix-fn*
"Function that returns a unique prefix to use for test datasets for this instance. This is dynamic so we can rebind it "Function that returns a unique prefix to use for test datasets for this instance. This is dynamic so we can rebind it
to something fixed when we're testing the SQL we generate to something fixed when we're testing the SQL we generate
...@@ -80,7 +39,7 @@ ...@@ -80,7 +39,7 @@
This is a function because [[unique-prefix]] can't be calculated until the application database is initialized This is a function because [[unique-prefix]] can't be calculated until the application database is initialized
because it relies on [[public-settings/site-uuid]]." because it relies on [[public-settings/site-uuid]]."
(memoize unique-prefix)) #'sql.tu.unique-prefix/unique-prefix)
(defn- qualified-db-name (defn- qualified-db-name
"Prepend `database-name` with the [[*database-prefix-fn*]] so we don't stomp on any other jobs running at the same "Prepend `database-name` with the [[*database-prefix-fn*]] so we don't stomp on any other jobs running at the same
...@@ -125,36 +84,6 @@ ...@@ -125,36 +84,6 @@
[] []
(sql-jdbc.conn/connection-details->spec :snowflake (tx/dbdef->connection-details :snowflake :server nil))) (sql-jdbc.conn/connection-details->spec :snowflake (tx/dbdef->connection-details :snowflake :server nil)))
(defn- old-dataset-name?
"Is this dataset name prefixed by a date two days ago or older?
If the date is invalid e.g. `2023-02-31` then we'll count it as old so it will get deleted anyway."
[dataset-name]
(when-let [[_ year month day] (re-matches #"^(\d{4})_(\d{2})_(\d{2}).*$" dataset-name)]
(let [dataset-date (try
(t/local-date (parse-long year) (parse-long month) (parse-long day))
(catch Throwable _
nil))]
(if-not dataset-date
true
(t/before? dataset-date (u.date/add (utc-date) :day -1))))))
(deftest ^:parallel old-dataset-name?-test
(are [s] (old-dataset-name? s)
"2023_02_01_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"2023_01_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"2022_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
(str (unique-prefix (u.date/add (utc-date) :day -2)) "test-data")
;; if the date is invalid we should just treat it as old and delete it.
"2022_00_00_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"2022_13_01_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"2022_02_31_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data")
(are [s] (not (old-dataset-name? s))
"2050_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"v3_test-data"
(str (unique-prefix) "test-data")
(str (unique-prefix (u.date/add (utc-date) :day -1)) "test-data")))
(defn- old-dataset-names (defn- old-dataset-names
"Return a collection of all dataset names that are old -- prefixed with a date two days ago or older?" "Return a collection of all dataset names that are old -- prefixed with a date two days ago or older?"
[] []
...@@ -168,7 +97,7 @@ ...@@ -168,7 +97,7 @@
;; them all lower-case ;; them all lower-case
(let [catalog (u/lower-case-en (.getString rset "TABLE_CAT")) (let [catalog (u/lower-case-en (.getString rset "TABLE_CAT"))
acc (cond-> acc acc (cond-> acc
(old-dataset-name? catalog) (conj catalog))] (sql.tu.unique-prefix/old-dataset-name? catalog) (conj catalog))]
(recur acc)))))))) (recur acc))))))))
(defn- delete-old-datasets! (defn- delete-old-datasets!
......
(ns metabase.driver.sql.test-util.unique-prefix
"Tooling for testing Cloud-based SQL databases, creating unique schema names for every test run and 'garbage
collecting' old ones.
In the past we had one shared prefix for everybody, and everybody was expected to play nice and not screw with it.
This eventually led to BIG problems when one CI job would think see that a dataset did not exist, and try to
recreate it, and then another CI job would do the same thing at the same time, and eventually we'd end up with a
half-created dataset that was missing a bunch of rows. So here is the new strategy going forward:
1. Every instance of Metabase gets their own prefix like `<current-date-utc>_<site-uuid>_` e.g. `test-data` becomes
something like
2023_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data
This will prevent jobs from running at the same time from stomping on each other's work.
2. To avoid filling our Snowflake/Redshift/etc. accounts up with ephemeral data that never gets deleted, we will
delete datasets following this pattern when they are two days old or older. E.g. if it is currently `2023-02-17` in
UTC then we can delete anything dataset starts with `2023_02_15` or older. This is done once the first time we
create a test dataset in this process, i.e. done in [[metabase.test.data.interface/before-run]].
See [[metabase.test.data.snowflake/delete-old-datasets-if-needed!]] for example.
See this Slack thread for more info
https://metaboat.slack.com/archives/CKZEMT1MJ/p1676659086280609?thread_ts=1676656964.624609&cid=CKZEMT1MJ or ask
me (Cam) if you have any questions."
(:require
[clojure.string :as str]
[clojure.test :refer :all]
[java-time :as t]
[metabase.public-settings :as public-settings]
[metabase.util.date-2 :as u.date]))
(defn- utc-date
"`LocalDate` in UTC time."
[]
(t/local-date (u.date/with-time-zone-same-instant (t/zoned-date-time) "UTC")))
(defn- unique-prefix*
([]
(unique-prefix* (utc-date)))
([local-date]
{:pre [(instance? java.time.LocalDate local-date)]}
(-> (format "%s_%s_" local-date (public-settings/site-uuid))
(str/replace #"-" "_"))))
(def ^{:arglists '([])} unique-prefix
"Unique prefix for test datasets for this instance. Format is `<current-date-utc>_<site-uuid>_`. See comments above.
Example:
2023_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_"
(memoize unique-prefix*))
(defn old-dataset-name?
"Is this dataset name prefixed by a date two days ago or older?
If the date is invalid e.g. `2023-02-31` then we'll count it as old so it will get deleted anyway."
[dataset-name]
(when-let [[_ year month day] (re-matches #"^(\d{4})_(\d{2})_(\d{2}).*$" dataset-name)]
(let [dataset-date (try
(t/local-date (parse-long year) (parse-long month) (parse-long day))
(catch Throwable _
nil))]
(if-not dataset-date
true
(t/before? dataset-date (u.date/add (utc-date) :day -1))))))
(deftest ^:parallel old-dataset-name?-test
(are [s] (old-dataset-name? s)
"2023_02_01_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"2023_01_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"2022_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
(str (unique-prefix* (u.date/add (utc-date) :day -2)) "test-data")
;; if the date is invalid we should just treat it as old and delete it.
"2022_00_00_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"2022_13_01_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"2022_02_31_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data")
(are [s] (not (old-dataset-name? s))
"2050_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"v3_test-data"
(str (unique-prefix*) "test-data")
(str (unique-prefix* (u.date/add (utc-date) :day -1)) "test-data")))
...@@ -150,20 +150,24 @@ ...@@ -150,20 +150,24 @@
patterns-type-prop (keyword (str (:name schema-filter-prop) "-patterns"))] patterns-type-prop (keyword (str (:name schema-filter-prop) "-patterns"))]
(testing "Filtering connections for schemas works as expected" (testing "Filtering connections for schemas works as expected"
(testing " with an inclusion filter" (testing " with an inclusion filter"
(sync-and-assert-filtered-tables {:name (format "Test %s DB with dataset inclusion filters" driver) (sync-and-assert-filtered-tables
:engine driver {:name (format "Test %s DB with dataset inclusion filters" driver)
:details (-> (mt/db) :engine driver
:details :details (-> (mt/db)
(assoc filter-type-prop "inclusion" :details
patterns-type-prop "s*,v*"))} (assoc filter-type-prop "inclusion"
(fn [{schema-name :schema}] patterns-type-prop "s*,v*,2*"))}
(is (contains? #{\s \v} (first schema-name)))))) (fn [{schema-name :schema}]
(testing (format "schema name = %s" (pr-str schema-name))
(is (contains? #{\s \v \2} (first schema-name)))))))
(testing " with an exclusion filter" (testing " with an exclusion filter"
(sync-and-assert-filtered-tables {:name (format "Test %s DB with dataset exclusion filters" driver) (sync-and-assert-filtered-tables
:engine driver {:name (format "Test %s DB with dataset exclusion filters" driver)
:details (-> (mt/db) :engine driver
:details :details (-> (mt/db)
(assoc filter-type-prop "exclusion" :details
patterns-type-prop "v*"))} (assoc filter-type-prop "exclusion"
(fn [{schema-name :schema}] patterns-type-prop "v*"))}
(is (not= \v (first schema-name)))))))))) (fn [{schema-name :schema}]
(testing (format "schema name = %s" (pr-str schema-name))
(is (not= \v (first schema-name)))))))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment