From f401d1cce468ad9a3134648c7fd9a039ba72ad5a Mon Sep 17 00:00:00 2001 From: Cam Saul <1455846+camsaul@users.noreply.github.com> Date: Fri, 31 Mar 2023 23:47:27 -0700 Subject: [PATCH] Adapt unique schema logic in CI from Snowflake to Redshift (#29613) * Adapt unique schema logic in CI from Snowflake to Redshift * Ok don't stomp on all the old schemas just yet. * Fix missing vars * Test fixes :wrench: * Test fixes :wrench: * Revert `^:parallel` on tests that shouldn't be allowed to use it --- .../test/metabase/driver/redshift_test.clj | 179 ++++++++++-------- .../test/metabase/test/data/redshift.clj | 47 +++-- .../test/metabase/test/data/snowflake.clj | 77 +------- .../driver/sql/test_util/unique_prefix.clj | 81 ++++++++ .../sql_jdbc/sync/describe_database_test.clj | 36 ++-- 5 files changed, 230 insertions(+), 190 deletions(-) create mode 100644 test/metabase/driver/sql/test_util/unique_prefix.clj diff --git a/modules/drivers/redshift/test/metabase/driver/redshift_test.clj b/modules/drivers/redshift/test/metabase/driver/redshift_test.clj index 7ae90e76522..85e82a274f4 100644 --- a/modules/drivers/redshift/test/metabase/driver/redshift_test.clj +++ b/modules/drivers/redshift/test/metabase/driver/redshift_test.clj @@ -27,6 +27,7 @@ [metabase.util.honey-sql-2 :as h2x] #_{:clj-kondo/ignore [:discouraged-namespace]} [metabase.util.honeysql-extensions :as hx] + [metabase.util.log :as log] [toucan2.core :as t2]) (:import (metabase.plugins.jdbc_proxy ProxyDriver))) @@ -42,7 +43,7 @@ (binding [hx/*honey-sql-version* 2] (thunk)))) -(deftest correct-driver-test +(deftest ^:parallel correct-driver-test (mt/test-driver :redshift (testing "Make sure we're using the correct driver for Redshift" (let [driver (java.sql.DriverManager/getDriver "jdbc:redshift://host:5432/testdb") @@ -85,14 +86,20 @@ "FROM" " \"{{schema}}\".\"test_data_users\"" "WHERE" - " (\"{{schema}}\".\"test_data_users\".\"id\" = 1)" - " OR (\"{{schema}}\".\"test_data_users\".\"id\" = 2)" - " OR (\"{{schema}}\".\"test_data_users\".\"id\" = 3)" + " (" + " \"{{schema}}\".\"test_data_users\".\"id\" = 1" + " )" + " OR (" + " \"{{schema}}\".\"test_data_users\".\"id\" = 2" + " )" + " OR (" + " \"{{schema}}\".\"test_data_users\".\"id\" = 3" + " )" "LIMIT" " 2000"]] - (-> line - (str/replace #"\Q{{site-uuid}}\E" (public-settings/site-uuid)) - (str/replace #"\Q{{schema}}\E" redshift.test/session-schema-name)))] + (-> line + (str/replace #"\Q{{site-uuid}}\E" (public-settings/site-uuid)) + (str/replace #"\Q{{schema}}\E" (redshift.test/unique-session-schema))))] (is (= expected (sql->lines (query->native @@ -192,7 +199,7 @@ {:database (mt/id) :type :native :native {:query (str "select * " - (format "from \"%s\".test_data_checkins " redshift.test/session-schema-name) + (format "from \"%s\".test_data_checkins " (redshift.test/unique-session-schema)) "where {{date}} " "order by date desc " "limit 1;") @@ -205,41 +212,53 @@ :target [:dimension [:template-tag "date"]] :value "past30years"}]}))))))) +(defn- execute! [format-string & args] + (let [sql (apply format format-string args) + spec (sql-jdbc.conn/connection-details->spec :redshift @redshift.test/db-connection-details)] + (log/info (u/format-color 'blue "[redshift] %s" sql)) + (try + (jdbc/execute! spec sql) + (catch Throwable e + (throw (ex-info (format "Error executing SQL: %s" (ex-message e)) + {:sql sql} + e))))) + (log/info (u/format-color 'blue "[ok]"))) + (deftest redshift-types-test (mt/test-driver :redshift (testing "Redshift specific types should be synced correctly" (let [db-details (tx/dbdef->connection-details :redshift nil nil) tbl-nm "redshift_specific_types" - qual-tbl-nm (str redshift.test/session-schema-name "." tbl-nm) + qual-tbl-nm (format "\"%s\".\"%s\"" (redshift.test/unique-session-schema) tbl-nm) view-nm "late_binding_view" - qual-view-nm (str redshift.test/session-schema-name "." view-nm)] + qual-view-nm (format "\"%s\".\"%s\"" (redshift.test/unique-session-schema) view-nm)] (mt/with-temp Database [database {:engine :redshift, :details db-details}] - (try - ;; create a table with a CHARACTER VARYING and a NUMERIC column, and a late bound view that selects from it - (redshift.test/execute! - (str "DROP TABLE IF EXISTS %1$s;%n" - "CREATE TABLE %1$s(weird_varchar CHARACTER VARYING(50), numeric_col NUMERIC(10,2));%n" - "CREATE OR REPLACE VIEW %2$s AS SELECT * FROM %1$s WITH NO SCHEMA BINDING;") - qual-tbl-nm - qual-view-nm) - ;; sync the schema again to pick up the new view (and table, though we aren't checking that) - (sync/sync-database! database) - (is (contains? - (t2/select-fn-set :name Table :db_id (u/the-id database)) ; the new view should have been synced - view-nm)) - (let [table-id (t2/select-one-pk Table :db_id (u/the-id database), :name view-nm)] - ;; and its columns' :base_type should have been identified correctly - (is (= [{:name "numeric_col", :database_type "numeric(10,2)", :base_type :type/Decimal} - {:name "weird_varchar", :database_type "character varying(50)", :base_type :type/Text}] - (map - mt/derecordize - (t2/select [Field :name :database_type :base_type] :table_id table-id {:order-by [:name]}))))) - (finally - (redshift.test/execute! (str "DROP TABLE IF EXISTS %s;%n" - "DROP VIEW IF EXISTS %s;") - qual-tbl-nm - qual-view-nm)))))))) + (try + ;; create a table with a CHARACTER VARYING and a NUMERIC column, and a late bound view that selects from it + (execute! + (str "DROP TABLE IF EXISTS %1$s;%n" + "CREATE TABLE %1$s(weird_varchar CHARACTER VARYING(50), numeric_col NUMERIC(10,2));%n" + "CREATE OR REPLACE VIEW %2$s AS SELECT * FROM %1$s WITH NO SCHEMA BINDING;") + qual-tbl-nm + qual-view-nm) + ;; sync the schema again to pick up the new view (and table, though we aren't checking that) + (sync/sync-database! database) + (is (contains? + (t2/select-fn-set :name Table :db_id (u/the-id database)) ; the new view should have been synced + view-nm)) + (let [table-id (t2/select-one-pk Table :db_id (u/the-id database), :name view-nm)] + ;; and its columns' :base_type should have been identified correctly + (is (= [{:name "numeric_col", :database_type "numeric(10,2)", :base_type :type/Decimal} + {:name "weird_varchar", :database_type "character varying(50)", :base_type :type/Text}] + (map + mt/derecordize + (t2/select [Field :name :database_type :base_type] :table_id table-id {:order-by [:name]}))))) + (finally + (execute! (str "DROP TABLE IF EXISTS %s;%n" + "DROP VIEW IF EXISTS %s;") + qual-tbl-nm + qual-view-nm)))))))) (deftest redshift-lbv-sync-error-test (mt/test-driver @@ -247,30 +266,30 @@ (testing "Late-binding view with with data types that cause a JDBC error can still be synced successfully (#21215)" (let [db-details (tx/dbdef->connection-details :redshift nil nil) view-nm "weird_late_binding_view" - qual-view-nm (str redshift.test/session-schema-name "." view-nm)] - (mt/with-temp Database [database {:engine :redshift, :details db-details}] - (try - (redshift.test/execute! - (str "CREATE OR REPLACE VIEW %1$s AS (" - "WITH test_data AS (SELECT 'open' AS shop_status UNION ALL SELECT 'closed' AS shop_status) " - "SELECT NULL as raw_null, " - "'hello' as raw_var, " - "CASE WHEN shop_status = 'open' THEN 11387.133 END AS case_when_numeric_inc_nulls " - "FROM test_data) WITH NO SCHEMA BINDING;") - qual-view-nm) - (sync/sync-database! database) - (is (contains? - (t2/select-fn-set :name Table :db_id (u/the-id database)) ; the new view should have been synced without errors - view-nm)) - (let [table-id (t2/select-one-pk Table :db_id (u/the-id database), :name view-nm)] - ;; and its columns' :base_type should have been identified correctly - (is (= [{:name "case_when_numeric_inc_nulls", :database_type "numeric", :base_type :type/Decimal} - {:name "raw_null", :database_type "varchar", :base_type :type/Text} - {:name "raw_var", :database_type "character varying(5)", :base_type :type/Text}] - (t2/select [Field :name :database_type :base_type] :table_id table-id {:order-by [:name]})))) - (finally - (redshift.test/execute! (str "DROP VIEW IF EXISTS %s;") - qual-view-nm)))))))) + qual-view-nm (format "\"%s\".\"%s\"" (redshift.test/unique-session-schema) view-nm)] + (mt/with-temp Database [database {:engine :redshift, :details db-details}] + (try + (execute! + (str "CREATE OR REPLACE VIEW %1$s AS (" + "WITH test_data AS (SELECT 'open' AS shop_status UNION ALL SELECT 'closed' AS shop_status) " + "SELECT NULL as raw_null, " + "'hello' as raw_var, " + "CASE WHEN shop_status = 'open' THEN 11387.133 END AS case_when_numeric_inc_nulls " + "FROM test_data) WITH NO SCHEMA BINDING;") + qual-view-nm) + (sync/sync-database! database) + (is (contains? + (t2/select-fn-set :name Table :db_id (u/the-id database)) ; the new view should have been synced without errors + view-nm)) + (let [table-id (t2/select-one-pk Table :db_id (u/the-id database), :name view-nm)] + ;; and its columns' :base_type should have been identified correctly + (is (= [{:name "case_when_numeric_inc_nulls", :database_type "numeric", :base_type :type/Decimal} + {:name "raw_null", :database_type "varchar", :base_type :type/Text} + {:name "raw_var", :database_type "character varying(5)", :base_type :type/Text}] + (t2/select [Field :name :database_type :base_type] :table_id table-id {:order-by [:name]})))) + (finally + (execute! (str "DROP VIEW IF EXISTS %s;") + qual-view-nm)))))))) (deftest filtered-syncable-schemas-test (mt/test-driver :redshift @@ -280,14 +299,14 @@ random-schema (u/lower-case-en (tu.random/random-name)) user-pw "Password1234" db-det (:details (mt/db))] - (redshift.test/execute! (str "CREATE SCHEMA %s;" - "CREATE USER %s PASSWORD '%s';%n" - "GRANT USAGE ON SCHEMA %s TO %s;%n") - random-schema - temp-username - user-pw - random-schema - temp-username) + (execute! (str "CREATE SCHEMA %s;" + "CREATE USER %s PASSWORD '%s';%n" + "GRANT USAGE ON SCHEMA %s TO %s;%n") + random-schema + temp-username + user-pw + random-schema + temp-username) (try (binding [redshift.test/*use-original-filtered-syncable-schemas-impl?* true] (mt/with-temp Database [db {:engine :redshift, :details (assoc db-det :user temp-username :password user-pw)}] @@ -302,15 +321,15 @@ (testing "filtered-syncable-schemas for the user should contain the newly created random schema" (is (contains? schemas random-schema))) (testing "should not contain the current session-schema name (since that was never granted)" - (is (not (contains? schemas redshift.test/session-schema-name)))))))) + (is (not (contains? schemas (redshift.test/unique-session-schema))))))))) (finally - (redshift.test/execute! (str "REVOKE USAGE ON SCHEMA %s FROM %s;%n" - "DROP USER IF EXISTS %s;%n" - "DROP SCHEMA IF EXISTS %s;%n") - random-schema - temp-username - temp-username - random-schema))))) + (execute! (str "REVOKE USAGE ON SCHEMA %s FROM %s;%n" + "DROP USER IF EXISTS %s;%n" + "DROP SCHEMA IF EXISTS %s;%n") + random-schema + temp-username + temp-username + random-schema))))) (testing "Should filter out non-existent schemas (for which nobody has permissions)" (let [fake-schema-name (u/qualified-name ::fake-schema)] @@ -319,15 +338,15 @@ (with-redefs [sql-jdbc.describe-database/all-schemas (let [orig sql-jdbc.describe-database/all-schemas] (fn [metadata] (eduction - cat - [(orig metadata) [fake-schema-name]])))] + cat + [(orig metadata) [fake-schema-name]])))] (let [jdbc-spec (sql-jdbc.conn/db->pooled-connection-spec (mt/db))] (with-open [conn (jdbc/get-connection jdbc-spec)] (letfn [(schemas [] (reduce - conj - #{} - (sql-jdbc.sync/filtered-syncable-schemas :redshift conn (.getMetaData conn) nil nil)))] + conj + #{} + (sql-jdbc.sync/filtered-syncable-schemas :redshift conn (.getMetaData conn) nil nil)))] (testing "if schemas-with-usage-permissions is disabled, the ::fake-schema should come back" (with-redefs [redshift/reducible-schemas-with-usage-permissions (fn [_ reducible] reducible)] diff --git a/modules/drivers/redshift/test/metabase/test/data/redshift.clj b/modules/drivers/redshift/test/metabase/test/data/redshift.clj index ec111b4e8bf..def7b655550 100644 --- a/modules/drivers/redshift/test/metabase/test/data/redshift.clj +++ b/modules/drivers/redshift/test/metabase/test/data/redshift.clj @@ -3,6 +3,7 @@ [clojure.java.jdbc :as jdbc] [metabase.driver.sql-jdbc.connection :as sql-jdbc.conn] [metabase.driver.sql-jdbc.sync :as sql-jdbc.sync] + [metabase.driver.sql.test-util.unique-prefix :as sql.tu.unique-prefix] [metabase.test.data.interface :as tx] [metabase.test.data.sql :as sql.tx] [metabase.test.data.sql.ddl :as ddl] @@ -36,7 +37,7 @@ [_ _] (throw (UnsupportedOperationException. "Redshift does not have a TIME data type."))) -(def ^:private db-connection-details +(def db-connection-details (delay {:host (tx/db-test-env-var-or-throw :redshift :host) :port (Integer/parseInt (tx/db-test-env-var-or-throw :redshift :port "5439")) :db (tx/db-test-env-var-or-throw :redshift :db) @@ -47,15 +48,8 @@ [& _] @db-connection-details) -;; Redshift is tested remotely, which means we need to support multiple tests happening against the same remote host -;; at the same time. Since Redshift doesn't let us create and destroy databases (we must re-use the same database -;; throughout the tests) we'll just fake it by creating a new schema when tests start running and re-use the same -;; schema for each test -(defonce ^:private session-schema-number - (rand-int 240)) ; there's a maximum of 256 schemas per DB so make sure we don't go over that limit - -(defonce session-schema-name - (str "schema_" session-schema-number)) +(defn unique-session-schema [] + (str (sql.tu.unique-prefix/unique-prefix) "schema")) (defmethod sql.tx/create-db-sql :redshift [& _] nil) (defmethod sql.tx/drop-db-if-exists-sql :redshift [& _] nil) @@ -63,7 +57,7 @@ (defmethod sql.tx/pk-sql-type :redshift [_] "INTEGER IDENTITY(1,1)") (defmethod sql.tx/qualified-name-components :redshift [& args] - (apply tx/single-db-qualified-name-components session-schema-name args)) + (apply tx/single-db-qualified-name-components (unique-session-schema) args)) ;; don't use the Postgres implementation of `drop-db-ddl-statements` because it adds an extra statment to kill all ;; open connections to that DB, which doesn't work with Redshift @@ -77,16 +71,29 @@ ;;; Create + destroy the schema used for this test session -(defn execute! [format-string & args] - (let [sql (apply format format-string args) - spec (sql-jdbc.conn/connection-details->spec :redshift @db-connection-details)] - (log/info (u/format-color 'blue "[redshift] %s" sql)) - (jdbc/execute! spec sql)) - (log/info (u/format-color 'blue "[ok]"))) +(defn- delete-old-schemas! [^java.sql.Connection conn] + (with-open [rset (.. conn getMetaData getSchemas) + stmt (.createStatement conn)] + (while (.next rset) + (let [schema (.getString rset "TABLE_SCHEM") + sql (format "DROP SCHEMA IF EXISTS \"%s\" CASCADE;" schema)] + (when (sql.tu.unique-prefix/old-dataset-name? schema) + (log/info (u/format-color 'blue "[redshift] %s" sql)) + (.execute stmt sql)))))) + +(defn- create-session-schema! [^java.sql.Connection conn] + (with-open [stmt (.createStatement conn)] + (doseq [^String sql [(format "DROP SCHEMA IF EXISTS \"%s\" CASCADE;" (unique-session-schema)) + (format "CREATE SCHEMA \"%s\";" (unique-session-schema))]] + (log/info (u/format-color 'blue "[redshift] %s" sql)) + (.execute stmt sql)))) (defmethod tx/before-run :redshift - [_] - (execute! "DROP SCHEMA IF EXISTS %s CASCADE; CREATE SCHEMA %s;" session-schema-name session-schema-name)) + [_driver] + (with-open [conn (jdbc/get-connection + (sql-jdbc.conn/connection-details->spec :redshift @db-connection-details))] + (delete-old-schemas! conn) + (create-session-schema! conn))) (defonce ^:private ^{:arglists '([driver connection metadata _ _])} original-filtered-syncable-schemas @@ -103,4 +110,4 @@ [driver conn metadata schema-inclusion-filters schema-exclusion-filters] (if *use-original-filtered-syncable-schemas-impl?* (original-filtered-syncable-schemas driver conn metadata schema-inclusion-filters schema-exclusion-filters) - #{session-schema-name "spectrum"})) + #{(unique-session-schema) "spectrum"})) diff --git a/modules/drivers/snowflake/test/metabase/test/data/snowflake.clj b/modules/drivers/snowflake/test/metabase/test/data/snowflake.clj index 3f13c684903..edde99360ac 100644 --- a/modules/drivers/snowflake/test/metabase/test/data/snowflake.clj +++ b/modules/drivers/snowflake/test/metabase/test/data/snowflake.clj @@ -2,11 +2,9 @@ (:require [clojure.java.jdbc :as jdbc] [clojure.string :as str] - [clojure.test :refer :all] - [java-time :as t] [metabase.driver.sql-jdbc.connection :as sql-jdbc.conn] + [metabase.driver.sql.test-util.unique-prefix :as sql.tu.unique-prefix] [metabase.driver.sql.util.unprepare :as unprepare] - [metabase.public-settings :as public-settings] [metabase.test.data.interface :as tx] [metabase.test.data.sql :as sql.tx] [metabase.test.data.sql-jdbc :as sql-jdbc.tx] @@ -14,7 +12,6 @@ [metabase.test.data.sql-jdbc.load-data :as load-data] [metabase.test.data.sql.ddl :as ddl] [metabase.util :as u] - [metabase.util.date-2 :as u.date] [metabase.util.log :as log])) (set! *warn-on-reflection* true) @@ -35,44 +32,6 @@ :type/Time "TIME"}] (defmethod sql.tx/field-base-type->sql-type [:snowflake base-type] [_ _] sql-type)) -;;; In the past we had one shared prefix for everybody, and everybody was expected to play nice and not screw with it. -;;; This eventually led to BIG problems when one CI job would think see that a dataset did not exist, and try to -;;; recreate it, and then another CI job would do the same thing at the same time, and eventually we'd end up with a -;;; half-created dataset that was missing a bunch of rows. So here is the new strategy going forward: -;;; -;;; 1. Every instance of Metabase gets their own prefix like `<current-date-utc>_<site-uuid>_` e.g. `test-data` becomes -;;; something like -;;; -;;; 2023_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data -;;; -;;; This will prevent jobs from running at the same time from stomping on each other's work. -;;; -;;; 2. To avoid filling our Snowflake account up with ephemeral data that never gets deleted, we will delete datasets -;;; following this pattern when they are two days old or older. E.g. if it is currently `2023-02-17` in UTC then we -;;; can delete anything dataset starts with `2023_02_15` or older. This is done once the first time we create a -;;; Snowflake test dataset in this process. See [[delete-old-datasets-if-needed!]] below. -;;; -;;; See this Slack thread for more info -;;; https://metaboat.slack.com/archives/CKZEMT1MJ/p1676659086280609?thread_ts=1676656964.624609&cid=CKZEMT1MJ or ask -;;; me (Cam) if you have any questions. - -(defn- utc-date - "`LocalDate` in UTC time." - [] - (t/local-date (u.date/with-time-zone-same-instant (t/zoned-date-time) "UTC"))) - -(defn- unique-prefix - "Unique prefix for test datasets for this instance. Format is `<current-date-utc>_<site-uuid>_`. See comments above. - Example: - - 2023_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_" - ([] - (unique-prefix (utc-date))) - ([local-date] - {:pre [(instance? java.time.LocalDate local-date)]} - (-> (format "%s_%s_" local-date (public-settings/site-uuid)) - (str/replace #"-" "_")))) - (def ^:dynamic *database-prefix-fn* "Function that returns a unique prefix to use for test datasets for this instance. This is dynamic so we can rebind it to something fixed when we're testing the SQL we generate @@ -80,7 +39,7 @@ This is a function because [[unique-prefix]] can't be calculated until the application database is initialized because it relies on [[public-settings/site-uuid]]." - (memoize unique-prefix)) + #'sql.tu.unique-prefix/unique-prefix) (defn- qualified-db-name "Prepend `database-name` with the [[*database-prefix-fn*]] so we don't stomp on any other jobs running at the same @@ -125,36 +84,6 @@ [] (sql-jdbc.conn/connection-details->spec :snowflake (tx/dbdef->connection-details :snowflake :server nil))) -(defn- old-dataset-name? - "Is this dataset name prefixed by a date two days ago or older? - - If the date is invalid e.g. `2023-02-31` then we'll count it as old so it will get deleted anyway." - [dataset-name] - (when-let [[_ year month day] (re-matches #"^(\d{4})_(\d{2})_(\d{2}).*$" dataset-name)] - (let [dataset-date (try - (t/local-date (parse-long year) (parse-long month) (parse-long day)) - (catch Throwable _ - nil))] - (if-not dataset-date - true - (t/before? dataset-date (u.date/add (utc-date) :day -1)))))) - -(deftest ^:parallel old-dataset-name?-test - (are [s] (old-dataset-name? s) - "2023_02_01_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data" - "2023_01_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data" - "2022_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data" - (str (unique-prefix (u.date/add (utc-date) :day -2)) "test-data") - ;; if the date is invalid we should just treat it as old and delete it. - "2022_00_00_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data" - "2022_13_01_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data" - "2022_02_31_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data") - (are [s] (not (old-dataset-name? s)) - "2050_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data" - "v3_test-data" - (str (unique-prefix) "test-data") - (str (unique-prefix (u.date/add (utc-date) :day -1)) "test-data"))) - (defn- old-dataset-names "Return a collection of all dataset names that are old -- prefixed with a date two days ago or older?" [] @@ -168,7 +97,7 @@ ;; them all lower-case (let [catalog (u/lower-case-en (.getString rset "TABLE_CAT")) acc (cond-> acc - (old-dataset-name? catalog) (conj catalog))] + (sql.tu.unique-prefix/old-dataset-name? catalog) (conj catalog))] (recur acc)))))))) (defn- delete-old-datasets! diff --git a/test/metabase/driver/sql/test_util/unique_prefix.clj b/test/metabase/driver/sql/test_util/unique_prefix.clj new file mode 100644 index 00000000000..bf938872c8e --- /dev/null +++ b/test/metabase/driver/sql/test_util/unique_prefix.clj @@ -0,0 +1,81 @@ +(ns metabase.driver.sql.test-util.unique-prefix + "Tooling for testing Cloud-based SQL databases, creating unique schema names for every test run and 'garbage + collecting' old ones. + + In the past we had one shared prefix for everybody, and everybody was expected to play nice and not screw with it. + This eventually led to BIG problems when one CI job would think see that a dataset did not exist, and try to + recreate it, and then another CI job would do the same thing at the same time, and eventually we'd end up with a + half-created dataset that was missing a bunch of rows. So here is the new strategy going forward: + + 1. Every instance of Metabase gets their own prefix like `<current-date-utc>_<site-uuid>_` e.g. `test-data` becomes + something like + + 2023_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data + + This will prevent jobs from running at the same time from stomping on each other's work. + + 2. To avoid filling our Snowflake/Redshift/etc. accounts up with ephemeral data that never gets deleted, we will + delete datasets following this pattern when they are two days old or older. E.g. if it is currently `2023-02-17` in + UTC then we can delete anything dataset starts with `2023_02_15` or older. This is done once the first time we + create a test dataset in this process, i.e. done in [[metabase.test.data.interface/before-run]]. + See [[metabase.test.data.snowflake/delete-old-datasets-if-needed!]] for example. + + See this Slack thread for more info + https://metaboat.slack.com/archives/CKZEMT1MJ/p1676659086280609?thread_ts=1676656964.624609&cid=CKZEMT1MJ or ask + me (Cam) if you have any questions." + (:require + [clojure.string :as str] + [clojure.test :refer :all] + [java-time :as t] + [metabase.public-settings :as public-settings] + [metabase.util.date-2 :as u.date])) + +(defn- utc-date + "`LocalDate` in UTC time." + [] + (t/local-date (u.date/with-time-zone-same-instant (t/zoned-date-time) "UTC"))) + +(defn- unique-prefix* + ([] + (unique-prefix* (utc-date))) + ([local-date] + {:pre [(instance? java.time.LocalDate local-date)]} + (-> (format "%s_%s_" local-date (public-settings/site-uuid)) + (str/replace #"-" "_")))) + +(def ^{:arglists '([])} unique-prefix + "Unique prefix for test datasets for this instance. Format is `<current-date-utc>_<site-uuid>_`. See comments above. + Example: + + 2023_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_" + (memoize unique-prefix*)) + +(defn old-dataset-name? + "Is this dataset name prefixed by a date two days ago or older? + + If the date is invalid e.g. `2023-02-31` then we'll count it as old so it will get deleted anyway." + [dataset-name] + (when-let [[_ year month day] (re-matches #"^(\d{4})_(\d{2})_(\d{2}).*$" dataset-name)] + (let [dataset-date (try + (t/local-date (parse-long year) (parse-long month) (parse-long day)) + (catch Throwable _ + nil))] + (if-not dataset-date + true + (t/before? dataset-date (u.date/add (utc-date) :day -1)))))) + +(deftest ^:parallel old-dataset-name?-test + (are [s] (old-dataset-name? s) + "2023_02_01_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data" + "2023_01_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data" + "2022_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data" + (str (unique-prefix* (u.date/add (utc-date) :day -2)) "test-data") + ;; if the date is invalid we should just treat it as old and delete it. + "2022_00_00_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data" + "2022_13_01_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data" + "2022_02_31_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data") + (are [s] (not (old-dataset-name? s)) + "2050_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data" + "v3_test-data" + (str (unique-prefix*) "test-data") + (str (unique-prefix* (u.date/add (utc-date) :day -1)) "test-data"))) diff --git a/test/metabase/driver/sql_jdbc/sync/describe_database_test.clj b/test/metabase/driver/sql_jdbc/sync/describe_database_test.clj index 5d284c11f8a..de6eca12d1d 100644 --- a/test/metabase/driver/sql_jdbc/sync/describe_database_test.clj +++ b/test/metabase/driver/sql_jdbc/sync/describe_database_test.clj @@ -150,20 +150,24 @@ patterns-type-prop (keyword (str (:name schema-filter-prop) "-patterns"))] (testing "Filtering connections for schemas works as expected" (testing " with an inclusion filter" - (sync-and-assert-filtered-tables {:name (format "Test %s DB with dataset inclusion filters" driver) - :engine driver - :details (-> (mt/db) - :details - (assoc filter-type-prop "inclusion" - patterns-type-prop "s*,v*"))} - (fn [{schema-name :schema}] - (is (contains? #{\s \v} (first schema-name)))))) + (sync-and-assert-filtered-tables + {:name (format "Test %s DB with dataset inclusion filters" driver) + :engine driver + :details (-> (mt/db) + :details + (assoc filter-type-prop "inclusion" + patterns-type-prop "s*,v*,2*"))} + (fn [{schema-name :schema}] + (testing (format "schema name = %s" (pr-str schema-name)) + (is (contains? #{\s \v \2} (first schema-name))))))) (testing " with an exclusion filter" - (sync-and-assert-filtered-tables {:name (format "Test %s DB with dataset exclusion filters" driver) - :engine driver - :details (-> (mt/db) - :details - (assoc filter-type-prop "exclusion" - patterns-type-prop "v*"))} - (fn [{schema-name :schema}] - (is (not= \v (first schema-name)))))))))) + (sync-and-assert-filtered-tables + {:name (format "Test %s DB with dataset exclusion filters" driver) + :engine driver + :details (-> (mt/db) + :details + (assoc filter-type-prop "exclusion" + patterns-type-prop "v*"))} + (fn [{schema-name :schema}] + (testing (format "schema name = %s" (pr-str schema-name)) + (is (not= \v (first schema-name))))))))))) -- GitLab