Unverified Commit f401d1cc authored by Cam Saul, committed by GitHub

Adapt unique schema logic in CI from Snowflake to Redshift (#29613)

* Adapt unique schema logic in CI from Snowflake to Redshift

* Ok don't stomp on all the old schemas just yet.

* Fix missing vars

* Test fixes :wrench:

* Test fixes :wrench:

* Revert `^:parallel` on tests that shouldn't be allowed to use it
parent 7b617a4d
@@ -27,6 +27,7 @@
[metabase.util.honey-sql-2 :as h2x]
#_{:clj-kondo/ignore [:discouraged-namespace]}
[metabase.util.honeysql-extensions :as hx]
[metabase.util.log :as log]
[toucan2.core :as t2])
(:import
(metabase.plugins.jdbc_proxy ProxyDriver)))
@@ -42,7 +43,7 @@
(binding [hx/*honey-sql-version* 2]
(thunk))))
(deftest correct-driver-test
(deftest ^:parallel correct-driver-test
(mt/test-driver :redshift
(testing "Make sure we're using the correct driver for Redshift"
(let [driver (java.sql.DriverManager/getDriver "jdbc:redshift://host:5432/testdb")
@@ -85,14 +86,20 @@
"FROM"
" \"{{schema}}\".\"test_data_users\""
"WHERE"
" (\"{{schema}}\".\"test_data_users\".\"id\" = 1)"
" OR (\"{{schema}}\".\"test_data_users\".\"id\" = 2)"
" OR (\"{{schema}}\".\"test_data_users\".\"id\" = 3)"
" ("
" \"{{schema}}\".\"test_data_users\".\"id\" = 1"
" )"
" OR ("
" \"{{schema}}\".\"test_data_users\".\"id\" = 2"
" )"
" OR ("
" \"{{schema}}\".\"test_data_users\".\"id\" = 3"
" )"
"LIMIT"
" 2000"]]
(-> line
(str/replace #"\Q{{site-uuid}}\E" (public-settings/site-uuid))
(str/replace #"\Q{{schema}}\E" redshift.test/session-schema-name)))]
(-> line
(str/replace #"\Q{{site-uuid}}\E" (public-settings/site-uuid))
(str/replace #"\Q{{schema}}\E" (redshift.test/unique-session-schema))))]
(is (= expected
(sql->lines
(query->native
@@ -192,7 +199,7 @@
{:database (mt/id)
:type :native
:native {:query (str "select * "
(format "from \"%s\".test_data_checkins " redshift.test/session-schema-name)
(format "from \"%s\".test_data_checkins " (redshift.test/unique-session-schema))
"where {{date}} "
"order by date desc "
"limit 1;")
@@ -205,41 +212,53 @@
:target [:dimension [:template-tag "date"]]
:value "past30years"}]})))))))
(defn- execute! [format-string & args]
(let [sql (apply format format-string args)
spec (sql-jdbc.conn/connection-details->spec :redshift @redshift.test/db-connection-details)]
(log/info (u/format-color 'blue "[redshift] %s" sql))
(try
(jdbc/execute! spec sql)
(catch Throwable e
(throw (ex-info (format "Error executing SQL: %s" (ex-message e))
{:sql sql}
e)))))
(log/info (u/format-color 'blue "[ok]")))
(deftest redshift-types-test
(mt/test-driver
:redshift
(testing "Redshift specific types should be synced correctly"
(let [db-details (tx/dbdef->connection-details :redshift nil nil)
tbl-nm "redshift_specific_types"
qual-tbl-nm (str redshift.test/session-schema-name "." tbl-nm)
qual-tbl-nm (format "\"%s\".\"%s\"" (redshift.test/unique-session-schema) tbl-nm)
view-nm "late_binding_view"
qual-view-nm (str redshift.test/session-schema-name "." view-nm)]
qual-view-nm (format "\"%s\".\"%s\"" (redshift.test/unique-session-schema) view-nm)]
(mt/with-temp Database [database {:engine :redshift, :details db-details}]
(try
;; create a table with a CHARACTER VARYING and a NUMERIC column, and a late bound view that selects from it
(redshift.test/execute!
(str "DROP TABLE IF EXISTS %1$s;%n"
"CREATE TABLE %1$s(weird_varchar CHARACTER VARYING(50), numeric_col NUMERIC(10,2));%n"
"CREATE OR REPLACE VIEW %2$s AS SELECT * FROM %1$s WITH NO SCHEMA BINDING;")
qual-tbl-nm
qual-view-nm)
;; sync the schema again to pick up the new view (and table, though we aren't checking that)
(sync/sync-database! database)
(is (contains?
(t2/select-fn-set :name Table :db_id (u/the-id database)) ; the new view should have been synced
view-nm))
(let [table-id (t2/select-one-pk Table :db_id (u/the-id database), :name view-nm)]
;; and its columns' :base_type should have been identified correctly
(is (= [{:name "numeric_col", :database_type "numeric(10,2)", :base_type :type/Decimal}
{:name "weird_varchar", :database_type "character varying(50)", :base_type :type/Text}]
(map
mt/derecordize
(t2/select [Field :name :database_type :base_type] :table_id table-id {:order-by [:name]})))))
(finally
(redshift.test/execute! (str "DROP TABLE IF EXISTS %s;%n"
"DROP VIEW IF EXISTS %s;")
qual-tbl-nm
qual-view-nm))))))))
(try
;; create a table with a CHARACTER VARYING and a NUMERIC column, and a late bound view that selects from it
(execute!
(str "DROP TABLE IF EXISTS %1$s;%n"
"CREATE TABLE %1$s(weird_varchar CHARACTER VARYING(50), numeric_col NUMERIC(10,2));%n"
"CREATE OR REPLACE VIEW %2$s AS SELECT * FROM %1$s WITH NO SCHEMA BINDING;")
qual-tbl-nm
qual-view-nm)
;; sync the schema again to pick up the new view (and table, though we aren't checking that)
(sync/sync-database! database)
(is (contains?
(t2/select-fn-set :name Table :db_id (u/the-id database)) ; the new view should have been synced
view-nm))
(let [table-id (t2/select-one-pk Table :db_id (u/the-id database), :name view-nm)]
;; and its columns' :base_type should have been identified correctly
(is (= [{:name "numeric_col", :database_type "numeric(10,2)", :base_type :type/Decimal}
{:name "weird_varchar", :database_type "character varying(50)", :base_type :type/Text}]
(map
mt/derecordize
(t2/select [Field :name :database_type :base_type] :table_id table-id {:order-by [:name]})))))
(finally
(execute! (str "DROP TABLE IF EXISTS %s;%n"
"DROP VIEW IF EXISTS %s;")
qual-tbl-nm
qual-view-nm))))))))
(deftest redshift-lbv-sync-error-test
(mt/test-driver
@@ -247,30 +266,30 @@
(testing "Late-binding view with with data types that cause a JDBC error can still be synced successfully (#21215)"
(let [db-details (tx/dbdef->connection-details :redshift nil nil)
view-nm "weird_late_binding_view"
qual-view-nm (str redshift.test/session-schema-name "." view-nm)]
(mt/with-temp Database [database {:engine :redshift, :details db-details}]
(try
(redshift.test/execute!
(str "CREATE OR REPLACE VIEW %1$s AS ("
"WITH test_data AS (SELECT 'open' AS shop_status UNION ALL SELECT 'closed' AS shop_status) "
"SELECT NULL as raw_null, "
"'hello' as raw_var, "
"CASE WHEN shop_status = 'open' THEN 11387.133 END AS case_when_numeric_inc_nulls "
"FROM test_data) WITH NO SCHEMA BINDING;")
qual-view-nm)
(sync/sync-database! database)
(is (contains?
(t2/select-fn-set :name Table :db_id (u/the-id database)) ; the new view should have been synced without errors
view-nm))
(let [table-id (t2/select-one-pk Table :db_id (u/the-id database), :name view-nm)]
;; and its columns' :base_type should have been identified correctly
(is (= [{:name "case_when_numeric_inc_nulls", :database_type "numeric", :base_type :type/Decimal}
{:name "raw_null", :database_type "varchar", :base_type :type/Text}
{:name "raw_var", :database_type "character varying(5)", :base_type :type/Text}]
(t2/select [Field :name :database_type :base_type] :table_id table-id {:order-by [:name]}))))
(finally
(redshift.test/execute! (str "DROP VIEW IF EXISTS %s;")
qual-view-nm))))))))
qual-view-nm (format "\"%s\".\"%s\"" (redshift.test/unique-session-schema) view-nm)]
(mt/with-temp Database [database {:engine :redshift, :details db-details}]
(try
(execute!
(str "CREATE OR REPLACE VIEW %1$s AS ("
"WITH test_data AS (SELECT 'open' AS shop_status UNION ALL SELECT 'closed' AS shop_status) "
"SELECT NULL as raw_null, "
"'hello' as raw_var, "
"CASE WHEN shop_status = 'open' THEN 11387.133 END AS case_when_numeric_inc_nulls "
"FROM test_data) WITH NO SCHEMA BINDING;")
qual-view-nm)
(sync/sync-database! database)
(is (contains?
(t2/select-fn-set :name Table :db_id (u/the-id database)) ; the new view should have been synced without errors
view-nm))
(let [table-id (t2/select-one-pk Table :db_id (u/the-id database), :name view-nm)]
;; and its columns' :base_type should have been identified correctly
(is (= [{:name "case_when_numeric_inc_nulls", :database_type "numeric", :base_type :type/Decimal}
{:name "raw_null", :database_type "varchar", :base_type :type/Text}
{:name "raw_var", :database_type "character varying(5)", :base_type :type/Text}]
(t2/select [Field :name :database_type :base_type] :table_id table-id {:order-by [:name]}))))
(finally
(execute! (str "DROP VIEW IF EXISTS %s;")
qual-view-nm))))))))
(deftest filtered-syncable-schemas-test
(mt/test-driver :redshift
@@ -280,14 +299,14 @@
random-schema (u/lower-case-en (tu.random/random-name))
user-pw "Password1234"
db-det (:details (mt/db))]
(redshift.test/execute! (str "CREATE SCHEMA %s;"
"CREATE USER %s PASSWORD '%s';%n"
"GRANT USAGE ON SCHEMA %s TO %s;%n")
random-schema
temp-username
user-pw
random-schema
temp-username)
(execute! (str "CREATE SCHEMA %s;"
"CREATE USER %s PASSWORD '%s';%n"
"GRANT USAGE ON SCHEMA %s TO %s;%n")
random-schema
temp-username
user-pw
random-schema
temp-username)
(try
(binding [redshift.test/*use-original-filtered-syncable-schemas-impl?* true]
(mt/with-temp Database [db {:engine :redshift, :details (assoc db-det :user temp-username :password user-pw)}]
@@ -302,15 +321,15 @@
(testing "filtered-syncable-schemas for the user should contain the newly created random schema"
(is (contains? schemas random-schema)))
(testing "should not contain the current session-schema name (since that was never granted)"
(is (not (contains? schemas redshift.test/session-schema-name))))))))
(is (not (contains? schemas (redshift.test/unique-session-schema)))))))))
(finally
(redshift.test/execute! (str "REVOKE USAGE ON SCHEMA %s FROM %s;%n"
"DROP USER IF EXISTS %s;%n"
"DROP SCHEMA IF EXISTS %s;%n")
random-schema
temp-username
temp-username
random-schema)))))
(execute! (str "REVOKE USAGE ON SCHEMA %s FROM %s;%n"
"DROP USER IF EXISTS %s;%n"
"DROP SCHEMA IF EXISTS %s;%n")
random-schema
temp-username
temp-username
random-schema)))))
(testing "Should filter out non-existent schemas (for which nobody has permissions)"
(let [fake-schema-name (u/qualified-name ::fake-schema)]
@@ -319,15 +338,15 @@
(with-redefs [sql-jdbc.describe-database/all-schemas (let [orig sql-jdbc.describe-database/all-schemas]
(fn [metadata]
(eduction
cat
[(orig metadata) [fake-schema-name]])))]
cat
[(orig metadata) [fake-schema-name]])))]
(let [jdbc-spec (sql-jdbc.conn/db->pooled-connection-spec (mt/db))]
(with-open [conn (jdbc/get-connection jdbc-spec)]
(letfn [(schemas []
(reduce
conj
#{}
(sql-jdbc.sync/filtered-syncable-schemas :redshift conn (.getMetaData conn) nil nil)))]
conj
#{}
(sql-jdbc.sync/filtered-syncable-schemas :redshift conn (.getMetaData conn) nil nil)))]
(testing "if schemas-with-usage-permissions is disabled, the ::fake-schema should come back"
(with-redefs [redshift/reducible-schemas-with-usage-permissions (fn [_ reducible]
reducible)]
...
@@ -3,6 +3,7 @@
[clojure.java.jdbc :as jdbc]
[metabase.driver.sql-jdbc.connection :as sql-jdbc.conn]
[metabase.driver.sql-jdbc.sync :as sql-jdbc.sync]
[metabase.driver.sql.test-util.unique-prefix :as sql.tu.unique-prefix]
[metabase.test.data.interface :as tx]
[metabase.test.data.sql :as sql.tx]
[metabase.test.data.sql.ddl :as ddl]
@@ -36,7 +37,7 @@
[_ _]
(throw (UnsupportedOperationException. "Redshift does not have a TIME data type.")))
(def ^:private db-connection-details
(def db-connection-details
(delay {:host (tx/db-test-env-var-or-throw :redshift :host)
:port (Integer/parseInt (tx/db-test-env-var-or-throw :redshift :port "5439"))
:db (tx/db-test-env-var-or-throw :redshift :db)
@@ -47,15 +48,8 @@
[& _]
@db-connection-details)
;; Redshift is tested remotely, which means we need to support multiple tests happening against the same remote host
;; at the same time. Since Redshift doesn't let us create and destroy databases (we must re-use the same database
;; throughout the tests) we'll just fake it by creating a new schema when tests start running and re-use the same
;; schema for each test
(defonce ^:private session-schema-number
(rand-int 240)) ; there's a maximum of 256 schemas per DB so make sure we don't go over that limit
(defonce session-schema-name
(str "schema_" session-schema-number))
(defn unique-session-schema []
(str (sql.tu.unique-prefix/unique-prefix) "schema"))
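;; Illustrative only -- the date and site UUID vary per instance and per day:
(comment
  (unique-session-schema)
  ;; => "2023_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_schema"
  )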
(defmethod sql.tx/create-db-sql :redshift [& _] nil)
(defmethod sql.tx/drop-db-if-exists-sql :redshift [& _] nil)
@@ -63,7 +57,7 @@
(defmethod sql.tx/pk-sql-type :redshift [_] "INTEGER IDENTITY(1,1)")
(defmethod sql.tx/qualified-name-components :redshift [& args]
(apply tx/single-db-qualified-name-components session-schema-name args))
(apply tx/single-db-qualified-name-components (unique-session-schema) args))
;; don't use the Postgres implementation of `drop-db-ddl-statements` because it adds an extra statement to kill all
;; open connections to that DB, which doesn't work with Redshift
@@ -77,16 +71,29 @@
;;; Create + destroy the schema used for this test session
(defn execute! [format-string & args]
(let [sql (apply format format-string args)
spec (sql-jdbc.conn/connection-details->spec :redshift @db-connection-details)]
(log/info (u/format-color 'blue "[redshift] %s" sql))
(jdbc/execute! spec sql))
(log/info (u/format-color 'blue "[ok]")))
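;; Drop every schema whose name matches the old `<date>_<site-uuid>_` pattern
;; and is two or more days old (per [[sql.tu.unique-prefix/old-dataset-name?]]).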
(defn- delete-old-schemas! [^java.sql.Connection conn]
(with-open [rset (.. conn getMetaData getSchemas)
stmt (.createStatement conn)]
(while (.next rset)
(let [schema (.getString rset "TABLE_SCHEM")
sql (format "DROP SCHEMA IF EXISTS \"%s\" CASCADE;" schema)]
(when (sql.tu.unique-prefix/old-dataset-name? schema)
(log/info (u/format-color 'blue "[redshift] %s" sql))
(.execute stmt sql))))))
(defn- create-session-schema! [^java.sql.Connection conn]
(with-open [stmt (.createStatement conn)]
(doseq [^String sql [(format "DROP SCHEMA IF EXISTS \"%s\" CASCADE;" (unique-session-schema))
(format "CREATE SCHEMA \"%s\";" (unique-session-schema))]]
(log/info (u/format-color 'blue "[redshift] %s" sql))
(.execute stmt sql))))
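;; Net effect at the start of a test run: garbage-collect schemas left over
;; from runs two or more days old, then (re)create this run's unique schema.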
(defmethod tx/before-run :redshift
[_]
(execute! "DROP SCHEMA IF EXISTS %s CASCADE; CREATE SCHEMA %s;" session-schema-name session-schema-name))
[_driver]
(with-open [conn (jdbc/get-connection
(sql-jdbc.conn/connection-details->spec :redshift @db-connection-details))]
(delete-old-schemas! conn)
(create-session-schema! conn)))
(defonce ^:private ^{:arglists '([driver connection metadata _ _])}
original-filtered-syncable-schemas
@@ -103,4 +110,4 @@
[driver conn metadata schema-inclusion-filters schema-exclusion-filters]
(if *use-original-filtered-syncable-schemas-impl?*
(original-filtered-syncable-schemas driver conn metadata schema-inclusion-filters schema-exclusion-filters)
#{session-schema-name "spectrum"}))
#{(unique-session-schema) "spectrum"}))
@@ -2,11 +2,9 @@
(:require
[clojure.java.jdbc :as jdbc]
[clojure.string :as str]
[clojure.test :refer :all]
[java-time :as t]
[metabase.driver.sql-jdbc.connection :as sql-jdbc.conn]
[metabase.driver.sql.test-util.unique-prefix :as sql.tu.unique-prefix]
[metabase.driver.sql.util.unprepare :as unprepare]
[metabase.public-settings :as public-settings]
[metabase.test.data.interface :as tx]
[metabase.test.data.sql :as sql.tx]
[metabase.test.data.sql-jdbc :as sql-jdbc.tx]
@@ -14,7 +12,6 @@
[metabase.test.data.sql-jdbc.load-data :as load-data]
[metabase.test.data.sql.ddl :as ddl]
[metabase.util :as u]
[metabase.util.date-2 :as u.date]
[metabase.util.log :as log]))
(set! *warn-on-reflection* true)
@@ -35,44 +32,6 @@
:type/Time "TIME"}]
(defmethod sql.tx/field-base-type->sql-type [:snowflake base-type] [_ _] sql-type))
;;; In the past we had one shared prefix for everybody, and everybody was expected to play nice and not screw with it.
;;; This eventually led to BIG problems when one CI job would see that a dataset did not exist, and try to
;;; recreate it, and then another CI job would do the same thing at the same time, and eventually we'd end up with a
;;; half-created dataset that was missing a bunch of rows. So here is the new strategy going forward:
;;;
;;; 1. Every instance of Metabase gets their own prefix like `<current-date-utc>_<site-uuid>_` e.g. `test-data` becomes
;;; something like
;;;
;;; 2023_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data
;;;
;;; This will prevent jobs running at the same time from stomping on each other's work.
;;;
;;; 2. To avoid filling our Snowflake account up with ephemeral data that never gets deleted, we will delete datasets
;;; following this pattern when they are two days old or older. E.g. if it is currently `2023-02-17` in UTC then we
;;; can delete any dataset that starts with `2023_02_15` or older. This is done once, the first time we create a
;;; Snowflake test dataset in this process. See [[delete-old-datasets-if-needed!]] below.
;;;
;;; See this Slack thread for more info
;;; https://metaboat.slack.com/archives/CKZEMT1MJ/p1676659086280609?thread_ts=1676656964.624609&cid=CKZEMT1MJ or ask
;;; me (Cam) if you have any questions.
(defn- utc-date
"`LocalDate` in UTC time."
[]
(t/local-date (u.date/with-time-zone-same-instant (t/zoned-date-time) "UTC")))
(defn- unique-prefix
"Unique prefix for test datasets for this instance. Format is `<current-date-utc>_<site-uuid>_`. See comments above.
Example:
2023_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_"
([]
(unique-prefix (utc-date)))
([local-date]
{:pre [(instance? java.time.LocalDate local-date)]}
(-> (format "%s_%s_" local-date (public-settings/site-uuid))
(str/replace #"-" "_"))))
(def ^:dynamic *database-prefix-fn*
"Function that returns a unique prefix to use for test datasets for this instance. This is dynamic so we can rebind it
to something fixed when we're testing the SQL we generate
@@ -80,7 +39,7 @@
This is a function because [[unique-prefix]] can't be calculated until the application database is initialized
because it relies on [[public-settings/site-uuid]]."
(memoize unique-prefix))
#'sql.tu.unique-prefix/unique-prefix)
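;; Since the var is dynamic, SQL-generation tests can pin the prefix to a fixed
;; value. A sketch (the prefix string here is an arbitrary illustration):
(comment
  (binding [*database-prefix-fn* (constantly "fixed_prefix_")]
    ;; any qualified database name generated in this scope uses
    ;; "fixed_prefix_" instead of the date/site-UUID-dependent prefix
    ))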
(defn- qualified-db-name
"Prepend `database-name` with the [[*database-prefix-fn*]] so we don't stomp on any other jobs running at the same
@@ -125,36 +84,6 @@
[]
(sql-jdbc.conn/connection-details->spec :snowflake (tx/dbdef->connection-details :snowflake :server nil)))
(defn- old-dataset-name?
"Is this dataset name prefixed by a date two days ago or older?
If the date is invalid e.g. `2023-02-31` then we'll count it as old so it will get deleted anyway."
[dataset-name]
(when-let [[_ year month day] (re-matches #"^(\d{4})_(\d{2})_(\d{2}).*$" dataset-name)]
(let [dataset-date (try
(t/local-date (parse-long year) (parse-long month) (parse-long day))
(catch Throwable _
nil))]
(if-not dataset-date
true
(t/before? dataset-date (u.date/add (utc-date) :day -1))))))
(deftest ^:parallel old-dataset-name?-test
(are [s] (old-dataset-name? s)
"2023_02_01_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"2023_01_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"2022_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
(str (unique-prefix (u.date/add (utc-date) :day -2)) "test-data")
;; if the date is invalid we should just treat it as old and delete it.
"2022_00_00_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"2022_13_01_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"2022_02_31_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data")
(are [s] (not (old-dataset-name? s))
"2050_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"v3_test-data"
(str (unique-prefix) "test-data")
(str (unique-prefix (u.date/add (utc-date) :day -1)) "test-data")))
(defn- old-dataset-names
"Return a collection of all dataset names that are old -- prefixed with a date two days ago or older?"
[]
@@ -168,7 +97,7 @@
;; them all lower-case
(let [catalog (u/lower-case-en (.getString rset "TABLE_CAT"))
acc (cond-> acc
(old-dataset-name? catalog) (conj catalog))]
(sql.tu.unique-prefix/old-dataset-name? catalog) (conj catalog))]
(recur acc))))))))
(defn- delete-old-datasets!
...
(ns metabase.driver.sql.test-util.unique-prefix
"Tooling for testing Cloud-based SQL databases, creating unique schema names for every test run and 'garbage
collecting' old ones.
In the past we had one shared prefix for everybody, and everybody was expected to play nice and not screw with it.
This eventually led to BIG problems when one CI job would see that a dataset did not exist, and try to
recreate it, and then another CI job would do the same thing at the same time, and eventually we'd end up with a
half-created dataset that was missing a bunch of rows. So here is the new strategy going forward:
1. Every instance of Metabase gets their own prefix like `<current-date-utc>_<site-uuid>_` e.g. `test-data` becomes
something like
2023_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data
This will prevent jobs running at the same time from stomping on each other's work.
2. To avoid filling our Snowflake/Redshift/etc. accounts up with ephemeral data that never gets deleted, we will
delete datasets following this pattern when they are two days old or older. E.g. if it is currently `2023-02-17` in
UTC then we can delete any dataset that starts with `2023_02_15` or older. This is done once, the first time we
create a test dataset in this process, i.e. done in [[metabase.test.data.interface/before-run]].
See [[metabase.test.data.snowflake/delete-old-datasets-if-needed!]] for an example.
See this Slack thread for more info
https://metaboat.slack.com/archives/CKZEMT1MJ/p1676659086280609?thread_ts=1676656964.624609&cid=CKZEMT1MJ or ask
me (Cam) if you have any questions."
(:require
[clojure.string :as str]
[clojure.test :refer :all]
[java-time :as t]
[metabase.public-settings :as public-settings]
[metabase.util.date-2 :as u.date]))
(defn- utc-date
"`LocalDate` in UTC time."
[]
(t/local-date (u.date/with-time-zone-same-instant (t/zoned-date-time) "UTC")))
(defn- unique-prefix*
([]
(unique-prefix* (utc-date)))
([local-date]
{:pre [(instance? java.time.LocalDate local-date)]}
(-> (format "%s_%s_" local-date (public-settings/site-uuid))
(str/replace #"-" "_"))))
(def ^{:arglists '([])} unique-prefix
"Unique prefix for test datasets for this instance. Format is `<current-date-utc>_<site-uuid>_`. See comments above.
Example:
2023_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_"
(memoize unique-prefix*))
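;; Memoized with no arguments, so the prefix is computed once and stays stable
;; for the lifetime of the process -- even if a long test run crosses UTC
;; midnight (an inference from the memoization, not stated explicitly here).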
(defn old-dataset-name?
"Is this dataset name prefixed by a date two days ago or older?
If the date is invalid e.g. `2023-02-31` then we'll count it as old so it will get deleted anyway."
[dataset-name]
(when-let [[_ year month day] (re-matches #"^(\d{4})_(\d{2})_(\d{2}).*$" dataset-name)]
(let [dataset-date (try
(t/local-date (parse-long year) (parse-long month) (parse-long day))
(catch Throwable _
nil))]
(if-not dataset-date
true
(t/before? dataset-date (u.date/add (utc-date) :day -1))))))
(deftest ^:parallel old-dataset-name?-test
(are [s] (old-dataset-name? s)
"2023_02_01_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"2023_01_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"2022_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
(str (unique-prefix* (u.date/add (utc-date) :day -2)) "test-data")
;; if the date is invalid we should just treat it as old and delete it.
"2022_00_00_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"2022_13_01_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"2022_02_31_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data")
(are [s] (not (old-dataset-name? s))
"2050_02_17_82e897cb_ad31_4c82_a4b6_3e9e2e1dc1cb_test-data"
"v3_test-data"
(str (unique-prefix*) "test-data")
(str (unique-prefix* (u.date/add (utc-date) :day -1)) "test-data")))
@@ -150,20 +150,24 @@
patterns-type-prop (keyword (str (:name schema-filter-prop) "-patterns"))]
(testing "Filtering connections for schemas works as expected"
(testing " with an inclusion filter"
(sync-and-assert-filtered-tables {:name (format "Test %s DB with dataset inclusion filters" driver)
:engine driver
:details (-> (mt/db)
:details
(assoc filter-type-prop "inclusion"
patterns-type-prop "s*,v*"))}
(fn [{schema-name :schema}]
(is (contains? #{\s \v} (first schema-name))))))
(sync-and-assert-filtered-tables
{:name (format "Test %s DB with dataset inclusion filters" driver)
:engine driver
:details (-> (mt/db)
:details
(assoc filter-type-prop "inclusion"
patterns-type-prop "s*,v*,2*"))}
(fn [{schema-name :schema}]
(testing (format "schema name = %s" (pr-str schema-name))
(is (contains? #{\s \v \2} (first schema-name)))))))
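;; (The "2*" pattern and the \2 check presumably account for the new
;; date-prefixed unique session schemas, e.g. "2023_02_17_..._schema".)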
(testing " with an exclusion filter"
(sync-and-assert-filtered-tables {:name (format "Test %s DB with dataset exclusion filters" driver)
:engine driver
:details (-> (mt/db)
:details
(assoc filter-type-prop "exclusion"
patterns-type-prop "v*"))}
(fn [{schema-name :schema}]
(is (not= \v (first schema-name))))))))))
(sync-and-assert-filtered-tables
{:name (format "Test %s DB with dataset exclusion filters" driver)
:engine driver
:details (-> (mt/db)
:details
(assoc filter-type-prop "exclusion"
patterns-type-prop "v*"))}
(fn [{schema-name :schema}]
(testing (format "schema name = %s" (pr-str schema-name))
(is (not= \v (first schema-name)))))))))))