From d13da89336dfb1f3a61ee5e43b308f9ef614e5f9 Mon Sep 17 00:00:00 2001 From: Cam Saul <1455846+camsaul@users.noreply.github.com> Date: Wed, 13 Apr 2022 09:07:47 -0700 Subject: [PATCH] Fix Snowflake start of week (#21604) --- .../src/metabase/driver/snowflake.clj | 45 ++++++------ .../test/metabase/driver/snowflake_test.clj | 72 ++++++++++--------- .../sparksql/src/metabase/driver/sparksql.clj | 9 ++- shared/src/metabase/mbql/schema.cljc | 3 + src/metabase/driver/common.clj | 13 ++-- src/metabase/driver/sql_jdbc/connection.clj | 31 ++++---- .../driver/sql_jdbc/connection_test.clj | 66 ++++++++++------- test/metabase/test.clj | 1 - 8 files changed, 129 insertions(+), 111 deletions(-) diff --git a/modules/drivers/snowflake/src/metabase/driver/snowflake.clj b/modules/drivers/snowflake/src/metabase/driver/snowflake.clj index 466ea66d1ba..e3ed9dd4e39 100644 --- a/modules/drivers/snowflake/src/metabase/driver/snowflake.clj +++ b/modules/drivers/snowflake/src/metabase/driver/snowflake.clj @@ -45,23 +45,17 @@ :sunday) (defn- start-of-week-setting->snowflake-offset - "Returns the Snowflake offset for the current :start-of-week setting, if defined. Snowflake considers :monday to be - 1, through :sunday as 7, so we need to increment our raw offset value." + "Value to use for the `WEEK_START` connection parameter -- see + https://docs.snowflake.com/en/sql-reference/parameters.html#label-week-start -- based on + the [[metabase.public-settings/start-of-week]] Setting. Snowflake considers `:monday` to be `1`, through `:sunday` + as `7`." [] - (when-let [offset (driver.common/start-of-week->int)] - (inc offset))) - -(defn- maybe-set-week-start [{:keys [:additional-options] :as details}] - (if-not (str/blank? additional-options) - (let [week-start-from-opts (-> (sql-jdbc.common/additional-options->map additional-options :url) - (get "week_start"))] - (if-not week-start-from-opts - (assoc details :week_start (or (start-of-week-setting->snowflake-offset) 7)) - details)) - (assoc details :week_start (or (start-of-week-setting->snowflake-offset) 7)))) + (inc (driver.common/start-of-week->int))) (defmethod sql-jdbc.conn/connection-details->spec :snowflake - [_ {:keys [account], :as opts}] + [_ {:keys [account additional-options], :as details}] + (when (get "week_start" (sql-jdbc.common/additional-options->map additional-options :url)) + (log/warn (trs "You should not set WEEK_START in Snowflake connection options; this might lead to incorrect results. Set the Start of Week Setting instead."))) (let [upcase-not-nil (fn [s] (when s (u/upper-case-en s)))] ;; it appears to be the case that their JDBC driver ignores `db` -- see my bug report at ;; https://support.snowflake.net/s/question/0D50Z00008WTOMCSA5/ @@ -76,17 +70,19 @@ ;; other SESSION parameters ;; not 100% sure why we need to do this but if we don't set the connection to UTC our report timezone ;; stuff doesn't work, even though we ultimately override this when we set the session timezone - :timezone "UTC"} - (-> opts - ;; original version of the Snowflake driver incorrectly used `dbname` in the details fields instead of - ;; `db`. If we run across `dbname`, correct our behavior - maybe-set-week-start + :timezone "UTC" + ;; tell Snowflake to use the same start of week that we have set for the + ;; [[metabase.public-settings/start-of-week]] Setting. + :week_start (start-of-week-setting->snowflake-offset)} + (-> details + ;; original version of the Snowflake driver incorrectly used `dbname` in the details fields instead + ;; of `db`. If we run across `dbname`, correct our behavior (set/rename-keys {:dbname :db}) ;; see https://github.com/metabase/metabase/issues/9511 (update :warehouse upcase-not-nil) (update :schema upcase-not-nil) (dissoc :host :port :timezone))) - (sql-jdbc.common/handle-additional-options opts)))) + (sql-jdbc.common/handle-additional-options details)))) (defmethod sql-jdbc.sync/database-type->base-type :snowflake [_ base-type] @@ -158,13 +154,14 @@ (defmethod sql.qp/date [:snowflake :quarter-of-year] [_ _ expr] (extract :quarter expr)) (defmethod sql.qp/date [:snowflake :year] [_ _ expr] (date-trunc :year expr)) +;; these don't need to be adjusted for start of week, since we're Setting the WEEK_START connection parameter (defmethod sql.qp/date [:snowflake :week] - [_ _ expr] - (sql.qp/adjust-start-of-week :snowflake (partial date-trunc :week) expr)) + [_driver _unit expr] + (date-trunc :week expr)) (defmethod sql.qp/date [:snowflake :day-of-week] - [_ _ expr] - (sql.qp/adjust-day-of-week :snowflake (extract :dayofweek expr))) + [_driver _unit expr] + (extract :dayofweek expr)) (defmethod sql.qp/->honeysql [:snowflake :regex-match-first] [driver [_ arg pattern]] diff --git a/modules/drivers/snowflake/test/metabase/driver/snowflake_test.clj b/modules/drivers/snowflake/test/metabase/driver/snowflake_test.clj index 11eff5048ac..bf8f8c0fbdc 100644 --- a/modules/drivers/snowflake/test/metabase/driver/snowflake_test.clj +++ b/modules/drivers/snowflake/test/metabase/driver/snowflake_test.clj @@ -124,9 +124,9 @@ (is (= true (can-connect? (:details (mt/db)))) "can-connect? should return true for normal Snowflake DB details") - (is (thrown? net.snowflake.client.jdbc.SnowflakeSQLException - (mt/suppress-output - (can-connect? (assoc (:details (mt/db)) :db (mt/random-name))))) + (is (thrown? + net.snowflake.client.jdbc.SnowflakeSQLException + (can-connect? (assoc (:details (mt/db)) :db (mt/random-name)))) "can-connect? should throw for Snowflake databases that don't exist (#9511)")))) (deftest report-timezone-test @@ -180,40 +180,42 @@ (deftest week-start-test (mt/test-driver :snowflake (testing "The WEEK_START session setting is correctly incorporated" - (letfn [(invalidate-pool! [] - (sql-jdbc.conn/invalidate-pool-for-db! (mt/db))) - (run-dayofweek-query [date-str] + (letfn [(run-dayofweek-query [date-str] (-> (mt/rows - (qp/process-query {:database (mt/id) - :type :native - :native {:query (str "SELECT DAYOFWEEK({{filter_date}})") - :template-tags {:filter_date {:name "filter_date" - :display_name "Just A Date" - :type "date"}}} - :parameters [{:type "date/single" - :target ["variable" ["template-tag" "filter_date"]] - :value date-str}]})) + (qp/process-query {:database (mt/id) + :type :native + :native {:query (str "SELECT DAYOFWEEK({{filter_date}})") + :template-tags {:filter_date {:name "filter_date" + :display_name "Just A Date" + :type "date"}}} + :parameters [{:type "date/single" + :target ["variable" ["template-tag" "filter_date"]] + :value date-str}]})) ffirst))] - (try - (testing " under the default value of 7 (Sunday)" - (is (= 1 (run-dayofweek-query "2021-01-10"))) ; Sunday (first day of the week) - (is (= 2 (run-dayofweek-query "2021-01-11")))) ; Monday (second day of the week) - (testing " when we control it via the Metabase setting value" - (mt/with-temporary-setting-values [start-of-week :monday] - (invalidate-pool!) ; have to invalidate pool to force new connection creation to pick up changed setting - (is (= 7 (run-dayofweek-query "2021-01-10"))) ; Sunday (last day of week now) - (is (= 1 (run-dayofweek-query "2021-01-11"))) ; Monday (first day of week now) - (testing " when we put it in the additional-options (connection string)" - (mt/with-temp Database [db (-> (select-keys (mt/db) [:details :engine]) - (assoc :name "Test WEEK_START Connection String") - ;; set WEEK_START as 3 (Wednesday) in connection string - ;; this should take precedence over the start-of-week setting in effect - (update :details assoc :additional-options "WEEK_START=3"))] - (mt/with-db db - (is (= 7 (run-dayofweek-query "2021-01-12"))) ; Tuesday (last day of week now) - (is (= 1 (run-dayofweek-query "2021-01-13")))))))) ; Wednesday (first day of week now) - ;; invalidate pool again after the test to ensure the connection cache forgets the temp setting version - (finally (invalidate-pool!))))))) + (testing "under the default value of 7 (Sunday)" + (mt/with-temporary-setting-values [start-of-week :sunday] + (is (= 1 (run-dayofweek-query "2021-01-10")) "Sunday (first day of the week)") + (is (= 2 (run-dayofweek-query "2021-01-11")) "Monday (second day of the week)"))) + (testing "when we control it via the Metabase setting value" + (mt/with-temporary-setting-values [start-of-week :monday] + (is (= 7 (run-dayofweek-query "2021-01-10")) "Sunday (last day of week now)") + (is (= 1 (run-dayofweek-query "2021-01-11")) "Monday (first day of week now)"))))))) + +(deftest first-day-of-week-test + (mt/test-driver :snowflake + (testing "Day-of-week should work correctly regardless of what the `start-of-week` Setting is set to (#20999)" + (mt/dataset sample-dataset + (doseq [[start-of-week friday-int] [[:friday 1] + [:monday 5] + [:sunday 6]]] + (mt/with-temporary-setting-values [start-of-week start-of-week] + (let [query (mt/mbql-query people + {:breakout [!day-of-week.birth_date] + :aggregation [[:count]] + :filter [:= $birth_date "1986-12-12"]})] + (mt/with-native-query-testing-context query + (is (= [[friday-int 1]] + (mt/rows (qp/process-query query)))))))))))) (deftest normalize-test (mt/test-driver :snowflake diff --git a/modules/drivers/sparksql/src/metabase/driver/sparksql.clj b/modules/drivers/sparksql/src/metabase/driver/sparksql.clj index 1c532373894..e92e00226fe 100644 --- a/modules/drivers/sparksql/src/metabase/driver/sparksql.clj +++ b/modules/drivers/sparksql/src/metabase/driver/sparksql.clj @@ -76,6 +76,11 @@ ;;; ------------------------------------------- Other Driver Method Impls -------------------------------------------- +(defrecord SparkSQLDataSource [url properties] + javax.sql.DataSource + (getConnection [_this] + (fixed-hive-connection/fixed-hive-connection url properties))) + (defmethod sql-jdbc.conn/connection-details->spec :sparksql [_driver {:keys [host port db jdbc-flags dbname] :or {host "localhost", port 10000, db "", jdbc-flags ""} @@ -85,9 +90,7 @@ db (or dbname db) url (format "jdbc:hive2://%s:%s/%s%s" host port db jdbc-flags) properties (pool/map->properties (dissoc opts :host :port :jdbc-flags)) - data-source (reify javax.sql.DataSource - (getConnection [_this] - (fixed-hive-connection/fixed-hive-connection url properties)))] + data-source (->SparkSQLDataSource url properties)] {:datasource data-source})) (defn- dash-to-underscore [s] diff --git a/shared/src/metabase/mbql/schema.cljc b/shared/src/metabase/mbql/schema.cljc index b9db2574631..46eb035e204 100644 --- a/shared/src/metabase/mbql/schema.cljc +++ b/shared/src/metabase/mbql/schema.cljc @@ -41,6 +41,9 @@ ;;; ------------------------------------------------- Datetime Stuff ------------------------------------------------- +;; `:day-of-week` depends on the [[metabase.public-settings/start-of-week]] Setting, by default Sunday. +;; 1 = first day of the week (e.g. Sunday) +;; 7 = last day of the week (e.g. Saturday) (def date-bucketing-units "Set of valid units for bucketing or comparing against a *date* Field." #{:default :day :day-of-week :day-of-month :day-of-year :week :week-of-year diff --git a/src/metabase/driver/common.clj b/src/metabase/driver/common.clj index 9ed7764b671..b3c2e35a0f1 100644 --- a/src/metabase/driver/common.clj +++ b/src/metabase/driver/common.clj @@ -435,16 +435,17 @@ (def ^:private ^clojure.lang.PersistentVector days-of-week [:monday :tuesday :wednesday :thursday :friday :saturday :sunday]) -(defn start-of-week->int - "Returns the int value for the current :start-of-week setting value, which ranges from 0 (:monday) to 6 (:sunday). - If the :start-of-week setting does not have a value, then `nil` is returned." +(s/defn start-of-week->int :- (s/pred (fn [n] (and (integer? n) (<= 0 n 6))) + "Start of week integer") + "Returns the int value for the current [[metabase.public-settings/start-of-week]] Setting value, which ranges from + `0` (`:monday`) to `6` (`:sunday`). This is guaranteed to return a value." {:added "0.42.0"} [] - (when-let [v (setting/get-value-of-type :keyword :start-of-week)] - (.indexOf days-of-week v))) + (.indexOf days-of-week (setting/get-value-of-type :keyword :start-of-week))) (s/defn start-of-week-offset :- s/Int - "Return the offset for start of week to have the week start on `setting/start-of-week` given `driver`." + "Return the offset for start of week to have the week start on [[metabase.public-settings/start-of-week]] given + `driver`." [driver] (let [db-start-of-week (.indexOf days-of-week (driver/db-start-of-week driver)) target-start-of-week (start-of-week->int) diff --git a/src/metabase/driver/sql_jdbc/connection.clj b/src/metabase/driver/sql_jdbc/connection.clj index f73d8ad05b3..3961eb92b79 100644 --- a/src/metabase/driver/sql_jdbc/connection.clj +++ b/src/metabase/driver/sql_jdbc/connection.clj @@ -119,22 +119,21 @@ (atom {})) (defonce ^:private ^{:doc "A map of DB details hash values, keyed by Database `:id`."} - database-id->db-details-hashes + database-id->jdbc-spec-hash (atom {})) -(defn- db-details-hash - "Computes a hash value for the given `database`'s `:details` map, for the purpose of determining if details changed - and therefore the existing connection pool needs to be invalidated." - [database] +(defn- jdbc-spec-hash + "Computes a hash value for the JDBC connection spec based on `database`'s `:details` map, for the purpose of + determining if details changed and therefore the existing connection pool needs to be invalidated." + [{driver :engine, :keys [details], :as database}] {:pre [(or nil? (instance? (type Database) database))]} - (if (some? database) - (hash (:details database)) - nil)) + (when (some? database) + (hash (connection-details->spec driver details)))) (defn- set-pool! "Atomically update the current connection pool for Database `database` with `database-id`. Use this function instead of modifying database-id->connection-pool` directly because it properly closes down old pools in a thread-safe way, - ensuring no more than one pool is ever open for a single database. Also modifies the database-id->db-details-hashes + ensuring no more than one pool is ever open for a single database. Also modifies the [[database-id->jdbc-spec-hash]] map with the hash value of the given DB's details map." [database-id pool-spec-or-nil database] {:pre [(integer? database-id)]} @@ -146,7 +145,7 @@ (when-not (identical? old-pool-spec pool-spec-or-nil) (destroy-pool! database-id old-pool-spec)))) ;; update the db details hash cache with the new hash value - (swap! database-id->db-details-hashes assoc database-id (db-details-hash database)) + (swap! database-id->jdbc-spec-hash assoc database-id (jdbc-spec-hash database)) nil) (defn invalidate-pool-for-db! @@ -155,7 +154,7 @@ (set-pool! (u/the-id database) nil nil)) (defn notify-database-updated - "Default implementation of `driver/notify-database-updated` for JDBC SQL drivers. We are being informed that a + "Default implementation of [[driver/notify-database-updated]] for JDBC SQL drivers. We are being informed that a `database` has been updated, so lets shut down the connection pool (if it exists) under the assumption that the connection details have changed." [database] @@ -166,7 +165,7 @@ db-id))) nil) -(defn- log-db-details-hash-change-msg! [db-id] +(defn- log-jdbc-spec-hash-change-msg! [db-id] (log/warn (u/format-color 'yellow (trs "Hash of database {0} details changed; marking pool invalid to reopen it" db-id))) nil) @@ -190,16 +189,16 @@ (when-let [details (get @database-id->connection-pool db-id)] (cond ;; details hash changed from what is cached; invalid - (let [curr-hash (get @database-id->db-details-hashes db-id) - new-hash (db-details-hash db)] + (let [curr-hash (get @database-id->jdbc-spec-hash db-id) + new-hash (jdbc-spec-hash db)] (when (and (some? curr-hash) (not= curr-hash new-hash)) ;; the hash didn't match, but it's possible that a stale instance of `DatabaseInstance` ;; was passed in (ex: from a long-running sync operation); fetch the latest one from ;; our app DB, and see if it STILL doesn't match (not= curr-hash (-> (db/select-one [Database :id :engine :details] :id database-id) - db-details-hash)))) + jdbc-spec-hash)))) (if log-invalidation? - (log-db-details-hash-change-msg! db-id) + (log-jdbc-spec-hash-change-msg! db-id) nil) (nil? (:tunnel-session details)) ; no tunnel in use; valid diff --git a/test/metabase/driver/sql_jdbc/connection_test.clj b/test/metabase/driver/sql_jdbc/connection_test.clj index 3a9ad780a47..f05ebd53a0b 100644 --- a/test/metabase/driver/sql_jdbc/connection_test.clj +++ b/test/metabase/driver/sql_jdbc/connection_test.clj @@ -19,7 +19,7 @@ (deftest can-connect-with-details?-test (is (= true (driver.u/can-connect-with-details? :h2 (:details (data/db))))) - (testing "Lie and say Test DB is Postgres. CAN-CONNECT? should fail" + (testing "Lie and say Test DB is Postgres. `can-connect?` should fail" (is (= false (driver.u/can-connect-with-details? :postgres (:details (data/db)))))) (testing "Random made-up DBs should fail" @@ -75,50 +75,64 @@ ;; ensure that, for any sql-jdbc drier anyway, we found *some* DB name to use in this String (is (not= db-nm "null")))))) +(deftest same-connection-details-result-in-equal-specs-test + (testing "Two JDBC specs created with the same details must be considered equal for the connection pool cache to work correctly" + ;; this is only really a concern for drivers like Spark SQL that create custom DataSources instead of plain details + ;; maps -- those DataSources need to be considered equal based on the connection string/properties + (mt/test-drivers (sql-jdbc.tu/sql-jdbc-drivers) + (let [details (:details (mt/db)) + spec-1 (sql-jdbc.conn/connection-details->spec driver/*driver* details) + spec-2 (sql-jdbc.conn/connection-details->spec driver/*driver* details)] + (is (= spec-1 spec-2)))))) + +(defn- perturb-db-details [db] + (update db + :details + (fn [details] + (case driver/*driver* + :redshift + (assoc details :additional-options "defaultRowFetchSize=1000") + + (cond-> details + ;; swap localhost and 127.0.0.1 + (= "localhost" (:host details)) + (assoc :host "127.0.0.1") + + (= "127.0.0.1" (:host details)) + (assoc :host "localhost") + + :else + (assoc :new-config "something")))))) + (deftest connection-pool-invalidated-on-details-change (mt/test-drivers (sql-jdbc.tu/sql-jdbc-drivers) - (testing "db->pooled-connection-spec marks a connection pool invalid if the db details map changes" + (testing "db->pooled-connection-spec marks a connection pool invalid if the db details map changes\n" (let [db (mt/db) hash-change-called-times (atom 0) hash-change-fn (fn [db-id] (is (= (u/the-id db) db-id)) (swap! hash-change-called-times inc) - nil) - perturb-db-details (fn [db] - (update db - :details - (fn [details] - (case driver/*driver* - :redshift - (assoc details :additional-options "defaultRowFetchSize=1000") - - (cond-> details - ;; swap localhost and 127.0.0.1 - (= "localhost" (:host details)) - (assoc :host "127.0.0.1") - - (= "127.0.0.1" (:host details)) - (assoc :host "localhost") - - :else - (assoc :new-config "something"))))))] + nil)] (try (sql-jdbc.conn/invalidate-pool-for-db! db) ;; a little bit hacky to redefine the log fn, but it's the most direct way to test - (with-redefs [sql-jdbc.conn/log-db-details-hash-change-msg! hash-change-fn] + (with-redefs [sql-jdbc.conn/log-jdbc-spec-hash-change-msg! hash-change-fn] (let [pool-spec-1 (sql-jdbc.conn/db->pooled-connection-spec db) - db-hash-1 (get @@#'sql-jdbc.conn/database-id->db-details-hashes (u/the-id db))] + db-hash-1 (get @@#'sql-jdbc.conn/database-id->jdbc-spec-hash (u/the-id db))] (testing "hash value calculated correctly for new pooled conn" (is (some? pool-spec-1)) (is (integer? db-hash-1)) (is (not= db-hash-1 0))) (testing "changing DB details results in hash value changing and connection being invalidated" (let [db-perturbed (perturb-db-details db)] + (testing "The calculated hash should be different" + (is (not= (#'sql-jdbc.conn/jdbc-spec-hash db) + (#'sql-jdbc.conn/jdbc-spec-hash db-perturbed)))) (db/update! Database (mt/id) :details (:details db-perturbed)) - (let [;; this call should result in the connection pool becoming invalidated, and the new hash value + (let [ ;; this call should result in the connection pool becoming invalidated, and the new hash value ;; being stored based upon these updated details pool-spec-2 (sql-jdbc.conn/db->pooled-connection-spec db-perturbed) - db-hash-2 (get @@#'sql-jdbc.conn/database-id->db-details-hashes (u/the-id db))] + db-hash-2 (get @@#'sql-jdbc.conn/database-id->jdbc-spec-hash (u/the-id db))] ;; to throw a wrench into things, kick off a sync of the original db (unperturbed); this ;; simulates a long running sync that began before the perturbed details were saved to the app DB ;; the sync steps SHOULD NOT invalidate the connection pool, because doing so could cause a seesaw @@ -133,7 +147,7 @@ ;; even after getting the latest `DatabaseInstance` (sync/sync-database! db {:scan :schema}) (is (some? pool-spec-2)) - (is (= 1 @hash-change-called-times)) + (is (= 1 @hash-change-called-times) "One hash change should have been logged") (is (integer? db-hash-2)) (is (not= db-hash-2 0)) (is (not= db-hash-1 db-hash-2))))))) diff --git a/test/metabase/test.clj b/test/metabase/test.clj index c4f87a72c39..62d5cee7c46 100644 --- a/test/metabase/test.clj +++ b/test/metabase/test.clj @@ -99,7 +99,6 @@ when-testing-driver] [driver - *driver* with-driver] [et -- GitLab