From dbf40b7207b71fae1dc08911a607a8ef4e4a5996 Mon Sep 17 00:00:00 2001 From: Case Nelson <case@metabase.com> Date: Tue, 24 May 2022 15:32:14 -0600 Subject: [PATCH] Add anchor time to persistence schedule (#22827) * Add anchor time to persistence schedule Introduces a new public-setting persisted-model-refresh-anchor-time, representing the time to begin the refresh job. Defaults to midnight "00:00". Also peg the cron jobs to run in the first of: reporting timezone, system timezone, or UTC. This means that if a user needs the refresh to be anchored consistently to a certain time in a certain place, they need to update reporting timezone to match. * Add tests for anchor time * Force anchor-time to midnight if refresh hours is less than 6 * Need to check that hours was passed in * Fix ns lint * Add tests to ensure timezone is being set on trigger --- src/metabase/api/database.clj | 6 ++- src/metabase/api/persist.clj | 28 +++++++++--- src/metabase/public_settings.clj | 6 +++ src/metabase/task/persist_refresh.clj | 41 ++++++++++-------- test/metabase/task/persist_refresh_test.clj | 48 ++++++++++++++++++--- 5 files changed, 98 insertions(+), 31 deletions(-) diff --git a/src/metabase/api/database.clj b/src/metabase/api/database.clj index f3535c9d7c9..567a84ddea4 100644 --- a/src/metabase/api/database.clj +++ b/src/metabase/api/database.clj @@ -668,8 +668,10 @@ ;; do secrets require special handling to not clobber them or mess up encryption? (do (db/update! Database id :options (assoc (:options database) :persist-models-enabled true)) - (task.persist-refresh/schedule-persistence-for-database! database - (public-settings/persisted-model-refresh-interval-hours)) + (task.persist-refresh/schedule-persistence-for-database! + database + (public-settings/persisted-model-refresh-interval-hours) + (public-settings/persisted-model-refresh-anchor-time)) api/generic-204-no-content) (throw (ex-info (ddl.i/error->message error schema) {:error error diff --git a/src/metabase/api/persist.clj b/src/metabase/api/persist.clj index 90ad44e1dad..87bb927ee37 100644 --- a/src/metabase/api/persist.clj +++ b/src/metabase/api/persist.clj @@ -1,5 +1,6 @@ (ns metabase.api.persist - (:require [clojure.tools.logging :as log] + (:require [clojure.string :as str] + [clojure.tools.logging :as log] [compojure.core :refer [GET POST]] [honeysql.helpers :as hh] [metabase.api.common :as api] @@ -92,12 +93,29 @@ (deferred-tru "Integer greater than or equal to one and less than or equal to twenty-four")) (deferred-tru "Value must be an integer representing hours greater than or equal to one and less than or equal to twenty-four"))) +(def ^:private AnchorTime + "Schema representing valid anchor time for refreshing persisted models." + (su/with-api-error-message + (s/constrained s/Str (fn [t] + (let [[hours minutes] (map parse-long (str/split t #":"))] + (and (<= 0 hours 23) (<= 0 minutes 59)))) + (deferred-tru "String representing a time in format HH:mm")) + (deferred-tru "Value must be a string representing a time in format HH:mm"))) + (api/defendpoint POST "/set-interval" - "Set the interval (in hours) to refresh persisted models. Shape should be JSON like {hours: 4}." - [:as {{:keys [hours], :as _body} :body}] - {hours HoursInterval} + "Set the interval (in hours) to refresh persisted models. + Anchor can be provided to set the time to begin the interval (local to reporting-timezone or system). + Shape should be JSON like {hours: 4, anchor: 16:45}." + [:as {{:keys [hours anchor], :as _body} :body}] + {hours (s/maybe HoursInterval) + anchor (s/maybe AnchorTime)} (validation/check-has-application-permission :setting) - (public-settings/persisted-model-refresh-interval-hours! hours) + (when hours + (public-settings/persisted-model-refresh-interval-hours! hours)) + (if (and hours (< hours 6)) + (public-settings/persisted-model-refresh-anchor-time! "00:00") + (when anchor + (public-settings/persisted-model-refresh-anchor-time! anchor))) (task.persist-refresh/reschedule-refresh!) api/generic-204-no-content) diff --git a/src/metabase/public_settings.clj b/src/metabase/public_settings.clj index 009fe679a82..bea4f9ba1c1 100644 --- a/src/metabase/public_settings.clj +++ b/src/metabase/public_settings.clj @@ -219,6 +219,12 @@ :default 6 :visibility :admin) +(defsetting persisted-model-refresh-anchor-time + (deferred-tru "Anchor time to begin refreshing persisted models.") + :type :string + :default "00:00" + :visibility :admin) + (def ^:private ^:const global-max-caching-kb "Although depending on the database, we can support much larger cached values (1GB for PG, 2GB for H2 and 4GB for MySQL) we are not curretly setup to deal with data of that size. The datatypes we are using will hold this data in diff --git a/src/metabase/task/persist_refresh.clj b/src/metabase/task/persist_refresh.clj index 09c52a4e9a3..99d8d74b580 100644 --- a/src/metabase/task/persist_refresh.clj +++ b/src/metabase/task/persist_refresh.clj @@ -1,11 +1,13 @@ (ns metabase.task.persist-refresh - (:require [clojure.tools.logging :as log] + (:require [clojure.string :as str] + [clojure.tools.logging :as log] [clojurewerkz.quartzite.conversion :as qc] [clojurewerkz.quartzite.jobs :as jobs] [clojurewerkz.quartzite.schedule.cron :as cron] [clojurewerkz.quartzite.triggers :as triggers] [java-time :as t] [metabase.db :as mdb] + [metabase.driver :as driver] [metabase.driver.ddl.interface :as ddl.i] [metabase.driver.sql.query-processor :as sql.qp] [metabase.models.card :refer [Card]] @@ -13,12 +15,14 @@ [metabase.models.persisted-info :as persisted-info :refer [PersistedInfo]] [metabase.models.task-history :refer [TaskHistory]] [metabase.public-settings :as public-settings] + [metabase.query-processor.timezone :as qp.timezone] [metabase.task :as task] [metabase.util :as u] [metabase.util.i18n :refer [trs]] [potemkin.types :as p] [toucan.db :as db]) - (:import [org.quartz ObjectAlreadyExistsException Trigger])) + (:import java.util.TimeZone + [org.quartz ObjectAlreadyExistsException Trigger])) (defn- job-context->job-type [job-context] @@ -236,14 +240,16 @@ (defn- cron-schedule "Return a cron schedule that fires every `hours` hours." - [hours] - (cron/schedule - ;; every 8 hours - (cron/cron-schedule (if (= hours 24) - ;; hack: scheduling for midnight UTC but this does raise the issue of anchor time - "0 0 0 * * ? *" - (format "0 0 0/%d * * ? *" hours))) - (cron/with-misfire-handling-instruction-do-nothing))) + [hours anchor-time] + (let [[start-hour start-minute] (map parse-long (str/split anchor-time #":"))] + (cron/schedule + (cron/cron-schedule (if (= 24 hours) + (format "0 %d %d * * ? *" start-minute start-hour) + (format "0 %d %d/%d * * ? *" start-minute start-hour hours))) + (cron/in-time-zone (TimeZone/getTimeZone (or (driver/report-timezone) + (qp.timezone/system-timezone-id) + "UTC"))) + (cron/with-misfire-handling-instruction-do-nothing)))) (def ^:private prune-scheduled-trigger (triggers/build @@ -252,7 +258,7 @@ (triggers/for-job (jobs/key prune-job-key)) (triggers/start-now) (triggers/with-schedule - (cron-schedule 1)))) + (cron-schedule 1 "00:00")))) (def ^:private prune-once-trigger (triggers/build @@ -261,7 +267,7 @@ (triggers/for-job (jobs/key prune-job-key)) (triggers/start-now))) -(defn- database-trigger [database interval-hours] +(defn- database-trigger [database interval-hours anchor-time] (triggers/build (triggers/with-description (format "Refresh models for database %d" (u/the-id database))) (triggers/with-identity (database-trigger-key database)) @@ -270,7 +276,7 @@ (triggers/for-job (jobs/key refresh-job-key)) (triggers/start-now) (triggers/with-schedule - (cron-schedule interval-hours)))) + (cron-schedule interval-hours anchor-time)))) (defn- individual-trigger [persisted-info] (triggers/build @@ -285,8 +291,8 @@ (defn schedule-persistence-for-database! "Schedule a database for persistence refreshing." - [database interval-hours] - (let [tggr (database-trigger database interval-hours)] + [database interval-hours anchor-time] + (let [tggr (database-trigger database interval-hours anchor-time)] (log/info (u/format-color 'green "Scheduling persistence refreshes for database %d: trigger: %s" @@ -344,10 +350,11 @@ `:persist-models-enabled` in the options at interval [[public-settings/persisted-model-refresh-interval-hours]]." [] (let [dbs-with-persistence (filter (comp :persist-models-enabled :options) (Database)) - interval-hours (public-settings/persisted-model-refresh-interval-hours)] + interval-hours (public-settings/persisted-model-refresh-interval-hours) + anchor-time (public-settings/persisted-model-refresh-anchor-time)] (unschedule-all-refresh-triggers! refresh-job-key) (doseq [db dbs-with-persistence] - (schedule-persistence-for-database! db interval-hours)))) + (schedule-persistence-for-database! db interval-hours anchor-time)))) (defn enable-persisting! "Enable persisting diff --git a/test/metabase/task/persist_refresh_test.clj b/test/metabase/task/persist_refresh_test.clj index c17ac3c065b..26b8005a3f0 100644 --- a/test/metabase/task/persist_refresh_test.clj +++ b/test/metabase/task/persist_refresh_test.clj @@ -3,6 +3,7 @@ [clojurewerkz.quartzite.conversion :as qc] [medley.core :as m] [metabase.models :refer [Card Database PersistedInfo TaskHistory]] + [metabase.query-processor.timezone :as qp.timezone] [metabase.task.persist-refresh :as pr] [metabase.test :as mt] [metabase.util :as u] @@ -22,25 +23,46 @@ (deftest cron-schedule-test (testing "creates schedule per hour when less than 24 hours" (is (= "0 0 0/8 * * ? *" - (schedule-string (#'pr/cron-schedule 8))))) + (schedule-string (#'pr/cron-schedule 8 "00:00")))) + (testing "when anchored" + (is (= "0 30 1/8 * * ? *" + (schedule-string (#'pr/cron-schedule 8 "01:30")))))) (testing "creates schedule string per day when 24 hours" (is (= "0 0 0 * * ? *" - (schedule-string (#'pr/cron-schedule 24)))))) + (schedule-string (#'pr/cron-schedule 24 "00:00")))) + (testing "when anchored" + (is (= "0 30 1 * * ? *" + (schedule-string (#'pr/cron-schedule 24 "01:30"))))))) (deftest trigger-job-info-test (testing "Database refresh trigger" - (let [tggr (#'pr/database-trigger {:id 1} 5)] + (let [tggr (#'pr/database-trigger {:id 1} 5 "00:00")] (is (= {"db-id" 1 "type" "database"} (qc/from-job-data (.getJobDataMap tggr)))) (is (= "0 0 0/5 * * ? *" (schedule-string tggr))) (is (= "metabase.task.PersistenceRefresh.database.trigger.1" (.. tggr getKey getName)))) - (let [tggr (#'pr/database-trigger {:id 1} 24)] + (let [tggr (#'pr/database-trigger {:id 1} 24 "00:00")] (is (= {"db-id" 1 "type" "database"} (qc/from-job-data (.getJobDataMap tggr)))) (is (= "0 0 0 * * ? *" - (schedule-string tggr))))) + (schedule-string tggr)))) + (testing "in report timezone UTC" + (mt/with-temporary-setting-values [report-timezone "UTC"] + (let [tggr (#'pr/database-trigger {:id 1} 5 "00:00")] + (is (= "UTC" + (.. tggr getTimeZone getID)))))) + (testing "in report timezone LA" + (mt/with-temporary-setting-values [report-timezone "America/Los_Angeles"] + (let [tggr (#'pr/database-trigger {:id 1} 5 "00:00")] + (is (= "America/Los_Angeles" + (.. tggr getTimeZone getID)))))) + (testing "in system timezone" + (mt/with-temporary-setting-values [report-timezone nil] + (let [tggr (#'pr/database-trigger {:id 1} 5 "00:00")] + (is (= (qp.timezone/system-timezone-id) + (.. tggr getTimeZone getID))))))) (testing "Individual refresh trigger" (let [tggr (#'pr/individual-trigger {:card_id 5 :id 1})] (is (= {"persisted-id" 1 "type" "individual"} @@ -60,7 +82,8 @@ (mt/with-temp* [Database [db-1 {:options {:persist-models-enabled true}}] Database [db-2 {:options {:persist-models-enabled true}}]] (#'pr/job-init!) - (mt/with-temporary-setting-values [persisted-model-refresh-interval-hours 4] + (mt/with-temporary-setting-values [persisted-model-refresh-interval-hours 4 + persisted-model-refresh-anchor-time "00:00"] (pr/reschedule-refresh!) (is (= {(u/the-id db-1) {:data {"db-id" (u/the-id db-1) "type" "database"} :schedule "0 0 0/4 * * ? *" @@ -69,7 +92,8 @@ :schedule "0 0 0/4 * * ? *" :key (format "metabase.task.PersistenceRefresh.database.trigger.%d" (u/the-id db-2))}} (job-info db-1 db-2)))) - (mt/with-temporary-setting-values [persisted-model-refresh-interval-hours 8] + (mt/with-temporary-setting-values [persisted-model-refresh-interval-hours 8 + persisted-model-refresh-anchor-time "00:00"] (pr/reschedule-refresh!) (is (= {(u/the-id db-1) {:data {"db-id" (u/the-id db-1) "type" "database"} :schedule "0 0 0/8 * * ? *" @@ -77,6 +101,16 @@ (u/the-id db-2) {:data {"db-id" (u/the-id db-2) "type" "database"} :schedule "0 0 0/8 * * ? *" :key (format "metabase.task.PersistenceRefresh.database.trigger.%d" (u/the-id db-2))}} + (job-info db-1 db-2)))) + (mt/with-temporary-setting-values [persisted-model-refresh-interval-hours 8 + persisted-model-refresh-anchor-time "01:30"] + (pr/reschedule-refresh!) + (is (= {(u/the-id db-1) {:data {"db-id" (u/the-id db-1) "type" "database"} + :schedule "0 30 1/8 * * ? *" + :key (format "metabase.task.PersistenceRefresh.database.trigger.%d" (u/the-id db-1))} + (u/the-id db-2) {:data {"db-id" (u/the-id db-2) "type" "database"} + :schedule "0 30 1/8 * * ? *" + :key (format "metabase.task.PersistenceRefresh.database.trigger.%d" (u/the-id db-2))}} (job-info db-1 db-2))))))) -- GitLab