Skip to content
Snippets Groups Projects
Unverified Commit dbf40b72 authored by Case Nelson's avatar Case Nelson Committed by GitHub
Browse files

Add anchor time to persistence schedule (#22827)

* Add anchor time to persistence schedule

Introduces a new public-setting persisted-model-refresh-anchor-time,
representing the time to begin the refresh job. Defaults to midnight
"00:00".

Also peg the cron jobs to run in the first of: reporting timezone, system timezone, or
UTC. This means that if a user needs the refresh to be anchored consistently to a certain time in a certain place, they need to update reporting timezone to match.

* Add tests for anchor time

* Force anchor-time to midnight if refresh hours is less than 6

* Need to check that hours was passed in

* Fix ns lint

* Add tests to ensure timezone is being set on trigger
parent 2e633b45
No related merge requests found
......@@ -668,8 +668,10 @@
;; do secrets require special handling to not clobber them or mess up encryption?
(do (db/update! Database id :options
(assoc (:options database) :persist-models-enabled true))
(task.persist-refresh/schedule-persistence-for-database! database
(public-settings/persisted-model-refresh-interval-hours))
(task.persist-refresh/schedule-persistence-for-database!
database
(public-settings/persisted-model-refresh-interval-hours)
(public-settings/persisted-model-refresh-anchor-time))
api/generic-204-no-content)
(throw (ex-info (ddl.i/error->message error schema)
{:error error
......
(ns metabase.api.persist
(:require [clojure.tools.logging :as log]
(:require [clojure.string :as str]
[clojure.tools.logging :as log]
[compojure.core :refer [GET POST]]
[honeysql.helpers :as hh]
[metabase.api.common :as api]
......@@ -92,12 +93,29 @@
(deferred-tru "Integer greater than or equal to one and less than or equal to twenty-four"))
(deferred-tru "Value must be an integer representing hours greater than or equal to one and less than or equal to twenty-four")))
(def ^:private AnchorTime
"Schema representing valid anchor time for refreshing persisted models."
(su/with-api-error-message
(s/constrained s/Str (fn [t]
(let [[hours minutes] (map parse-long (str/split t #":"))]
(and (<= 0 hours 23) (<= 0 minutes 59))))
(deferred-tru "String representing a time in format HH:mm"))
(deferred-tru "Value must be a string representing a time in format HH:mm")))
(api/defendpoint POST "/set-interval"
"Set the interval (in hours) to refresh persisted models. Shape should be JSON like {hours: 4}."
[:as {{:keys [hours], :as _body} :body}]
{hours HoursInterval}
"Set the interval (in hours) to refresh persisted models.
Anchor can be provided to set the time to begin the interval (local to reporting-timezone or system).
Shape should be JSON like {hours: 4, anchor: 16:45}."
[:as {{:keys [hours anchor], :as _body} :body}]
{hours (s/maybe HoursInterval)
anchor (s/maybe AnchorTime)}
(validation/check-has-application-permission :setting)
(public-settings/persisted-model-refresh-interval-hours! hours)
(when hours
(public-settings/persisted-model-refresh-interval-hours! hours))
(if (and hours (< hours 6))
(public-settings/persisted-model-refresh-anchor-time! "00:00")
(when anchor
(public-settings/persisted-model-refresh-anchor-time! anchor)))
(task.persist-refresh/reschedule-refresh!)
api/generic-204-no-content)
......
......@@ -219,6 +219,12 @@
:default 6
:visibility :admin)
(defsetting persisted-model-refresh-anchor-time
(deferred-tru "Anchor time to begin refreshing persisted models.")
:type :string
:default "00:00"
:visibility :admin)
(def ^:private ^:const global-max-caching-kb
"Although depending on the database, we can support much larger cached values (1GB for PG, 2GB for H2 and 4GB for
MySQL) we are not curretly setup to deal with data of that size. The datatypes we are using will hold this data in
......
(ns metabase.task.persist-refresh
(:require [clojure.tools.logging :as log]
(:require [clojure.string :as str]
[clojure.tools.logging :as log]
[clojurewerkz.quartzite.conversion :as qc]
[clojurewerkz.quartzite.jobs :as jobs]
[clojurewerkz.quartzite.schedule.cron :as cron]
[clojurewerkz.quartzite.triggers :as triggers]
[java-time :as t]
[metabase.db :as mdb]
[metabase.driver :as driver]
[metabase.driver.ddl.interface :as ddl.i]
[metabase.driver.sql.query-processor :as sql.qp]
[metabase.models.card :refer [Card]]
......@@ -13,12 +15,14 @@
[metabase.models.persisted-info :as persisted-info :refer [PersistedInfo]]
[metabase.models.task-history :refer [TaskHistory]]
[metabase.public-settings :as public-settings]
[metabase.query-processor.timezone :as qp.timezone]
[metabase.task :as task]
[metabase.util :as u]
[metabase.util.i18n :refer [trs]]
[potemkin.types :as p]
[toucan.db :as db])
(:import [org.quartz ObjectAlreadyExistsException Trigger]))
(:import java.util.TimeZone
[org.quartz ObjectAlreadyExistsException Trigger]))
(defn- job-context->job-type
[job-context]
......@@ -236,14 +240,16 @@
(defn- cron-schedule
"Return a cron schedule that fires every `hours` hours."
[hours]
(cron/schedule
;; every 8 hours
(cron/cron-schedule (if (= hours 24)
;; hack: scheduling for midnight UTC but this does raise the issue of anchor time
"0 0 0 * * ? *"
(format "0 0 0/%d * * ? *" hours)))
(cron/with-misfire-handling-instruction-do-nothing)))
[hours anchor-time]
(let [[start-hour start-minute] (map parse-long (str/split anchor-time #":"))]
(cron/schedule
(cron/cron-schedule (if (= 24 hours)
(format "0 %d %d * * ? *" start-minute start-hour)
(format "0 %d %d/%d * * ? *" start-minute start-hour hours)))
(cron/in-time-zone (TimeZone/getTimeZone (or (driver/report-timezone)
(qp.timezone/system-timezone-id)
"UTC")))
(cron/with-misfire-handling-instruction-do-nothing))))
(def ^:private prune-scheduled-trigger
(triggers/build
......@@ -252,7 +258,7 @@
(triggers/for-job (jobs/key prune-job-key))
(triggers/start-now)
(triggers/with-schedule
(cron-schedule 1))))
(cron-schedule 1 "00:00"))))
(def ^:private prune-once-trigger
(triggers/build
......@@ -261,7 +267,7 @@
(triggers/for-job (jobs/key prune-job-key))
(triggers/start-now)))
(defn- database-trigger [database interval-hours]
(defn- database-trigger [database interval-hours anchor-time]
(triggers/build
(triggers/with-description (format "Refresh models for database %d" (u/the-id database)))
(triggers/with-identity (database-trigger-key database))
......@@ -270,7 +276,7 @@
(triggers/for-job (jobs/key refresh-job-key))
(triggers/start-now)
(triggers/with-schedule
(cron-schedule interval-hours))))
(cron-schedule interval-hours anchor-time))))
(defn- individual-trigger [persisted-info]
(triggers/build
......@@ -285,8 +291,8 @@
(defn schedule-persistence-for-database!
"Schedule a database for persistence refreshing."
[database interval-hours]
(let [tggr (database-trigger database interval-hours)]
[database interval-hours anchor-time]
(let [tggr (database-trigger database interval-hours anchor-time)]
(log/info
(u/format-color 'green
"Scheduling persistence refreshes for database %d: trigger: %s"
......@@ -344,10 +350,11 @@
`:persist-models-enabled` in the options at interval [[public-settings/persisted-model-refresh-interval-hours]]."
[]
(let [dbs-with-persistence (filter (comp :persist-models-enabled :options) (Database))
interval-hours (public-settings/persisted-model-refresh-interval-hours)]
interval-hours (public-settings/persisted-model-refresh-interval-hours)
anchor-time (public-settings/persisted-model-refresh-anchor-time)]
(unschedule-all-refresh-triggers! refresh-job-key)
(doseq [db dbs-with-persistence]
(schedule-persistence-for-database! db interval-hours))))
(schedule-persistence-for-database! db interval-hours anchor-time))))
(defn enable-persisting!
"Enable persisting
......
......@@ -3,6 +3,7 @@
[clojurewerkz.quartzite.conversion :as qc]
[medley.core :as m]
[metabase.models :refer [Card Database PersistedInfo TaskHistory]]
[metabase.query-processor.timezone :as qp.timezone]
[metabase.task.persist-refresh :as pr]
[metabase.test :as mt]
[metabase.util :as u]
......@@ -22,25 +23,46 @@
(deftest cron-schedule-test
(testing "creates schedule per hour when less than 24 hours"
(is (= "0 0 0/8 * * ? *"
(schedule-string (#'pr/cron-schedule 8)))))
(schedule-string (#'pr/cron-schedule 8 "00:00"))))
(testing "when anchored"
(is (= "0 30 1/8 * * ? *"
(schedule-string (#'pr/cron-schedule 8 "01:30"))))))
(testing "creates schedule string per day when 24 hours"
(is (= "0 0 0 * * ? *"
(schedule-string (#'pr/cron-schedule 24))))))
(schedule-string (#'pr/cron-schedule 24 "00:00"))))
(testing "when anchored"
(is (= "0 30 1 * * ? *"
(schedule-string (#'pr/cron-schedule 24 "01:30")))))))
(deftest trigger-job-info-test
(testing "Database refresh trigger"
(let [tggr (#'pr/database-trigger {:id 1} 5)]
(let [tggr (#'pr/database-trigger {:id 1} 5 "00:00")]
(is (= {"db-id" 1 "type" "database"}
(qc/from-job-data (.getJobDataMap tggr))))
(is (= "0 0 0/5 * * ? *"
(schedule-string tggr)))
(is (= "metabase.task.PersistenceRefresh.database.trigger.1"
(.. tggr getKey getName))))
(let [tggr (#'pr/database-trigger {:id 1} 24)]
(let [tggr (#'pr/database-trigger {:id 1} 24 "00:00")]
(is (= {"db-id" 1 "type" "database"}
(qc/from-job-data (.getJobDataMap tggr))))
(is (= "0 0 0 * * ? *"
(schedule-string tggr)))))
(schedule-string tggr))))
(testing "in report timezone UTC"
(mt/with-temporary-setting-values [report-timezone "UTC"]
(let [tggr (#'pr/database-trigger {:id 1} 5 "00:00")]
(is (= "UTC"
(.. tggr getTimeZone getID))))))
(testing "in report timezone LA"
(mt/with-temporary-setting-values [report-timezone "America/Los_Angeles"]
(let [tggr (#'pr/database-trigger {:id 1} 5 "00:00")]
(is (= "America/Los_Angeles"
(.. tggr getTimeZone getID))))))
(testing "in system timezone"
(mt/with-temporary-setting-values [report-timezone nil]
(let [tggr (#'pr/database-trigger {:id 1} 5 "00:00")]
(is (= (qp.timezone/system-timezone-id)
(.. tggr getTimeZone getID)))))))
(testing "Individual refresh trigger"
(let [tggr (#'pr/individual-trigger {:card_id 5 :id 1})]
(is (= {"persisted-id" 1 "type" "individual"}
......@@ -60,7 +82,8 @@
(mt/with-temp* [Database [db-1 {:options {:persist-models-enabled true}}]
Database [db-2 {:options {:persist-models-enabled true}}]]
(#'pr/job-init!)
(mt/with-temporary-setting-values [persisted-model-refresh-interval-hours 4]
(mt/with-temporary-setting-values [persisted-model-refresh-interval-hours 4
persisted-model-refresh-anchor-time "00:00"]
(pr/reschedule-refresh!)
(is (= {(u/the-id db-1) {:data {"db-id" (u/the-id db-1) "type" "database"}
:schedule "0 0 0/4 * * ? *"
......@@ -69,7 +92,8 @@
:schedule "0 0 0/4 * * ? *"
:key (format "metabase.task.PersistenceRefresh.database.trigger.%d" (u/the-id db-2))}}
(job-info db-1 db-2))))
(mt/with-temporary-setting-values [persisted-model-refresh-interval-hours 8]
(mt/with-temporary-setting-values [persisted-model-refresh-interval-hours 8
persisted-model-refresh-anchor-time "00:00"]
(pr/reschedule-refresh!)
(is (= {(u/the-id db-1) {:data {"db-id" (u/the-id db-1) "type" "database"}
:schedule "0 0 0/8 * * ? *"
......@@ -77,6 +101,16 @@
(u/the-id db-2) {:data {"db-id" (u/the-id db-2) "type" "database"}
:schedule "0 0 0/8 * * ? *"
:key (format "metabase.task.PersistenceRefresh.database.trigger.%d" (u/the-id db-2))}}
(job-info db-1 db-2))))
(mt/with-temporary-setting-values [persisted-model-refresh-interval-hours 8
persisted-model-refresh-anchor-time "01:30"]
(pr/reschedule-refresh!)
(is (= {(u/the-id db-1) {:data {"db-id" (u/the-id db-1) "type" "database"}
:schedule "0 30 1/8 * * ? *"
:key (format "metabase.task.PersistenceRefresh.database.trigger.%d" (u/the-id db-1))}
(u/the-id db-2) {:data {"db-id" (u/the-id db-2) "type" "database"}
:schedule "0 30 1/8 * * ? *"
:key (format "metabase.task.PersistenceRefresh.database.trigger.%d" (u/the-id db-2))}}
(job-info db-1 db-2)))))))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment