Skip to content
Snippets Groups Projects
Unverified Commit fc54bd83 authored by Anton Kulyk's avatar Anton Kulyk Committed by GitHub
Browse files

Update model caching schedule API (#23734)


* Replace interval-hours and anchor-time with cron

Removed the two settings and replaced with a single cron schedule setting

Renamed the /set-interval endpoint to /set-refresh-schedule

* Ignore "year" part in schedule endpoint

* Fix variable

* Use a temp scheduler and initialize the refresh job

Running into errors when updating the triggers for each database's
refresh job because the "job" itself didn't exist. Reminder of what's
going on here: There's a single refresh job. It has a trigger for each
database. So updating the trigger would fail since it doesn't exist
since there was no job to hold the triggers.

This error is quite clear in the tests run locally:

```
ERROR in metabase.api.persist-test/set-refresh-schedule-test (util.clj:421)
Uncaught exception, not in assertion.

        clojure.lang.ExceptionInfo: Error in with-temporary-setting-values: Couldn't store trigger 'DEFAULT.metabase.task.PersistenceRefresh.database.trigger.1' for 'DEFAULT.metabase.task.PersistenceRefresh.job' job:The job (DEFAULT.metabase.task.PersistenceRefresh.job) referenced by the trigger does not exist.
    location: metabase.public-settings/persisted-models-enabled
     setting: "persisted-models-enabled"
       value: true
```

But this logging is absent from the logging in Github annoyingly:

```
FAIL in metabase.api.persist-test/set-refresh-schedule-test (persist_test.clj:26)
Setting new cron schedule reschedules refresh tasks
Setting :persisted-models-enabled = true
expected: "0 0 0/12 * * ? *"
  actual: (nil)
```
Whic doesn't give us the error, just the test result.

* update to newer API `set-interval` -> `set-refresh-schedule`

```
;; old
{:hours 4}

;; new
{:cron "0 0 0/1 * * ? *"}
```

Co-authored-by: default avatarCase Nelson <case@metabase.com>
Co-authored-by: default avatardan sutton <dan@dpsutton.com>
parent 72a16788
No related branches found
No related tags found
No related merge requests found
......@@ -334,8 +334,11 @@
(testing (format "persist/disable with %s user" (mt/user-descriptor user))
(mt/user-http-request user :post status "persist/disable")))
(set-interval [user status]
(testing (format "persist/set-interval with %s user" (mt/user-descriptor user))
(mt/user-http-request user :post status "persist/set-interval" {"hours" 1})))]
(testing (format "persist/set-refresh-schedule with %s user"
(mt/user-descriptor user))
(mt/user-http-request user :post status
"persist/set-refresh-schedule"
{"cron" "0 0 0/1 * * ? *"})))]
(testing "if `advanced-permissions` is disabled, require admins,"
(enable-persist :crowberto 204)
......
......@@ -672,8 +672,7 @@
(assoc (:options database) :persist-models-enabled true))
(task.persist-refresh/schedule-persistence-for-database!
database
(public-settings/persisted-model-refresh-interval-hours)
(public-settings/persisted-model-refresh-anchor-time))
(public-settings/persisted-model-refresh-cron-schedule))
api/generic-204-no-content)
(throw (ex-info (ddl.i/error->message error schema)
{:error error
......
......@@ -86,36 +86,28 @@
(api/write-check (Database (:database_id persisted-info)))
persisted-info))
(def ^:private HoursInterval
"Schema representing valid interval hours for refreshing persisted models."
(su/with-api-error-message
(s/constrained s/Int #(<= 1 % 24)
(deferred-tru "Integer greater than or equal to one and less than or equal to twenty-four"))
(deferred-tru "Value must be an integer representing hours greater than or equal to one and less than or equal to twenty-four")))
(def ^:private AnchorTime
"Schema representing valid anchor time for refreshing persisted models."
(def ^:private CronSchedule
"Schema representing valid cron schedule for refreshing persisted models."
(su/with-api-error-message
(s/constrained s/Str (fn [t]
(let [[hours minutes] (map parse-long (str/split t #":"))]
(and (<= 0 hours 23) (<= 0 minutes 59))))
(deferred-tru "String representing a time in format HH:mm"))
(deferred-tru "Value must be a string representing a time in format HH:mm")))
(let [parts (str/split t #" ")]
(= 7 (count parts))))
(deferred-tru "String representing a cron schedule"))
(deferred-tru "Value must be a string representing a cron schedule of format <seconds> <minutes> <hours> <day of month> <month> <day of week> <year>")))
(api/defendpoint POST "/set-interval"
"Set the interval (in hours) to refresh persisted models.
Anchor can be provided to set the time to begin the interval (local to reporting-timezone or system).
Shape should be JSON like {hours: 4, anchor: 16:45}."
[:as {{:keys [hours anchor], :as _body} :body}]
{hours (s/maybe HoursInterval)
anchor (s/maybe AnchorTime)}
(api/defendpoint POST "/set-refresh-schedule"
"Set the cron schedule to refresh persisted models.
Shape should be JSON like {cron: \"0 30 1/8 * * ? *\"}."
[:as {{:keys [cron], :as _body} :body}]
{cron CronSchedule}
(validation/check-has-application-permission :setting)
(when hours
(public-settings/persisted-model-refresh-interval-hours! hours))
(if (and hours (< hours 6))
(public-settings/persisted-model-refresh-anchor-time! "00:00")
(when anchor
(public-settings/persisted-model-refresh-anchor-time! anchor)))
(when cron
(when-not (and (string? cron)
(org.quartz.CronExpression/isValidExpression cron)
(str/ends-with? cron "*"))
(throw (ex-info (tru "Must be a valid cron string not specifying a year")
{:status-code 400})))
(public-settings/persisted-model-refresh-cron-schedule! cron))
(task.persist-refresh/reschedule-refresh!)
api/generic-204-no-content)
......
......@@ -213,16 +213,10 @@
:default false
:visibility :authenticated)
(defsetting persisted-model-refresh-interval-hours
(deferred-tru "Hour interval to refresh persisted models.")
:type :integer
:default 6
:visibility :admin)
(defsetting persisted-model-refresh-anchor-time
(deferred-tru "Anchor time to begin refreshing persisted models.")
(defsetting persisted-model-refresh-cron-schedule
(deferred-tru "cron syntax string to schedule refreshing persisted models.")
:type :string
:default "00:00"
:default "0 0 0/6 * * ? *"
:visibility :admin)
(def ^:private ^:const global-max-caching-kb
......
......@@ -253,16 +253,23 @@
(defn- cron-schedule
"Return a cron schedule that fires every `hours` hours."
[hours anchor-time]
(let [[start-hour start-minute] (map parse-long (str/split anchor-time #":"))]
(cron/schedule
(cron/cron-schedule (if (= 24 hours)
[cron-spec]
(cron/schedule
(cron/cron-schedule cron-spec)
(cron/in-time-zone (TimeZone/getTimeZone (or (driver/report-timezone)
(qp.timezone/system-timezone-id)
"UTC")))
(cron/with-misfire-handling-instruction-do-nothing)))
(comment
(let [[start-hour start-minute] (map parse-long (str/split "00:00" #":"))
hours 1]
(if (= 24 hours)
(format "0 %d %d * * ? *" start-minute start-hour)
(format "0 %d %d/%d * * ? *" start-minute start-hour hours)))
(cron/in-time-zone (TimeZone/getTimeZone (or (driver/report-timezone)
(qp.timezone/system-timezone-id)
"UTC")))
(cron/with-misfire-handling-instruction-do-nothing))))
)
(def ^:private prune-scheduled-trigger
(triggers/build
......@@ -271,7 +278,7 @@
(triggers/for-job (jobs/key prune-job-key))
(triggers/start-now)
(triggers/with-schedule
(cron-schedule 1 "00:00"))))
(cron-schedule "0 0 0/1 * * ? *"))))
(def ^:private prune-once-trigger
(triggers/build
......@@ -280,7 +287,7 @@
(triggers/for-job (jobs/key prune-job-key))
(triggers/start-now)))
(defn- database-trigger [database interval-hours anchor-time]
(defn- database-trigger [database cron-spec]
(triggers/build
(triggers/with-description (format "Refresh models for database %d" (u/the-id database)))
(triggers/with-identity (database-trigger-key database))
......@@ -289,7 +296,7 @@
(triggers/for-job (jobs/key refresh-job-key))
(triggers/start-now)
(triggers/with-schedule
(cron-schedule interval-hours anchor-time))))
(cron-schedule cron-spec))))
(defn- individual-trigger [persisted-info]
(triggers/build
......@@ -304,8 +311,8 @@
(defn schedule-persistence-for-database!
"Schedule a database for persistence refreshing."
[database interval-hours anchor-time]
(let [tggr (database-trigger database interval-hours anchor-time)]
[database cron-spec]
(let [tggr (database-trigger database cron-spec)]
(log/info
(u/format-color 'green
"Scheduling persistence refreshes for database %d: trigger: %s"
......@@ -360,14 +367,13 @@
(defn reschedule-refresh!
"Reschedule refresh for all enabled databases. Removes all existing triggers, and schedules refresh for databases with
`:persist-models-enabled` in the options at interval [[public-settings/persisted-model-refresh-interval-hours]]."
`:persist-models-enabled` in the options at interval [[public-settings/persisted-model-refresh-cron-schedule]]."
[]
(let [dbs-with-persistence (filter (comp :persist-models-enabled :options) (Database))
interval-hours (public-settings/persisted-model-refresh-interval-hours)
anchor-time (public-settings/persisted-model-refresh-anchor-time)]
cron-schedule (public-settings/persisted-model-refresh-cron-schedule)]
(unschedule-all-refresh-triggers! refresh-job-key)
(doseq [db dbs-with-persistence]
(schedule-persistence-for-database! db interval-hours anchor-time))))
(schedule-persistence-for-database! db cron-schedule))))
(defn enable-persisting!
"Enable persisting
......
(ns metabase.api.persist-test
(:require [clojure.test :refer :all]
[metabase.models.database :refer [Database]]
[metabase.task.persist-refresh :as task.persist-refresh]
[metabase.test :as mt]
[metabase.test.fixtures :as fixtures]))
(use-fixtures :once (fixtures/initialize :db :test-users))
(def ^:private default-cron "0 0 0/12 * * ? *")
(defn- do-with-setup [f]
(mt/with-temp-scheduler
(#'task.persist-refresh/job-init!)
(mt/with-temporary-setting-values [:persisted-models-enabled true]
(mt/with-temp* [Database [db {:options {:persist-models-enabled true}}]]
(task.persist-refresh/schedule-persistence-for-database! db default-cron)
(f db)))))
(defmacro ^:private with-setup
"Sets up a temp scheduler, a temp database and enabled persistence"
[db-binding & body]
`(do-with-setup (fn [~db-binding] ~@body)))
(deftest set-refresh-schedule-test
(testing "Setting new cron schedule reschedules refresh tasks"
(with-setup db
(is (= default-cron (get-in (task.persist-refresh/job-info-by-db-id)
[(:id db) :schedule])))
(let [new-schedule "0 0 0/12 * * ? *"]
(mt/user-http-request :crowberto :post 204 "persist/set-refresh-schedule"
{:cron new-schedule})
(is (= new-schedule
(get-in (task.persist-refresh/job-info-by-db-id)
[(:id db) :schedule]))))))
(testing "Prevents setting a year value"
(with-setup db
(let [bad-schedule "0 0 0/12 * * ? 1995"]
(is (= "Must be a valid cron string not specifying a year"
(mt/user-http-request :crowberto :post 400 "persist/set-refresh-schedule"
{:cron bad-schedule})))
(is (= default-cron
(get-in (task.persist-refresh/job-info-by-db-id)
[(:id db) :schedule])))))))
......@@ -23,44 +23,44 @@
(deftest cron-schedule-test
(testing "creates schedule per hour when less than 24 hours"
(is (= "0 0 0/8 * * ? *"
(schedule-string (#'pr/cron-schedule 8 "00:00"))))
(schedule-string (#'pr/cron-schedule "0 0 0/8 * * ? *"))))
(testing "when anchored"
(is (= "0 30 1/8 * * ? *"
(schedule-string (#'pr/cron-schedule 8 "01:30"))))))
(schedule-string (#'pr/cron-schedule "0 30 1/8 * * ? *"))))))
(testing "creates schedule string per day when 24 hours"
(is (= "0 0 0 * * ? *"
(schedule-string (#'pr/cron-schedule 24 "00:00"))))
(schedule-string (#'pr/cron-schedule "0 0 0 * * ? *"))))
(testing "when anchored"
(is (= "0 30 1 * * ? *"
(schedule-string (#'pr/cron-schedule 24 "01:30")))))))
(schedule-string (#'pr/cron-schedule "0 30 1 * * ? *")))))))
(deftest trigger-job-info-test
(testing "Database refresh trigger"
(let [tggr (#'pr/database-trigger {:id 1} 5 "00:00")]
(let [tggr (#'pr/database-trigger {:id 1} "0 0 0/5 * * ? *")]
(is (= {"db-id" 1 "type" "database"}
(qc/from-job-data (.getJobDataMap tggr))))
(is (= "0 0 0/5 * * ? *"
(schedule-string tggr)))
(is (= "metabase.task.PersistenceRefresh.database.trigger.1"
(.. tggr getKey getName))))
(let [tggr (#'pr/database-trigger {:id 1} 24 "00:00")]
(let [tggr (#'pr/database-trigger {:id 1} "0 0 0 * * ? *")]
(is (= {"db-id" 1 "type" "database"}
(qc/from-job-data (.getJobDataMap tggr))))
(is (= "0 0 0 * * ? *"
(schedule-string tggr))))
(testing "in report timezone UTC"
(mt/with-temporary-setting-values [report-timezone "UTC"]
(let [tggr (#'pr/database-trigger {:id 1} 5 "00:00")]
(let [tggr (#'pr/database-trigger {:id 1} "0 0 0/5 * * ? *")]
(is (= "UTC"
(.. tggr getTimeZone getID))))))
(testing "in report timezone LA"
(mt/with-temporary-setting-values [report-timezone "America/Los_Angeles"]
(let [tggr (#'pr/database-trigger {:id 1} 5 "00:00")]
(let [tggr (#'pr/database-trigger {:id 1} "0 0 0/5 * * ? *")]
(is (= "America/Los_Angeles"
(.. tggr getTimeZone getID))))))
(testing "in system timezone"
(mt/with-temporary-setting-values [report-timezone nil]
(let [tggr (#'pr/database-trigger {:id 1} 5 "00:00")]
(let [tggr (#'pr/database-trigger {:id 1} "0 0 0/5 * * ? *")]
(is (= (qp.timezone/system-timezone-id)
(.. tggr getTimeZone getID)))))))
(testing "Individual refresh trigger"
......@@ -82,8 +82,7 @@
(mt/with-temp* [Database [db-1 {:options {:persist-models-enabled true}}]
Database [db-2 {:options {:persist-models-enabled true}}]]
(#'pr/job-init!)
(mt/with-temporary-setting-values [persisted-model-refresh-interval-hours 4
persisted-model-refresh-anchor-time "00:00"]
(mt/with-temporary-setting-values [persisted-model-refresh-cron-schedule "0 0 0/4 * * ? *"]
(pr/reschedule-refresh!)
(is (= {(u/the-id db-1) {:data {"db-id" (u/the-id db-1) "type" "database"}
:schedule "0 0 0/4 * * ? *"
......@@ -92,8 +91,7 @@
:schedule "0 0 0/4 * * ? *"
:key (format "metabase.task.PersistenceRefresh.database.trigger.%d" (u/the-id db-2))}}
(job-info db-1 db-2))))
(mt/with-temporary-setting-values [persisted-model-refresh-interval-hours 8
persisted-model-refresh-anchor-time "00:00"]
(mt/with-temporary-setting-values [persisted-model-refresh-cron-schedule "0 0 0/8 * * ? *"]
(pr/reschedule-refresh!)
(is (= {(u/the-id db-1) {:data {"db-id" (u/the-id db-1) "type" "database"}
:schedule "0 0 0/8 * * ? *"
......@@ -102,8 +100,7 @@
:schedule "0 0 0/8 * * ? *"
:key (format "metabase.task.PersistenceRefresh.database.trigger.%d" (u/the-id db-2))}}
(job-info db-1 db-2))))
(mt/with-temporary-setting-values [persisted-model-refresh-interval-hours 8
persisted-model-refresh-anchor-time "01:30"]
(mt/with-temporary-setting-values [persisted-model-refresh-cron-schedule "0 30 1/8 * * ? *"]
(pr/reschedule-refresh!)
(is (= {(u/the-id db-1) {:data {"db-id" (u/the-id db-1) "type" "database"}
:schedule "0 30 1/8 * * ? *"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment