From 368a44b3d20de85853bd4a21dbeae88e51370cab Mon Sep 17 00:00:00 2001 From: Allen Gilliland <agilliland@gmail.com> Date: Sun, 27 Sep 2015 21:06:42 -0700 Subject: [PATCH] use Quartzite for scheduling and running background tasks to give us some more options. --- project.clj | 4 +- src/metabase/core.clj | 13 +- src/metabase/task.clj | 204 ++++----------------------- src/metabase/task/sync_databases.clj | 42 ++++++ 4 files changed, 86 insertions(+), 177 deletions(-) create mode 100644 src/metabase/task/sync_databases.clj diff --git a/project.clj b/project.clj index 03a37fc4dcb..bbb222084c0 100644 --- a/project.clj +++ b/project.clj @@ -24,6 +24,7 @@ [cheshire "5.5.0"] ; fast JSON encoding (used by Ring JSON middleware) [clj-http-lite "0.2.1"] ; HTTP client; lightweight version of clj-http that uses HttpURLConnection instead of Apache [clj-time "0.10.0"] ; library for dealing with date/time + [clojurewerkz/quartzite "2.0.0"] ; scheduling library [colorize "0.1.1" :exclusions [org.clojure/clojure]] ; string output with ANSI color codes (for logging) [com.cemerick/friend "0.2.1"] ; auth library [com.draines/postal "1.11.3"] ; SMTP library @@ -58,7 +59,8 @@ :javac-options ["-target" "1.6" "-source" "1.6"] ;; :jar-exclusions [#"\.java"] Circle CI doesn't like regexes because it's using the EDN reader and is retarded :ring {:handler metabase.core/app - :init metabase.core/init} + :init metabase.core/init + :destroy metabase.core/destroy} :eastwood {:exclude-namespaces [:test-paths] :add-linters [:unused-private-vars] :exclude-linters [:constant-test ; korma macros generate some forms with if statements that are always logically true or false diff --git a/src/metabase/core.clj b/src/metabase/core.clj index 60cedbe976e..0b1c21a3b9d 100644 --- a/src/metabase/core.clj +++ b/src/metabase/core.clj @@ -17,6 +17,7 @@ (metabase [config :as config] [db :as db] [driver :as driver] + [events :as events] [routes :as routes] [setup :as setup] [task :as task]) @@ -25,8 +26,7 @@ [format :refer :all]) (metabase.models [setting :refer [defsetting]] [database :refer [Database]] - [user :refer [User]]) - [metabase.events :as events])) + [user :refer [User]]))) ;; ## CONFIG @@ -85,11 +85,19 @@ setup-url "\n\n")))) +(defn destroy + "General application shutdown function which should be called once at application shuddown." + [] + (log/info "Metabase Shutting Down ...") + (task/stop-scheduler!) + (log/info "Metabase Shutdown COMPLETE")) (defn init "General application initialization function which should be run once at application startup." [] (log/info "Metabase Initializing ... ") + ;; First of all, lets register a shutdown hook that will tidy things up for us on app exit + (.addShutdownHook (Runtime/getRuntime) (Thread. destroy)) (log/debug "Using Config:\n" (with-out-str (clojure.pprint/pprint config/config-all))) ;; Bootstrap the event system @@ -106,6 +114,7 @@ (events/publish-event :install {})) ;; Now start the task runner + (task/start-scheduler!) (task/start-task-runner!) (log/info "Metabase Initialization COMPLETE") diff --git a/src/metabase/task.clj b/src/metabase/task.clj index e0e259a8a63..8f8046c72a7 100644 --- a/src/metabase/task.clj +++ b/src/metabase/task.clj @@ -1,192 +1,48 @@ -;; -*- comment-column: 60; -*- (ns metabase.task - "Function hooks; background task runner and related hooks. - Namespaces under `metabase.task.*` will be automatically loaded when the background task runner is started." + "Background task scheduling via Quartzite. Individual tasks are defined in `metabase.task.*`" (:require clojure.java.classpath [clojure.tools.logging :as log] [clojure.tools.namespace.find :as ns-find] - [metabase.util :as u]) - (:import java.util.Calendar)) + [clojurewerkz.quartzite.scheduler :as qs])) -;; # HOOKS -;; [Just like in Emacs Lisp](http://www.gnu.org/software/emacs/manual/html_node/elisp/Hooks.html) (well, almost) <3 -;; -;; Define a hook with `defhook`, add functions to it with `add-hook!`, and run those functions later with `run-hook`: -;; -;; (defhook hourly-tasks-hook \"Tasks to run hourly\") ; define a new hook. -;; (add-hook! #'hourly-tasks-hook my-fn-to-run-hourly) ; add a function to the hook -;; (run-hook #'hourly-tasks-hook :parallel) ; run the functions associated with a hook -;; -;; See the docstrs of these functions below for further discussion. -;; -;; ### Robert Hooke -;; Yes, I known [`robert-hooke`](https://github.com/technomancy/robert-hooke) exists. -;; This library is actually bascially an implementation of Emacs Lisp [`advice`](http://www.gnu.org/software/emacs/manual/html_node/elisp/Advising-Functions.html), -;; not `add-hook`. Which is what we want here. Also, the implementation below is only like ~20 LOC so no need to pull in a 3rd-party library IMO. -;; -;; ### Differences from Emacs Lisp -;; -;; (defun add-hook (hook function &optional append local) -;; ...) -;; (add-hook 'my-wacky-hook #'some-fun t t) -;; -;; 1. In Elisp, calling `add-hook` with an undefined hook will just create the hook for you. We're not allowing that here so we can do -;; some safety checking. -;; 2. You can't define a `buffer-local` hook because there's no such thing in Clojure -;; 3. Excution order of *hook functions* here are indeterminate since they're stored in a set -;; 4. We can run our *hook functions* in parallel <3 -;; -(defmacro defhook - "Define a new hook. - (defhook hourly-tasks-hook \"Tasks to run hourly\") - - A hook is simply an atom storing a set of functions that you can run at any time with `run-hook`." - [hook-name & [docstr?]] - {:arglists '([hook-name docstr?])} - `(defonce ~(vary-meta hook-name assoc - :doc docstr? - :type ::hook) - (atom #{}))) - -;; TODO Should we require that F be a var so as to avoid duplicate lambdas being added ? -(defn add-hook! - "Add function F to HOOK (hereafter, known as one of HOOK's *hook functions*). - Calling `(run-hook #'hook)` at a later time will call this function. - - (add-hook! #'hourly-tasks-hook my-fn-to-run-hourly) - - Note that you are expected to pass the var literal of HOOK; this is so we can check its metadata. - Hooks are tagged with `:type -> :metabase.task/hook` which is checked as a precondition so you - don't accidentally use this function the wrong way." - [hook f] - {:pre [(var? hook) - (= (:type (meta hook)) ::hook) - (fn? f)]} - (swap! (var-get hook) conj f)) - -;; TODO - remove-hook! function - -(defn run-hook - "Run the *hook functions* associated with HOOK sequentially (by default) or in parallel if `:parallel` is - passed as the first arg. All subsequent args will be passed directly to the *hook functions* themselves. - - (run-hook #'hourly-tasks-hook :parallel) - - Like `add-hook!`, you are expected to pass the var literal of HOOK so we can do some safety checks for you." - {:arglists '([hook parallel? & args])} - [hook & args] - {:pre [(var? hook) - (= (:type (meta hook)) ::hook)]} - (let [[parallel args] (u/optional (partial = :parallel) args) - map-fn (if parallel pmap map)] - (dorun - (map-fn #(try (apply % args) - (catch Throwable e - (log/error (format "Caught exception when running %s function %s with args %s : %s" - (:name (meta hook)) % args e)))) - @(var-get hook))))) - - -;; # TASK RUNNER -;; The task runner is a set of hooks like `hourly-tasks-hook` that get called at certain intervals on a background thread. -;; Just add functions to these hooks with `add-hook!` and they'll be ran at the appropriate time. - -;; ## STANDARD TASK RUNNER HOOKS - -(defhook hourly-tasks-hook - "Tasks to run hourly. - Functions will be passed a single function: the current hour (according to the system calendar) in 24-hour time. - - (defn some-task [hour] - do-something ...) - (add-hook! #'hourly-tasks-hook some-task) ; some-task will be called on a background thread every hour") - -(defhook nightly-tasks-hook - "Tasks to run nightly at midnight (according to the system calendar). - Functions will be passed no arguments.") - -;; ## RUN-HOURLY-TASKS / RUN-NIGHTLY-TASKS - -(defn- hour - "Current hour (0 - 23) according to the system calendar." - [] - (.get (Calendar/getInstance) Calendar/HOUR)) - -(defn- minute - "Current minute (0 - 59) according to the system calendar." - [] - (.get (Calendar/getInstance) Calendar/MINUTE)) - -(defn- minutes-until-next-hour - "Number of minutes (1 - 60) until the top of the *next* hour." - [] - (- 60 (minute))) - -(defn- hourly-task-delay - "Number of milliseconds to wait before running hourly tasks the next time around. - - This is the number of milliseconds until the top of the next hour; e.g., if the test runner is started - with `(start-task-runner!)` at 8:23 PM, this function will return the number of milliseconds until 9:00 PM, - which will be first time we'd want to run the `hourly-task-hook` functions. - - (This is provided here so the unit tests can replace this fn so we can test the scheduling mechanism.)" - [] - (* 1000 60 (minutes-until-next-hour))) - -(defn- run-hourly-tasks - "Sleep until the top of the next hour, then run the `hourly-tasks-hook` in parallel." - [] - ;; Sleep first, that way we're not trying to run a ton of tasks as soon as Metabase spins up - (Thread/sleep (hourly-task-delay)) - (log/info "Running hourly tasks...") - (run-hook #'hourly-tasks-hook :parallel (hour)) - (recur)) - -(defn- run-nightly-tasks - "Run the `nightly-tasks-hook` (in parallel). - This is acutally just a *hook function* added to the `hourly-tasks-hook`; - it just checks whether the system hour is `0`, and, if so, runs the `nightly-tasks-hook`." - [hour] - (when (= hour 0) - (log/info "Running nightly tasks...") - (run-hook #'nightly-tasks-hook :parallel))) -(add-hook! #'hourly-tasks-hook run-nightly-tasks) - - -;; ## COLLECT TASKS IN METABASE.TASK.* NAMESPACES +(defonce ^:private quartz-scheduler + (atom nil)) (defn- find-and-load-tasks - "Search JARs + files in the classpath for Clojure namespaces that start with `metabase.task.`, then `require` them so tasks will be loaded as needed." + "Search Classpath for namespaces that start with `metabase.tasks.`, then `require` them so initialization can happen." [] (->> (ns-find/find-namespaces (clojure.java.classpath/classpath)) (filter (fn [ns-symb] (re-find #"^metabase\.task\." (name ns-symb)))) set - (map (fn [task-ns] - (log/info "Loading tasks from namespace" task-ns "...") - (require task-ns))) + (map (fn [events-ns] + (log/info "\tloading tasks namespace: " events-ns) + (require events-ns))) dorun)) - -;; ## START/STOP TASK RUNNER - -(defonce ^:private task-runner - (atom nil)) - -(defn start-task-runner! - "Start a background thread that will run tasks on the `hourly-tasks-hook` and `nightly-tasks-hook` every hour / every night, respectively. - This also loads all namespaces under `metabase.task.*`, so tasks can defined in them without needing to load them elsewhere." +(defn start-scheduler! + "Start our Quartzite scheduler which allows jobs to be submitted and triggers to begin executing." [] - (when-not @task-runner - (find-and-load-tasks) - (log/info "Starting task runner...") - (reset! task-runner (future (run-hourly-tasks))))) + (when-not @quartz-scheduler + (log/debug "Starting Quartz Scheduler") + ;; keep a reference to our scheduler + (reset! quartz-scheduler (-> (qs/initialize) qs/start)) + ;; look for job/trigger definitions + (find-and-load-tasks))) -(defn stop-task-runner! - "Stop the task runner." +(defn stop-scheduler! + "Stop our Quartzite scheduler and shutdown any running executions." [] - (when @task-runner - (log/info "Stopping task runner...") - (future-cancel @task-runner) - (reset! task-runner nil))) + (log/debug "Stopping Quartz Scheduler") + ;; tell quartz to stop everything + (qs/shutdown @quartz-scheduler) + ;; reset our scheduler reference + (reset! quartz-scheduler nil)) + + +(defn schedule-task! + "Add a given job and trigger to our scheduler." + [job trigger] + (when @quartz-scheduler + (qs/schedule @quartz-scheduler job trigger))) diff --git a/src/metabase/task/sync_databases.clj b/src/metabase/task/sync_databases.clj new file mode 100644 index 00000000000..ff305d0e56e --- /dev/null +++ b/src/metabase/task/sync_databases.clj @@ -0,0 +1,42 @@ +(ns metabase.task.sync-databases + (:require [clojure.tools.logging :as log] + [clojurewerkz.quartzite.jobs :as jobs] + [clojurewerkz.quartzite.triggers :as triggers] + [clojurewerkz.quartzite.schedule.cron :as cron] + [metabase.config :as config] + [metabase.db :as db] + [metabase.driver :as driver] + [metabase.models.database :refer [Database]] + [metabase.task :as task])) + +(def sync-databases-job-key "metabase.task.sync-databases.job") +(def sync-databases-trigger-key "metabase.task.sync-databases.trigger") + +(defonce ^:private sync-databases-job (atom nil)) +(defonce ^:private sync-databases-trigger (atom nil)) + +;; simple job which looks up all databases and runs a sync on them +;; TODO - skip the sample dataset? +(jobs/defjob SyncDatabases + [ctx] + (dorun + (for [database (db/sel :many Database)] + (future (driver/sync-database! database))))) + +;; this is what actually adds our task to the scheduler +(when (config/is-prod?) + (log/info "Submitting sync-database task to scheduler") + ;; build our job + (reset! sync-databases-job (jobs/build + (jobs/of-type SyncDatabases) + (jobs/with-identity (jobs/key sync-databases-job-key)))) + ;; build our trigger + (reset! sync-databases-trigger (triggers/build + (triggers/with-identity (triggers/key sync-databases-trigger-key)) + (triggers/start-now) + (triggers/with-schedule + ;; run at midnight daily + (cron/schedule (cron/cron-schedule "0 0 0 * * ? *"))))) + ;; submit ourselves to the scheduler + (task/schedule-task! @sync-databases-job @sync-databases-trigger)) + -- GitLab