diff --git a/src/metabase/metabot.clj b/src/metabase/metabot.clj index efcb2b786bb44323d0d898637ae2fc2687933258..a60adcb7af128479786bff65129893bca85b4be0 100644 --- a/src/metabase/metabot.clj +++ b/src/metabase/metabot.clj @@ -9,6 +9,7 @@ (manifold [bus :as bus] [deferred :as d] [stream :as s]) + [throttle.core :as throttle] [metabase.db :as db] [metabase.integrations.slack :as slack] [metabase.models.setting :refer [defsetting], :as setting] @@ -227,21 +228,44 @@ ;;; Websocket monitor -;; Keep track of the Thread ID of the current monitor thread. Monitor threads should check this ID and if it is no longer equal to -;; theirs they should die +;; Keep track of the Thread ID of the current monitor thread. Monitor threads should check this ID +;; and if it is no longer equal to theirs they should die (defonce ^:private websocket-monitor-thread-id (atom nil)) +;; we'll use a THROTTLER to implement exponential backoff for reconnection attempts, since THROTTLERS are designed for this sort of thing +;; e.g. after the first failed connection we'll wait 2 seconds, then each time that amount increases by the `:delay-exponent` of 1.3 +;; so our reconnection schedule will look something like: +;; number of consecutive failed attempts | seconds before next try (rounded up to nearest multiple of 2 seconds) +;; --------------------------------------+---------------------------------------------------------------------- +;; 0 | 2 +;; 1 | 4 +;; 2 | 4 +;; 3 | 6 +;; 4 | 8 +;; 5 | 14 +;; 6 | 30 +;; we'll throttle this based on values of the `slack-token` setting; that way if someone changes its value they won't have to wait +;; whatever the exponential delay is before the connection is retried +(def ^:private reconnection-attempt-throttler + (throttle/make-throttler nil :attempts-threshold 1, :initial-delay-ms 2000, :delay-exponent 1.3)) + +(defn- should-attempt-to-reconnect? 
^Boolean [] + (boolean (u/ignore-exceptions + (throttle/check reconnection-attempt-throttler (slack/slack-token)) + true))) + (defn- start-websocket-monitor! [] (future (reset! websocket-monitor-thread-id (.getId (Thread/currentThread))) ;; Every 2 seconds check to see if websocket connection is [still] open, [re-]open it if not (loop [] - (Thread/sleep 500) + (while (not (should-attempt-to-reconnect?)) + (Thread/sleep 2000)) (when (= (.getId (Thread/currentThread)) @websocket-monitor-thread-id) (try (when (or (not @websocket) (s/closed? @websocket)) - (log/debug "MetaBot WebSocket is closed. Reconnecting now.") + (log/debug "MetaBot WebSocket is closed. Reconnecting now.") (connect-websocket!)) (catch Throwable e (log/error "Error connecting websocket:" (.getMessage e)))) @@ -252,7 +276,7 @@ This will spin up a background thread that opens and maintains a Slack WebSocket connection." [] - (when (and (setting/get :slack-token) + (when (and (slack/slack-token) (metabot-enabled)) (log/info "Starting MetaBot WebSocket monitor thread...") (start-websocket-monitor!)))