Skip to content
Snippets Groups Projects
Unverified Commit 5bfd21b7 authored by Dennis Schridde's avatar Dennis Schridde Committed by GitHub
Browse files

Add a prometheus metric for emails sent (#31333)

# Context

In order to be able to alert operators to Metabase instances sending excessive
amounts of emails, we need to measure the number of emails sent.

# Notes

To break a dependency cycle [^1] `connection-pool-info` was moved from
`metabase.troubleshooting` to `metabase.analytics.prometheus` and
`GET "/diagnostic_info/connection_pool_info"` in `metabase.api.util`
was adjusted accordingly.

[^1]: metabase.analytics.prometheus -> metabase.troubleshooting -> metabase.analytics.stats -> metabase.email -> metabase.analytics.prometheus, and longer variations.

# Acceptance test

* On a Metabase instance with `MB_PROMETHEUS_SERVER_PORT` set, check
  `localhost:<port>/metrics` and find the number of successfully sent emails
  (e.g. when inviting users) in the `metabase_email_messages_total` sample and
  the number of failures in the `metabase_email_message_errors_total` sample,
  both without any labels.

Closes: https://github.com/metabase/metabase/issues/30241
parent d3546ce4
No related branches found
No related tags found
No related merge requests found
......@@ -6,12 +6,12 @@
Api is quite simple: [[setup!]] and [[shutdown!]]. After that you can retrieve metrics from
http://localhost:<prometheus-server-port>/metrics."
(:require
[clojure.java.jmx :as jmx]
[iapetos.collector :as collector]
[iapetos.collector.ring :as collector.ring]
[iapetos.core :as prometheus]
[metabase.models.setting :as setting :refer [defsetting]]
[metabase.server :as server]
[metabase.troubleshooting :as troubleshooting]
[metabase.util.i18n :refer [deferred-trs trs]]
[metabase.util.log :as log]
[potemkin :as p]
......@@ -22,6 +22,7 @@
(io.prometheus.client.hotspot GarbageCollectorExports MemoryPoolsExports StandardExports ThreadExports)
(io.prometheus.client.jetty JettyStatisticsCollector)
(java.util ArrayList List)
(javax.management ObjectName)
(org.eclipse.jetty.server Server)))
(set! *warn-on-reflection* true)
......@@ -44,8 +45,6 @@
raw-value))))]
(setting/get-raw-value :prometheus-server-port integer? parse))))
(defonce ^:private ^{:doc "Prometheus System for prometheus metrics"} system nil)
(p.types/defprotocol+ PrometheusActions
(stop-web-server [this]))
......@@ -57,6 +56,8 @@
(when-let [^Server web-server web-server]
(.stop web-server))))
(defonce ^:private ^{:doc "Prometheus System for prometheus metrics"} ^PrometheusSystem system nil)
(declare setup-metrics! start-web-server!)
(defn- make-prometheus-system
......@@ -64,7 +65,7 @@
serving metrics from that port."
[port registry-name]
(try
(let [registry (setup-metrics! registry-name)
(let [registry (setup-metrics! registry-name)
web-server (start-web-server! port registry)]
(->PrometheusSystem registry web-server))
(catch Exception e
......@@ -75,7 +76,7 @@
;;; Collectors
(defn c3p0-stats
"Takes `raw-stats` from [[metabase.troubleshooting/connection-pool-info]] and groups by each property type rather than each database.
"Takes `raw-stats` from [[connection-pool-info]] and groups by each property type rather than each database.
{\"metabase-postgres-app-db\" {:numConnections 15,
:numIdleConnections 15,
:numBusyConnections 0,
......@@ -142,11 +143,21 @@
raw-label))))
arr))
(defn- conn-pool-bean-diag-info [acc ^ObjectName jmx-bean]
(let [bean-id (.getCanonicalName jmx-bean)
props [:numConnections :numIdleConnections :numBusyConnections
:minPoolSize :maxPoolSize :numThreadsAwaitingCheckoutDefaultUser]]
(assoc acc (jmx/read bean-id :dataSourceName) (jmx/read bean-id props))))
(defn connection-pool-info
"Builds a map of info about the current c3p0 connection pools managed by this Metabase instance."
[]
(reduce conn-pool-bean-diag-info {} (jmx/mbean-names "com.mchange.v2.c3p0:type=PooledDataSource,*")))
(def c3p0-collector
"c3p0 collector delay"
(letfn [(collect-metrics []
(-> (troubleshooting/connection-pool-info)
:connection-pools
(-> (connection-pool-info)
c3p0-stats
stats->prometheus))]
(delay
......@@ -192,7 +203,12 @@
(apply prometheus/register registry
(concat (jvm-collectors)
(jetty-collectors)
[@c3p0-collector]))))
[@c3p0-collector]
; Iapetos will use "default" if we do not provide a namespace, so explicitly set `metabase-email`:
[(prometheus/counter :metabase-email/messages
{:description (trs "Number of emails sent.")})
(prometheus/counter :metabase-email/message-errors
{:description (trs "Number of errors when sending emails.")})]))))
(defn- start-web-server!
"Start the prometheus web-server. If [[prometheus-server-port]] is not set it will throw."
......@@ -229,11 +245,19 @@
(locking #'system
(when system
(try (stop-web-server system)
(prometheus/clear (.-registry system))
(alter-var-root #'system (constantly nil))
(log/info (trs "Prometheus web-server shut down"))
(catch Exception e
(log/warn e (trs "Error stopping prometheus web-server"))))))))
#_{:clj-kondo/ignore [:redefined-var]}
(defn inc
"Call iapetos.core/inc on the metric in the global registry,
if it has already been initialized and the metric is registered."
[metric]
(some-> system .-registry metric prometheus/inc))
(comment
(require 'iapetos.export)
(spit "metrics" (iapetos.export/text-format (.registry system))))
......@@ -4,6 +4,7 @@
(:require
[compojure.core :refer [GET POST]]
[crypto.random :as crypto-random]
[metabase.analytics.prometheus :as prometheus]
[metabase.analytics.stats :as stats]
[metabase.api.common :as api]
[metabase.api.common.validation :as validation]
......@@ -54,8 +55,8 @@
"Returns database connection pool info for the current Metabase instance."
[]
(validation/check-has-application-permission :monitoring)
(let [pool-info (troubleshooting/connection-pool-info)
(let [pool-info (prometheus/connection-pool-info)
headers {"Content-Disposition" "attachment; filename=\"connection_pool_info.json\""}]
(assoc (response/response pool-info) :headers headers, :status 200)))
(assoc (response/response {:connection-pools pool-info}) :headers headers, :status 200)))
(api/define-routes)
(ns metabase.email
(:require
[metabase.analytics.prometheus :as prometheus]
[metabase.models.setting :as setting :refer [defsetting]]
[metabase.util :as u]
[metabase.util.i18n :refer [deferred-tru trs tru]]
......@@ -121,23 +122,29 @@
does not catch and swallow thrown exceptions, it will bubble up."
{:style/indent 0}
[{:keys [subject recipients message-type message]} :- EmailMessage]
(when-not (email-smtp-host)
(throw (ex-info (tru "SMTP host is not set.") {:cause :smtp-host-not-set})))
;; Now send the email
(send-email! (smtp-settings)
(merge
{:from (if-let [from-name (email-from-name)]
(str from-name " <" (email-from-address) ">")
(email-from-address))
:to recipients
:subject subject
:body (case message-type
:attachments message
:text message
:html [{:type "text/html; charset=utf-8"
:content message}])}
(when-let [reply-to (email-reply-to)]
{:reply-to reply-to}))))
(try
(when-not (email-smtp-host)
(throw (ex-info (tru "SMTP host is not set.") {:cause :smtp-host-not-set})))
;; Now send the email
(send-email! (smtp-settings)
(merge
{:from (if-let [from-name (email-from-name)]
(str from-name " <" (email-from-address) ">")
(email-from-address))
:to recipients
:subject subject
:body (case message-type
:attachments message
:text message
:html [{:type "text/html; charset=utf-8"
:content message}])}
(when-let [reply-to (email-reply-to)]
{:reply-to reply-to})))
(catch Throwable e
(prometheus/inc :metabase-email/message-errors)
(throw e))
(finally
(prometheus/inc :metabase-email/messages))))
(def ^:private SMTPStatus
"Schema for the response returned by various functions in [[metabase.email]]. Response will be a map with the key
......
(ns metabase.troubleshooting
(:require
[clojure.java.jmx :as jmx]
[metabase.analytics.stats :as stats]
[metabase.config :as config]
[metabase.db :as mdb]
[metabase.driver :as driver]
[toucan2.core :as t2])
(:import
(javax.management ObjectName)))
[toucan2.core :as t2]))
(set! *warn-on-reflection* true)
......@@ -43,15 +40,3 @@
:run-mode (config/config-kw :mb-run-mode)
:version config/mb-version-info
:settings {:report-timezone (driver/report-timezone)}})
(defn- conn-pool-bean-diag-info [acc ^ObjectName jmx-bean]
(let [bean-id (.getCanonicalName jmx-bean)
props [:numConnections :numIdleConnections :numBusyConnections
:minPoolSize :maxPoolSize :numThreadsAwaitingCheckoutDefaultUser]]
(assoc acc (jmx/read bean-id :dataSourceName) (jmx/read bean-id props))))
(defn connection-pool-info
"Builds a map of info about the current c3p0 connection pools managed by this Metabase instance."
[]
(->> (reduce conn-pool-bean-diag-info {} (jmx/mbean-names "com.mchange.v2.c3p0:type=PooledDataSource,*"))
(assoc {} :connection-pools)))
......@@ -6,10 +6,11 @@
[clojure.test :refer :all]
[iapetos.registry :as registry]
[metabase.analytics.prometheus :as prometheus]
[metabase.test.fixtures :as fixtures]
[metabase.troubleshooting :as troubleshooting])
[metabase.test.fixtures :as fixtures])
(:import
(io.prometheus.client Collector GaugeMetricFamily)
(iapetos.registry IapetosRegistry)
(io.prometheus.client Collector Collector$MetricFamilySamples CollectorRegistry GaugeMetricFamily)
(metabase.analytics.prometheus PrometheusSystem)
(org.eclipse.jetty.server Server)))
(set! *warn-on-reflection* true)
......@@ -104,8 +105,9 @@
(#'prometheus/make-prometheus-system 0 (name (gensym "test-registry")))
server# ^Server (.web-server ~system)
~port (.. server# getURI getPort)]
(try ~@body
(finally (prometheus/stop-web-server ~system)))))
(with-redefs [prometheus/system ~system]
(try ~@body
(finally (prometheus/stop-web-server ~system))))))
(deftest web-server-test
(testing "Can get metrics from the web-server"
......@@ -128,7 +130,7 @@
:namespace "metabase_database"}
nil)]
(is c3p0-collector "c3p0 stats not found"))))
(testing "Registry has an entry for each database in [[troubleshooting/connection-pool-info]]"
(testing "Registry has an entry for each database in [[prometheus/connection-pool-info]]"
(with-prometheus-system [_ system]
(let [registry (.registry system)
c3p0-collector (registry/get registry {:name "c3p0_stats"
......@@ -138,12 +140,12 @@
measurements (.collect ^Collector c3p0-collector)
_ (is (pos? (count measurements))
"No measurements taken")]
(is (= (count (:connection-pools (troubleshooting/connection-pool-info)))
(is (= (count (prometheus/connection-pool-info))
(count (.samples ^GaugeMetricFamily (first measurements))))
"Expected one entry per database for each measurement"))))
(testing "Registry includes c3p0 stats"
(with-prometheus-system [port _]
(let [[db-name values] (first (:connection-pools (troubleshooting/connection-pool-info)))
(let [[db-name values] (first (prometheus/connection-pool-info))
tag-name (comp :label #'prometheus/label-translation)
expected-lines (set (for [[tag value] values]
(format "%s{database=\"%s\",} %s"
......@@ -152,3 +154,27 @@
(metric-lines port))]
(is (seq (set/intersection expected-lines actual-lines))
"Registry does not have c3p0 metrics in it")))))
(deftest email-collector-test
(testing "Registry has email metrics registered"
(with-prometheus-system [_ system]
(let [sample-names (set (for [metric-family-samples (enumeration-seq (.metricFamilySamples ^CollectorRegistry (.raw ^IapetosRegistry (.-registry system))))
sample-name (.getNames ^Collector$MetricFamilySamples metric-family-samples)]
sample-name))]
(is (contains? sample-names "metabase_email_messages") "messages sample not found")
(is (contains? sample-names "metabase_email_message_errors") "message_errors sample not found")))))
(defn- get-sample-value
[^PrometheusSystem system ^String metric]
(.getSampleValue ^CollectorRegistry (registry/raw (.-registry system)) metric))
(deftest inc-server-test
(testing "inc has no effect if system is not setup"
(prometheus/inc :metabase-email/messages)) ; << Does not throw.
(testing "inc has no effect when called with unknown metric"
(with-prometheus-system [_ _system]
(prometheus/inc :metabase-email/unknown-metric))) ; << Does not throw.
(testing "inc is recorded for known metrics"
(with-prometheus-system [_ system]
(prometheus/inc :metabase-email/messages)
(is (< 0 (get-sample-value system "metabase_email_messages_total"))))))
......@@ -4,6 +4,7 @@
[clojure.java.io :as io]
[clojure.test :refer :all]
[medley.core :as m]
[metabase.analytics.prometheus :as prometheus]
[metabase.email :as email]
[metabase.test.data.users :as test.users]
[metabase.test.util :as tu]
......@@ -237,6 +238,28 @@
:message-type :html
:message "101. Metabase will make you a better person")
(@inbox "test@test.com")))))
(testing "metrics collection"
(let [calls (atom nil)]
(with-redefs [prometheus/inc #(swap! calls conj %)]
(with-fake-inbox
(email/send-message!
:subject "101 Reasons to use Metabase"
:recipients ["test@test.com"]
:message-type :html
:message "101. Metabase will make you a better person")))
(is (= 1 (count (filter #{:metabase-email/messages} @calls))))
(is (= 0 (count (filter #{:metabase-email/message-errors} @calls))))))
(testing "error metrics collection"
(let [calls (atom nil)]
(with-redefs [prometheus/inc #(swap! calls conj %)
email/send-email! (fn [_ _] (throw (Exception. "test-exception")))]
(email/send-message!
:subject "101 Reasons to use Metabase"
:recipients ["test@test.com"]
:message-type :html
:message "101. Metabase will make you a better person"))
(is (= 1 (count (filter #{:metabase-email/messages} @calls))))
(is (= 1 (count (filter #{:metabase-email/message-errors} @calls))))))
(testing "basic sending without email-from-name"
(tu/with-temporary-setting-values [email-from-name nil]
(is (=
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment