Skip to content
Snippets Groups Projects
Unverified Commit 47ab8f22 authored by Jeff Evans's avatar Jeff Evans Committed by GitHub
Browse files

Capture and log diagnostic info about DW connection pools (#15682)

Add new namespace to manage diagnostic info about the DW DB connection pools

Adding new atom/fn/macro to wrap the capture of diagnostic info, modeled after the call counting functionality of toucan.db

Capturing diagnostic info from a new `datasource-with-diagnostic-info!` fn in the execute namespace, and calling that from the various drivers (instead of `datasource`)

Adding to log message generated for the "stats" portion of the line logged via middleware to output this information
parent 0684bc6d
No related branches found
No related tags found
No related merge requests found
......@@ -129,7 +129,7 @@
;; connection read-only, because that seems to be causing problems for people
(defmethod sql-jdbc.execute/connection-with-timezone :redshift
[driver database ^String timezone-id]
(let [conn (.getConnection (sql-jdbc.execute/datasource database))]
(let [conn (.getConnection (sql-jdbc.execute/datasource-with-diagnostic-info! driver database))]
(try
(sql-jdbc.execute/set-best-transaction-level! driver conn)
(sql-jdbc.execute/set-time-zone-if-supported! driver conn timezone-id)
......
......@@ -142,7 +142,7 @@
;; 4. SparkSQL doesn't support setting the default result set holdability
(defmethod sql-jdbc.execute/connection-with-timezone :sparksql
[driver database ^String timezone-id]
(let [conn (.getConnection (sql-jdbc.execute/datasource database))]
(let [conn (.getConnection (sql-jdbc.execute/datasource-with-diagnostic-info! driver database))]
(try
(.setTransactionIsolation conn Connection/TRANSACTION_READ_UNCOMMITTED)
conn
......
......@@ -336,7 +336,7 @@
;; SQLite's JDBC driver is fussy and won't let you change connections to read-only after you create them
(defmethod sql-jdbc.execute/connection-with-timezone :sqlite
[driver database ^String timezone-id]
(let [conn (.getConnection (sql-jdbc.execute/datasource database))]
(let [conn (.getConnection (sql-jdbc.execute/datasource-with-diagnostic-info! driver database))]
(try
(sql-jdbc.execute/set-best-transaction-level! driver conn)
conn
......
......@@ -301,7 +301,7 @@
[driver database ^String timezone-id]
;; h2 doesn't support setting timezones, or changing the transaction level without admin perms, so we can skip those
;; steps that are in the default impl
(let [conn (.getConnection (sql-jdbc.execute/datasource database))]
(let [conn (.getConnection (sql-jdbc.execute/datasource-with-diagnostic-info! driver database))]
(try
(doto conn
(.setReadOnly true))
......
......@@ -10,6 +10,7 @@
[java-time :as t]
[metabase.driver :as driver]
[metabase.driver.sql-jdbc.connection :as sql-jdbc.conn]
[metabase.driver.sql-jdbc.execute.diagnostic :as sql-jdbc.execute.diagnostic]
[metabase.driver.sql-jdbc.execute.old-impl :as execute.old]
[metabase.driver.sql-jdbc.sync.interface :as sql-jdbc.sync]
[metabase.mbql.util :as mbql.u]
......@@ -133,6 +134,15 @@
^DataSource [database]
(:datasource (sql-jdbc.conn/db->pooled-connection-spec database)))
(defn datasource-with-diagnostic-info!
"Fetch the connection pool `DataSource` associated with `database`, while also recording diagnostic info for the
pool. To be used in conjunction with `sql-jdbc.execute.diagnostic/capturing-diagnostic-info`."
{:added "0.40.0"}
^DataSource [driver database]
(let [ds (datasource database)]
(sql-jdbc.execute.diagnostic/record-diagnostic-info-for-pool driver (u/the-id database) ds)
ds))
(defn set-time-zone-if-supported!
"Execute `set-timezone-sql`, if implemented by driver, to set the session time zone. This way of setting the time zone
should be considered deprecated in favor of implementing `connection-with-time-zone` directly."
......@@ -177,7 +187,7 @@
(defmethod connection-with-timezone :sql-jdbc
[driver database ^String timezone-id]
(let [conn (.getConnection (datasource database))]
(let [conn (.getConnection (datasource-with-diagnostic-info! driver database))]
(try
(set-best-transaction-level! driver conn)
(set-time-zone-if-supported! driver conn timezone-id)
......
(ns metabase.driver.sql-jdbc.execute.diagnostic
"Code related to capturing diagnostic information for JDBC connection pools at execution time."
(:import com.mchange.v2.c3p0.PoolBackedDataSource))
(def ^:private ^:dynamic *diagnostic-info*
"Atom used to hold diagnostic info for the current query execution, to be made available via a helper macro/fn below."
nil)
(defn do-with-diagnostic-info
"Execute `f` with diagnostic info capturing enabled. `f` is passed a single argument, a function that can be used to
retrieve the current diagnostic info. Prefer to use the macro form instead: `capturing-diagnostic-info`."
{:style/indent 0}
[f]
(binding [*diagnostic-info* (atom {})]
(f (partial deref *diagnostic-info*))))
(defmacro capturing-diagnostic-info
"Execute `body` and store diagnostic info related to the query execution. `diagnostic-info-fn-binding` is bound to a
zero-arity function that can be used to fetch the current diagnostic info.
```
(sql-jdbc.execute.diagnostic/capturing-diagnostic-info [diag-info-fn]
;; various body forms
;; fetch the diagnostic info, which should be available if execute code called `record-diagnostic-info-for-pool`
(diag-info-fn))
```"
{:style/indent 1}
[[diagnostic-info-fn-binding] & body]
`(do-with-diagnostic-info (fn [~diagnostic-info-fn-binding] ~@body)))
(defn record-diagnostic-info-for-pool
"Captures diagnostic info related to the given `driver`, `database-id`, and `datasource` (which are all related).
The current information that is captured (in a map whose keys are namespaced keywords in this ns) is:
* `::database-id`: the database ID (from the parameter value)
* `::driver`: the driver (from the parameter value)
* `::active-connections`: the number of active connections in the given datasource's pool
* `::total-connections`: the number of total connections in the given datasource's pool
* `::threads-waiting`: the number of threads waiting to get a connection to the given datasource's pool (which happens
when the number of active connections has reached the max size)."
[driver database-id ^PoolBackedDataSource datasource]
(when *diagnostic-info*
(swap! *diagnostic-info* #(assoc % ::database-id database-id
::driver driver
::active-connections (.getNumBusyConnectionsAllUsers datasource)
::total-connections (.getNumConnectionsAllUsers datasource)
::threads-waiting (.getNumThreadsAwaitingCheckoutDefaultUser datasource)))))
......@@ -6,6 +6,7 @@
[metabase.async.streaming-response :as streaming-response]
[metabase.async.streaming-response.thread-pool :as streaming-response.thread-pool]
[metabase.async.util :as async.u]
[metabase.driver.sql-jdbc.execute.diagnostic :as sql-jdbc.execute.diagnostic]
[metabase.server :as server]
[metabase.server.request.util :as request.u]
[metabase.util :as u]
......@@ -39,14 +40,15 @@
(format " [%s: %s]" (trs "ASYNC") async-status))))
(defn- format-performance-info
[{:keys [start-time call-count-fn]
[{:keys [start-time call-count-fn diag-info-fn]
:or {start-time (System/nanoTime)
call-count-fn (constantly -1)}}]
call-count-fn (constantly -1)
diag-info-fn (constantly {})}}]
(let [elapsed-time (u/format-nanoseconds (- (System/nanoTime) start-time))
db-calls (call-count-fn)]
(trs "{0} ({1} DB calls)" elapsed-time db-calls)))
(defn- stats []
(defn- stats [diag-info-fn]
(str
(let [^PoolBackedDataSource pool (:datasource (db/connection))]
(trs "App DB connections: {0}/{1}"
......@@ -63,11 +65,20 @@
" "
(trs "Queries in flight: {0}" (streaming-response.thread-pool/active-thread-count))
" "
(trs "({0} queued)" (streaming-response.thread-pool/queued-thread-count))))
(defn- format-threads-info [{:keys [include-stats?]}]
(trs "({0} queued)" (streaming-response.thread-pool/queued-thread-count))
(when diag-info-fn
(if-let [diag-info (diag-info-fn)]
(format
"; %s DB %s connections: %d/%d (%d threads blocked)"
(-> diag-info ::sql-jdbc.execute.diagnostic/driver name)
(::sql-jdbc.execute.diagnostic/database-id diag-info)
(::sql-jdbc.execute.diagnostic/active-connections diag-info)
(::sql-jdbc.execute.diagnostic/total-connections diag-info)
(::sql-jdbc.execute.diagnostic/threads-waiting diag-info))))))
(defn- format-threads-info [{:keys [diag-info-fn]} {:keys [include-stats?]}]
(when include-stats?
(stats)))
(stats diag-info-fn)))
(defn- format-error-info [{{:keys [body]} :response} {:keys [error?]}]
(when (and error?
......@@ -77,7 +88,7 @@
(defn- format-info [info opts]
(str/join " " (filter some? [(format-status-info info)
(format-performance-info info)
(format-threads-info opts)
(format-threads-info info opts)
(format-error-info info opts)])))
......@@ -187,9 +198,12 @@
(handler request respond raise)
;; API call, log info about it
(db/with-call-counting [call-count-fn]
(let [info {:request request
:start-time (System/nanoTime)
:call-count-fn call-count-fn}
response->info #(assoc info :response %)
respond (comp respond logged-response response->info)]
(handler request respond raise))))))
(sql-jdbc.execute.diagnostic/capturing-diagnostic-info [diag-info-fn]
(let [info {:request request
:start-time (System/nanoTime)
:call-count-fn call-count-fn
:diag-info-fn diag-info-fn}
response->info (fn [response]
(assoc info :response response))
respond (comp respond logged-response response->info)]
(handler request respond raise)))))))
(ns metabase.driver.sql-jdbc.execute.diagnostic-test
(:require [clojure.test :refer :all]
[metabase.driver :as driver]
[metabase.driver.sql-jdbc.execute.diagnostic :as sql-jdbc.execute.diagnostic]
[metabase.driver.sql-jdbc.test-util :as sql-jdbc.tu]
[metabase.test :as mt]
[metabase.util :as u]))
(deftest diagnostic-info-capture-test
(mt/test-drivers (sql-jdbc.tu/sql-jdbc-drivers)
(testing "DW connection pool diagnostic info should be captured correctly"
(sql-jdbc.execute.diagnostic/capturing-diagnostic-info [diag-info-fn]
;; sanity check
(is (= "Red Medicine"
(-> (mt/formatted-rows [str]
(mt/run-mbql-query venues
{:fields [$name]
:where [[$id = 1]]}))
ffirst)))
;; now, check the actual diagnostic info map
(let [diag-info (diag-info-fn)]
(is map? diag-info)
(let [{:keys [::sql-jdbc.execute.diagnostic/driver
::sql-jdbc.execute.diagnostic/database-id
::sql-jdbc.execute.diagnostic/active-connections
::sql-jdbc.execute.diagnostic/total-connections
::sql-jdbc.execute.diagnostic/threads-waiting]} diag-info]
;; the diag info driver should match the current one
(is (= driver/*driver* driver))
;; the diag info database-id should also match the current one
(is (= (u/the-id (mt/db)) database-id))
;; the active connections may be between 0 and total-connections (inclusive)
(is (<= 0 active-connections total-connections))))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment