Skip to content
Snippets Groups Projects
Unverified Commit c5fdebe2 authored by Cam Saul's avatar Cam Saul Committed by GitHub
Browse files

Merge pull request #7483 from metabase/usage-stats-for-alerts

Usage stats for Alerts & CSV/XLS Pulses
parents fba821e4 740cd73b
No related branches found
No related tags found
No related merge requests found
......@@ -5,7 +5,6 @@
[clojure.tools.logging :as log]
[honeysql
[core :as hsql]
[format :as hformat]
[helpers :as h]]
[metabase
[driver :as driver]
......@@ -32,12 +31,6 @@
Each nested query increments this counter by 1."
0)
;; register the function "distinct-count" with HoneySQL
;; (hsql/format :%distinct-count.x) -> "count(distinct x)"
(defmethod hformat/fn-handler "distinct-count" [_ field]
(str "count(distinct " (hformat/to-sql field) ")"))
;;; ## Formatting
(defn- qualified-alias
......
......@@ -9,7 +9,8 @@
(alter-meta! #'honeysql.core/format assoc :style/indent 1)
(alter-meta! #'honeysql.core/call assoc :style/indent 1)
;; for some reason the metadata on these helper functions is wrong which causes Eastwood to fail, see https://github.com/jkk/honeysql/issues/123
;; for some reason the metadata on these helper functions is wrong which causes Eastwood to fail, see
;; https://github.com/jkk/honeysql/issues/123
(alter-meta! #'honeysql.helpers/merge-left-join assoc
:arglists '([m & clauses])
:style/indent 1)
......@@ -45,27 +46,35 @@
(defmethod hformat/fn-handler "extract" [_ unit expr]
(str "extract(" (name unit) " from " (hformat/to-sql expr) ")"))
;; register the function "distinct-count" with HoneySQL
;; (hsql/format :%distinct-count.x) -> "count(distinct x)"
(defmethod hformat/fn-handler "distinct-count" [_ field]
(str "count(distinct " (hformat/to-sql field) ")"))
;; HoneySQL 0.7.0+ parameterizes numbers to fix issues with NaN and infinity -- see https://github.com/jkk/honeysql/pull/122.
;; However, this broke some of Metabase's behavior, specifically queries with calculated columns with numeric literals --
;; some SQL databases can't recognize that a calculated field in a SELECT clause and a GROUP BY clause is the same thing if the calculation involves parameters.
;; Go ahead an use the old behavior so we can keep our HoneySQL dependency up to date.
;; HoneySQL 0.7.0+ parameterizes numbers to fix issues with NaN and infinity -- see
;; https://github.com/jkk/honeysql/pull/122. However, this broke some of Metabase's behavior, specifically queries
;; with calculated columns with numeric literals -- some SQL databases can't recognize that a calculated field in a
;; SELECT clause and a GROUP BY clause is the same thing if the calculation involves parameters. Go ahead an use the
;; old behavior so we can keep our HoneySQL dependency up to date.
(extend-protocol honeysql.format/ToSql
java.lang.Number
(to-sql [x] (str x)))
;; HoneySQL automatically assumes that dots within keywords are used to separate schema / table / field / etc.
;; To handle weird situations where people actually put dots *within* a single identifier we'll replace those dots with lozenges,
;; let HoneySQL do its thing, then switch them back at the last second
;; HoneySQL automatically assumes that dots within keywords are used to separate schema / table / field / etc. To
;; handle weird situations where people actually put dots *within* a single identifier we'll replace those dots with
;; lozenges, let HoneySQL do its thing, then switch them back at the last second
;;
;; TODO - Maybe instead of this lozengey hackiness it would make more sense just to add a new "identifier" record type that implements `ToSql` in a more intelligent way
;; TODO - Maybe instead of this lozengey hackiness it would make more sense just to add a new "identifier" record type
;; that implements `ToSql` in a more intelligent way
(defn escape-dots
"Replace dots in a string with WHITE MEDIUM LOZENGES (⬨)."
^String [s]
(s/replace (name s) #"\." "⬨"))
(defn qualify-and-escape-dots
"Combine several NAME-COMPONENTS into a single Keyword, and escape dots in each name by replacing them with WHITE MEDIUM LOZENGES (⬨).
"Combine several NAME-COMPONENTS into a single Keyword, and escape dots in each name by replacing them with WHITE
MEDIUM LOZENGES (⬨).
(qualify-and-escape-dots :ab.c :d) -> :ab⬨c.d"
^clojure.lang.Keyword [& name-components]
......
......@@ -102,9 +102,19 @@
(frequencies (map k many-maps)))
(defn- histogram
"Bin some frequencies using a passed in BINNING-FN."
[binning-fn many-maps k]
(frequencies (map binning-fn (vals (value-frequencies many-maps k)))))
"Bin some frequencies using a passed in `binning-fn`.
;; Generate histogram for values of :a; `1` appears 3 times and `2` and `3` both appear once
(histogram bin-micro-number [{:a 1} {:a 1} {:a 1} {:a 2} {:a 3}] :a)
;; -> {\"3+\" 1, \"1\" 2}
;; (or if you already have the counts)
(histogram bin-micro-number [3 1 1])
;; -> {\"3+\" 1, \"1\" 2}"
([binning-fn counts]
(frequencies (map binning-fn counts)))
([binning-fn many-maps k]
(histogram binning-fn (vals (value-frequencies many-maps k)))))
(def ^:private micro-histogram
"Return a histogram for micro numbers."
......@@ -212,19 +222,78 @@
:with_locked_params (contains? embedding-params-vals "locked")
:with_disabled_params (contains? embedding-params-vals "disabled")})))}))
(defn- db-frequencies
"Fetch the frequencies of a given `column` with a normal SQL `SELECT COUNT(*) ... GROUP BY` query. This is way more
efficient than fetching every single row and counting them in Clojure-land!
(db-frequencies Database :engine)
;; -> {\"h2\" 2, \"postgres\" 1, ...}
;; include `WHERE` conditions or other arbitrary HoneySQL
(db-frequencies Database :engine {:where [:= :is_sample false]})
;; -> {\"h2\" 1, \"postgres\" 1, ...}
;; Generate a histogram:
(micro-histogram (vals (db-frequencies Database :engine)))
;; -> {\"2\" 1, \"1\" 1, ...}
;; Include `WHERE` clause that includes conditions for a Table related by an FK relationship:
;; (Number of Tables per DB engine)
(db-frequencies Table (db/qualify Database :engine)
{:left-join [Database [:= (db/qualify Database :id)
(db/qualify Table :db_id)]]})
;; -> {\"googleanalytics\" 4, \"postgres\" 48, \"h2\" 9}"
{:style/indent 2}
[model column & [additonal-honeysql]]
(into {} (for [{:keys [k count]} (db/select [model [column :k] [:%count.* :count]]
(merge {:group-by [column]}
additonal-honeysql))]
[k count])))
(defn- num-notifications-with-xls-or-csv-cards
"Return the number of Notifications that satisfy `where-conditions` that have at least one PulseCard with `include_xls` or
`include_csv`.
;; Pulses only (filter out Alerts)
(num-notifications-with-xls-or-csv-cards [:= :alert_condition nil])"
[& where-conditions]
;; :%distinct-count is a custom fn we registered in `metabase.util.honeysql-extensions`!
(-> (db/query {:select [[:%distinct-count.pulse.id :count]]
:from [:pulse]
:left-join [:pulse_card [:= :pulse.id :pulse_card.pulse_id]]
:where (cons
:and
(cons
[:or [:= :pulse_card.include_csv true]
[:= :pulse_card.include_xls true]]
where-conditions))})
first
:count))
(defn- pulse-metrics
"Get mes based on pulses
"Get metrics based on pulses
TODO: characterize by non-user account emails, # emails"
[]
(let [pulses (db/select [Pulse :creator_id])
pulse-cards (db/select [PulseCard :card_id :pulse_id])
pulse-channels (db/select [PulseChannel :channel_type :schedule_type])]
{:pulses (count pulses)
:pulse_types (frequencies (map :channel_type pulse-channels))
:pulse_schedules (frequencies (map :schedule_type pulse-channels))
:num_pulses_per_user (medium-histogram pulses :creator_id)
:num_pulses_per_card (medium-histogram pulse-cards :card_id)
:num_cards_per_pulses (medium-histogram pulse-cards :pulse_id)}))
(let [pulse-conditions {:left-join [Pulse [:= :pulse.id :pulse_id]], :where [:= :pulse.alert_condition nil]}]
{:pulses (db/count Pulse :alert_condition nil)
;; "Table Cards" are Cards that include a Table you can download
:with_table_cards (num-notifications-with-xls-or-csv-cards [:= :alert_condition nil])
:pulse_types (db-frequencies PulseChannel :channel_type pulse-conditions)
:pulse_schedules (db-frequencies PulseChannel :schedule_type pulse-conditions)
:num_pulses_per_user (medium-histogram (vals (db-frequencies Pulse :creator_id (dissoc pulse-conditions :left-join))))
:num_pulses_per_card (medium-histogram (vals (db-frequencies PulseCard :card_id pulse-conditions)))
:num_cards_per_pulses (medium-histogram (vals (db-frequencies PulseCard :pulse_id pulse-conditions)))}))
(defn- alert-metrics []
(let [alert-conditions {:left-join [Pulse [:= :pulse.id :pulse_id]], :where [:not= (db/qualify Pulse :alert_condition) nil]}]
{:alerts (db/count Pulse :alert_condition [:not= nil])
:with_table_cards (num-notifications-with-xls-or-csv-cards [:not= :alert_condition nil])
:first_time_only (db/count Pulse :alert_condition [:not= nil], :alert_first_only true)
:above_goal (db/count Pulse :alert_condition [:not= nil], :alert_above_goal true)
:alert_types (db-frequencies PulseChannel :channel_type alert-conditions)
:num_alerts_per_user (medium-histogram (vals (db-frequencies Pulse :creator_id (dissoc alert-conditions :left-join))))
:num_alerts_per_card (medium-histogram (vals (db-frequencies PulseCard :card_id alert-conditions)))
:num_cards_per_alerts (medium-histogram (vals (db-frequencies PulseCard :pulse_id alert-conditions)))}))
(defn- label-metrics
......@@ -284,33 +353,10 @@
;;; Execution Metrics
;; Because the QueryExecution table can number in the millions of rows, it isn't safe to pull the entire thing into memory;
;; instead, we'll fetch rows of QueryExecutions in chunks, building the summary as we go
(def ^:private ^:const executions-chunk-size
"Number of QueryExecutions to fetch per chunk. This should be a good tradeoff between not being too large (which could
cause us to run out of memory) and not being too small (which would make calculating this summary excessively slow)."
5000)
;; fetch chunks by ID, e.g. 1-5000, 5001-10000, etc.
(defn- executions-chunk
"Fetch the chunk of QueryExecutions whose ID is greater than STARTING-ID."
[starting-id]
(db/select [QueryExecution :id :executor_id :running_time :error]
:id [:> starting-id]
{:order-by [:id], :limit executions-chunk-size}))
(defn- executions-lazy-seq
"Return a lazy seq of all QueryExecutions."
([]
(executions-lazy-seq 0))
([starting-id]
(when-let [chunk (seq (executions-chunk starting-id))]
(lazy-cat chunk (executions-lazy-seq (:id (last chunk)))))))
(defn summarize-executions
"Summarize EXECUTIONS, by incrementing approriate counts in a summary map."
"Summarize `executions`, by incrementing approriate counts in a summary map."
([]
(summarize-executions (db/select-reducible [QueryExecution :executor_id :running_time :error])))
([executions]
(reduce summarize-executions {:executions 0, :by_status {}, :num_per_user {}, :num_by_latency {}} executions))
([summary execution]
......@@ -330,8 +376,7 @@
(defn- execution-metrics
"Get metrics based on QueryExecutions."
[]
(-> (executions-lazy-seq)
summarize-executions
(-> (summarize-executions)
(update :num_per_user summarize-executions-per-user)))
......@@ -344,6 +389,7 @@
{:average_entry_size (int (or length 0))
:num_queries_cached (bin-small-number count)}))
;;; System Metrics
(defn- bytes->megabytes [b]
......@@ -381,6 +427,7 @@
:label (label-metrics)
:metric (metric-metrics)
:pulse (pulse-metrics)
:alert (alert-metrics)
:question (question-metrics)
:segment (segment-metrics)
:system (system-metrics)
......
(ns metabase.util.stats-test
(:require [expectations :refer :all]
[metabase.models.query-execution :refer [QueryExecution]]
[metabase.models [query-execution :refer [QueryExecution]]
[pulse :refer [Pulse]]
[pulse-channel :refer [PulseChannel]]
[card :refer [Card]]
[pulse-card :refer [PulseCard]]]
[metabase.test.util :as tu]
[metabase.util.stats :as stats-util :refer :all]
[toucan.db :as db]))
[toucan.db :as db]
[metabase.util :as u]
[toucan.util.test :as tt]))
(expect "0" (#'stats-util/bin-micro-number 0))
(expect "1" (#'stats-util/bin-micro-number 1))
......@@ -63,8 +69,8 @@
(expect false ((anonymous-usage-stats) :sso_configured))
(expect false ((anonymous-usage-stats) :has_sample_data))
;;Spot checking a few system stats to ensure conversion from property
;;names and presence in the anonymous-usage-stats
;; Spot checking a few system stats to ensure conversion from property
;; names and presence in the anonymous-usage-stats
(expect
#{true}
(let [system-stats (get-in (anonymous-usage-stats) [:stats :system])]
......@@ -87,3 +93,80 @@
(expect
(old-execution-metrics)
(#'stats-util/execution-metrics))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | Pulses & Alerts |
;;; +----------------------------------------------------------------------------------------------------------------+
;; make sure we get some reasonable Pulses & Alert metrics, and they filter each other out as appropriate
;; alert_condition character varying(254), -- Condition (i.e. "rows" or "goal") used as a guard for alerts
;; alert_first_only boolean, -- True if the alert should be disabled after the first notification
;; alert_above_goal boolean, -- For a goal condition, alert when above the goal
(defn- x []
{:pulses {:pulses 3
:with_table_cards 2
:pulse_types {"slack" 1, "email" 2}
:pulse_schedules {"daily" 2, "weekly" 1}
:num_pulses_per_user {"1-5" 1}
:num_pulses_per_card {"6-10" 1}
:num_cards_per_pulses {"1-5" 1, "6-10" 1}}
:alerts {:alerts 4
:with_table_cards 2
:first_time_only 1
:above_goal 1
:alert_types {"slack" 2, "email" 2}
:num_alerts_per_user {"1-5" 1}
:num_alerts_per_card {"11-25" 1}
:num_cards_per_alerts {"1-5" 1, "6-10" 1}}}
(tt/with-temp* [Card [c]
;; ---------- Pulses ----------
Pulse [p1]
Pulse [p2]
Pulse [p3]
PulseChannel [_ {:pulse_id (u/get-id p1), :schedule_type "daily", :channel_type "email"}]
PulseChannel [_ {:pulse_id (u/get-id p1), :schedule_type "weekly", :channel_type "email"}]
PulseChannel [_ {:pulse_id (u/get-id p2), :schedule_type "daily", :channel_type "slack"}]
;; Pulse 1 gets 2 Cards (1 CSV)
PulseCard [_ {:pulse_id (u/get-id p1), :card_id (u/get-id c)}]
PulseCard [_ {:pulse_id (u/get-id p1), :card_id (u/get-id c), :include_csv true}]
;; Pulse 2 gets 1 Card
PulseCard [_ {:pulse_id (u/get-id p1), :card_id (u/get-id c)}]
;; Pulse 3 gets 7 Cards (1 CSV, 2 XLS, 2 BOTH)
PulseCard [_ {:pulse_id (u/get-id p3), :card_id (u/get-id c)}]
PulseCard [_ {:pulse_id (u/get-id p3), :card_id (u/get-id c)}]
PulseCard [_ {:pulse_id (u/get-id p3), :card_id (u/get-id c), :include_csv true}]
PulseCard [_ {:pulse_id (u/get-id p3), :card_id (u/get-id c), :include_xls true}]
PulseCard [_ {:pulse_id (u/get-id p3), :card_id (u/get-id c), :include_xls true}]
PulseCard [_ {:pulse_id (u/get-id p3), :card_id (u/get-id c), :include_csv true, :include_xls true}]
PulseCard [_ {:pulse_id (u/get-id p3), :card_id (u/get-id c), :include_csv true, :include_xls true}]
;; ---------- Alerts ----------
Pulse [a1 {:alert_condition "rows", :alert_first_only false}]
Pulse [a2 {:alert_condition "rows", :alert_first_only true }]
Pulse [a3 {:alert_condition "goal", :alert_first_only false}]
Pulse [a4 {:alert_condition "goal", :alert_first_only false, :alert_above_goal true}]
;; Alert 1 is Email, Alert 2 is Email & Slack, Alert 3 is Slack-only
PulseChannel [_ {:pulse_id (u/get-id a1), :channel_type "email"}]
PulseChannel [_ {:pulse_id (u/get-id a1), :channel_type "email"}]
PulseChannel [_ {:pulse_id (u/get-id a2), :channel_type "slack"}]
PulseChannel [_ {:pulse_id (u/get-id a3), :channel_type "slack"}]
;; Alert 1 gets 2 Cards (1 CSV)
PulseCard [_ {:pulse_id (u/get-id a1), :card_id (u/get-id c)}]
PulseCard [_ {:pulse_id (u/get-id a1), :card_id (u/get-id c), :include_csv true}]
;; Alert 2 gets 1 Card
PulseCard [_ {:pulse_id (u/get-id a1), :card_id (u/get-id c)}]
;; Alert 3 gets 7 Cards (1 CSV, 2 XLS, 2 BOTH)
PulseCard [_ {:pulse_id (u/get-id a3), :card_id (u/get-id c)}]
PulseCard [_ {:pulse_id (u/get-id a3), :card_id (u/get-id c)}]
PulseCard [_ {:pulse_id (u/get-id a3), :card_id (u/get-id c), :include_csv true}]
PulseCard [_ {:pulse_id (u/get-id a3), :card_id (u/get-id c), :include_xls true}]
PulseCard [_ {:pulse_id (u/get-id a3), :card_id (u/get-id c), :include_xls true}]
PulseCard [_ {:pulse_id (u/get-id a3), :card_id (u/get-id c), :include_csv true, :include_xls true}]
PulseCard [_ {:pulse_id (u/get-id a3), :card_id (u/get-id c), :include_csv true, :include_xls true}]
;; Alert 4 gets 3 Cards
PulseCard [_ {:pulse_id (u/get-id a3), :card_id (u/get-id c)}]
PulseCard [_ {:pulse_id (u/get-id a3), :card_id (u/get-id c)}]
PulseCard [_ {:pulse_id (u/get-id a3), :card_id (u/get-id c)}]]
{:pulses (#'metabase.util.stats/pulse-metrics)
:alerts (#'metabase.util.stats/alert-metrics)}))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment