Skip to content
Snippets Groups Projects
Unverified Commit 1d195113 authored by bryan's avatar bryan Committed by GitHub
Browse files

More grouped metrics data (#48900)

* Adds query_executions_by_source_24h and entity_id_translations_last_24h

- add docstring

* clear eid translation count in stats-post-cleanup

* remove ->> with after? it's confusing

* dissoc the right path
parent a249c0bc
No related branches found
No related tags found
No related merge requests found
...@@ -612,27 +612,42 @@ ...@@ -612,27 +612,42 @@
(t/minus (t/offset-date-time) (t/days 1))) (t/minus (t/offset-date-time) (t/days 1)))
(defn- ->snowplow-grouped-metric-info [] (defn- ->snowplow-grouped-metric-info []
{:query_executions (merge (let [qe (t2/select [:model/QueryExecution :embedding_client :executor_id :started_at])
{"sdk_embed" 0 "interactive_embed" 0 "static_embed" 0 "public_link" 0 "internal" 0} one-day-ago (->one-day-ago)
(-> categorize-query-execution ;; reuse the query data:
(group-by qe-24h (filter (fn [{started-at :started_at}] (t/after? one-day-ago started-at)) qe)]
(t2/select [:model/QueryExecution :embedding_client :executor_id])) {:query-executions (merge
(update-vals count)))}) {"sdk_embed" 0 "interactive_embed" 0 "static_embed" 0 "public_link" 0 "internal" 0}
(-> (group-by categorize-query-execution qe)
(defn- snowplow-grouped-metrics [{query-executions :query_executions :as _snowplow-grouped-metric-info}] (update-vals count)))
(->> [{:name :query_executions_by_source :query-executions-24h (merge
:values (mapv (fn [qe-group] {"sdk_embed" 0 "interactive_embed" 0 "static_embed" 0 "public_link" 0 "internal" 0}
{:group qe-group :value (get query-executions qe-group)}) (-> (group-by categorize-query-execution qe-24h)
["interactive_embed" "internal" "public_link" "sdk_embed" "static_embed"]) (update-vals count)))
:tags ["embedding"]}] :eid-translations-24h (get-translation-count)}))
(walk/postwalk (fn [x] (if (keyword? x) (-> x u/->snake_case_en name) x)))))
(defn- snowplow-grouped-metrics
  "Builds the grouped-metrics entries from the map returned by
  `->snowplow-grouped-metric-info`.

  Returns a vector of three maps, each with `:name`, `:values` and `:tags`:
  all-time query executions by source, query executions by source for the 24h
  bucket, and the entity-id translation counts."
  [{:keys [eid-translations-24h
           query-executions
           query-executions-24h]
    :as _snowplow-grouped-metric-info}]
  (let [;; Fixed reporting order of the query-execution sources.
        source-groups ["interactive_embed" "internal" "public_link" "sdk_embed" "static_embed"]
        ;; Turns a {group -> count} map into [{:group g :value n} ...] in `source-groups` order.
        ;; A group missing from the map yields a nil :value.
        ->group-values (fn [counts-by-group]
                         (into []
                               (map (fn [source-group]
                                      {:group source-group
                                       :value (get counts-by-group source-group)}))
                               source-groups))]
    [{:name   :query_executions_by_source
      :values (->group-values query-executions)
      :tags   ["embedding"]}
     {:name   :query_executions_by_source_24h
      :values (->group-values query-executions-24h)
      :tags   ["embedding"]}
     {:name   :entity_id_translations_last_24h
      ;; NOTE(review): passed through as-is, whereas the two entries above emit a
      ;; vector of {:group :value} maps -- confirm the consumer accepts this shape.
      :values eid-translations-24h
      :tags   ["embedding"]}]))
(defn- ->snowplow-metric-info (defn- ->snowplow-metric-info
"Collects Snowplow metrics data that is not in the legacy stats format. Also clears entity id translation count." "Collects Snowplow metrics data that is not in the legacy stats format. Also clears entity id translation count."
[] []
(let [one-day-ago (->one-day-ago) (let [one-day-ago (->one-day-ago)
total-translation-count (:total (get-translation-count)) total-translation-count (:total (get-translation-count))]
_ (clear-translation-count!)]
{:models (t2/count :model/Card :type :model :archived false) {:models (t2/count :model/Card :type :model :archived false)
:new_embedded_dashboards (t2/count :model/Dashboard :new_embedded_dashboards (t2/count :model/Dashboard
:enable_embedding true :enable_embedding true
...@@ -855,12 +870,12 @@ ...@@ -855,12 +870,12 @@
grouped-metrics (snowplow-grouped-metrics (->snowplow-grouped-metric-info)) grouped-metrics (snowplow-grouped-metrics (->snowplow-grouped-metric-info))
features (snowplow-features)] features (snowplow-features)]
;; grouped_metrics and settings are required in the json schema, but their data will be included in the next Milestone: ;; grouped_metrics and settings are required in the json schema, but their data will be included in the next Milestone:
{:analytics_uuid (snowplow/analytics-uuid) {"analytics_uuid" (snowplow/analytics-uuid)
:features features "features" features
:grouped_metrics grouped-metrics "grouped_metrics" grouped-metrics
:instance_attributes instance-attributes "instance_attributes" instance-attributes
:metrics metrics "metrics" metrics
:settings []})) "settings" []}))
(defn- generate-instance-stats! (defn- generate-instance-stats!
"Generate stats for this instance as data" "Generate stats for this instance as data"
...@@ -870,9 +885,19 @@ ...@@ -870,9 +885,19 @@
;; `:num_queries_cached_unbinned` is added to [[legacy-anonymous-usage-stats]]'s return value to make ;; `:num_queries_cached_unbinned` is added to [[legacy-anonymous-usage-stats]]'s return value to make
;; computing [[snowplow-anonymous-usage-stats]] more efficient. It shouldn't be sent by ;; computing [[snowplow-anonymous-usage-stats]] more efficient. It shouldn't be sent by
;; [[send-stats-deprecated!]]. ;; [[send-stats-deprecated!]].
(update-in [:stats :stats :cache] dissoc :num_queries_cached_unbinned)) (update-in [:stats :cache] dissoc :num_queries_cached_unbinned))
:snowplow-stats (snowplow-anonymous-usage-stats stats)})) :snowplow-stats (snowplow-anonymous-usage-stats stats)}))
(defn- deep-string-keywords
  "Walks `data` and replaces every keyword in it with its snake_case string name,
  leaving all other values untouched. Snowplow data will not work if keywords are
  passed in; converting here lets the rest of the code use keywords freely."
  [data]
  (letfn [(stringify-keyword [form]
            (if-not (keyword? form)
              form
              (name (u/->snake_case_en form))))]
    (walk/postwalk stringify-keyword data)))
(defn- stats-post-cleanup
  "Cleanup step for the stats run: calls `clear-translation-count!`."
  []
  (clear-translation-count!))
(defn phone-home-stats! (defn phone-home-stats!
"Collect usage stats and phone them home" "Collect usage stats and phone them home"
[] []
...@@ -881,12 +906,14 @@ ...@@ -881,12 +906,14 @@
{:keys [stats snowplow-stats]} (generate-instance-stats!) {:keys [stats snowplow-stats]} (generate-instance-stats!)
end-time-ms (System/currentTimeMillis) end-time-ms (System/currentTimeMillis)
elapsed-secs (quot (- end-time-ms start-time-ms) 1000) elapsed-secs (quot (- end-time-ms start-time-ms) 1000)
snowplow-data (assoc snowplow-stats snowplow-data (-> snowplow-stats
:metadata [{"key" "stats_export_time_seconds" (assoc "metadata" [{"key" "stats_export_time_seconds"
"value" elapsed-secs}])] "value" elapsed-secs}])
(assert (= #{:analytics_uuid :features :grouped_metrics :instance_attributes :metadata :metrics :settings} deep-string-keywords)]
(assert (= #{"analytics_uuid" "features" "grouped_metrics" "instance_attributes" "metadata" "metrics" "settings"}
(set (keys snowplow-data))) (set (keys snowplow-data)))
(str "Missing required keys in snowplow-data. got:" (sort (keys snowplow-data)))) (str "Missing required keys in snowplow-data. got:" (sort (keys snowplow-data))))
#_{:clj-kondo/ignore [:deprecated-var]} #_{:clj-kondo/ignore [:deprecated-var]}
(send-stats-deprecated! stats) (send-stats-deprecated! stats)
(snowplow/track-event! ::snowplow/instance_stats snowplow-data)))) (snowplow/track-event! ::snowplow/instance_stats snowplow-data)
(stats-post-cleanup))))
(ns metabase.eid-translation) (ns metabase.eid-translation)
(def statuses
  "All statuses an entity-id -> id translation can end with:

  - `:ok`             the entity-id was translated to an id successfully
  - `:not-found`      no id was found for the entity-id
  - `:invalid-format` the entity-id itself is not in a valid format"
  [:ok :not-found :invalid-format])
(def Status (def Status
"Malli enum for possible statuses for entity_id -> id translations." "Malli enum for possible statuses for entity_id -> id translations."
[:enum :ok :not-found :invalid-format]) (into [:enum] statuses))
(def default-counter (def default-counter
"The empty counter for tracking the number of entity_id -> id translations." "The empty counter for tracking the number of entity_id -> id translations."
(zipmap (rest Status) (repeat 0))) (zipmap statuses (repeat 0)))
...@@ -461,3 +461,13 @@ ...@@ -461,3 +461,13 @@
;; make sure features are not duplicated ;; make sure features are not duplicated
(is (= (count included-features) (count included-features-set)))))) (is (= (count included-features) (count included-features-set))))))
(deftest snowplow-grouped-metric-info-test
  (testing "query_executions"
    ;; `->snowplow-grouped-metric-info` returns kebab-case keys (`:query-executions`,
    ;; `:query-executions-24h`). Destructuring snake_case names binds nil, and the
    ;; `doseq` below would then iterate over nothing and the test would pass vacuously.
    (let [{:keys [query-executions query-executions-24h]} (#'stats/->snowplow-grouped-metric-info)]
      ;; Guard against a silently empty loop: the all-time map must have groups to check.
      (is (seq query-executions)
          "Expected at least one query execution group to compare.")
      (doseq [k (keys query-executions)]
        ;; The assertions live inside `testing` so a failure reports which key broke.
        (testing (str "> key " k)
          (is (contains? query-executions-24h k))
          (is (not (< (get query-executions k)
                      (get query-executions-24h k)))
              "There are never more query executions in the 24h version than all-of-time."))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment