Skip to content
Snippets Groups Projects
Unverified Commit 1d195113 authored by bryan's avatar bryan Committed by GitHub
Browse files

More grouped metrics data (#48900)

* Adds query_executions_by_source_24h and entity_id_translations_last_24h

- add docstring

* clear eid translation count in stats-post-cleanup

* remove ->> with after? it's confusing

* dissoc the right path
parent a249c0bc
No related branches found
No related tags found
No related merge requests found
......@@ -612,27 +612,42 @@
(t/minus (t/offset-date-time) (t/days 1)))
(defn- ->snowplow-grouped-metric-info []
{:query_executions (merge
{"sdk_embed" 0 "interactive_embed" 0 "static_embed" 0 "public_link" 0 "internal" 0}
(-> categorize-query-execution
(group-by
(t2/select [:model/QueryExecution :embedding_client :executor_id]))
(update-vals count)))})
(defn- snowplow-grouped-metrics [{query-executions :query_executions :as _snowplow-grouped-metric-info}]
(->> [{:name :query_executions_by_source
:values (mapv (fn [qe-group]
{:group qe-group :value (get query-executions qe-group)})
["interactive_embed" "internal" "public_link" "sdk_embed" "static_embed"])
:tags ["embedding"]}]
(walk/postwalk (fn [x] (if (keyword? x) (-> x u/->snake_case_en name) x)))))
(let [;; Every source starts at zero so that a source with no executions is still reported.
      zero-counts     {"sdk_embed" 0 "interactive_embed" 0 "static_embed" 0 "public_link" 0 "internal" 0}
      ;; Counts executions per `categorize-query-execution` result, layered over the zero defaults.
      count-by-source (fn [executions]
                        (merge zero-counts
                               (-> (group-by categorize-query-execution executions)
                                   (update-vals count))))
      qe              (t2/select [:model/QueryExecution :embedding_client :executor_id :started_at])
      one-day-ago     (->one-day-ago)
      ;; Reuse the rows already fetched instead of querying again.
      ;; `(t/after? a b)` is true when `a` is later than `b`, so `started-at` has to come first
      ;; to keep executions from the last 24 hours; with the arguments the other way round the
      ;; filter keeps only executions OLDER than one day.
      qe-24h          (filter (fn [{started-at :started_at}] (t/after? started-at one-day-ago)) qe)]
  {:query-executions     (count-by-source qe)
   :query-executions-24h (count-by-source qe-24h)
   :eid-translations-24h (get-translation-count)}))
(defn- snowplow-grouped-metrics
  "Builds the grouped-metrics entries from a map with the keys `:query-executions`,
  `:query-executions-24h` and `:eid-translations-24h`.

  Returns a vector of three maps, each with `:name`, `:values` and `:tags`. For the two
  query-execution entries `:values` is a vector of `{:group <source> :value <count>}` maps in a
  fixed source order; a source missing from the input map gets a `nil` value."
  [{:keys [eid-translations-24h
           query-executions
           query-executions-24h]
    :as   _snowplow-grouped-metric-info}]
  (let [sources          ["interactive_embed" "internal" "public_link" "sdk_embed" "static_embed"]
        by-source        (fn [counts]
                           (mapv (fn [source] {:group source :value (get counts source)})
                                 sources))
        embedding-metric (fn [metric-name values]
                           {:name metric-name :values values :tags ["embedding"]})]
    [(embedding-metric :query_executions_by_source (by-source query-executions))
     (embedding-metric :query_executions_by_source_24h (by-source query-executions-24h))
     ;; NOTE(review): unlike the two entries above, `:values` here is passed through untouched
     ;; rather than reshaped into `{:group .. :value ..}` maps -- confirm the receiving schema
     ;; accepts whatever shape the caller supplies.
     (embedding-metric :entity_id_translations_last_24h eid-translations-24h)]))
(defn- ->snowplow-metric-info
"Collects Snowplow metrics data that is not in the legacy stats format. Also clears entity id translation count."
[]
(let [one-day-ago (->one-day-ago)
total-translation-count (:total (get-translation-count))
_ (clear-translation-count!)]
total-translation-count (:total (get-translation-count))]
{:models (t2/count :model/Card :type :model :archived false)
:new_embedded_dashboards (t2/count :model/Dashboard
:enable_embedding true
......@@ -855,12 +870,12 @@
grouped-metrics (snowplow-grouped-metrics (->snowplow-grouped-metric-info))
features (snowplow-features)]
;; grouped_metrics and settings are required in the json schema, but their data will be included in the next Milestone:
{:analytics_uuid (snowplow/analytics-uuid)
:features features
:grouped_metrics grouped-metrics
:instance_attributes instance-attributes
:metrics metrics
:settings []}))
{"analytics_uuid" (snowplow/analytics-uuid)
"features" features
"grouped_metrics" grouped-metrics
"instance_attributes" instance-attributes
"metrics" metrics
"settings" []}))
(defn- generate-instance-stats!
"Generate stats for this instance as data"
......@@ -870,9 +885,19 @@
;; `:num_queries_cached_unbinned` is added to [[legacy-anonymous-usage-stats]]'s return value to make
;; computing [[snowplow-anonymous-usage-stats]] more efficient. It shouldn't be sent by
;; [[send-stats-deprecated!]].
(update-in [:stats :stats :cache] dissoc :num_queries_cached_unbinned))
(update-in [:stats :cache] dissoc :num_queries_cached_unbinned))
:snowplow-stats (snowplow-anonymous-usage-stats stats)}))
(defn- deep-string-keywords
  "Walks `data` and replaces every keyword found anywhere in it with a string, produced by passing
  the keyword through `u/->snake_case_en` and then `name`. Non-keyword values are left as they are.
  Snowplow data will not work if you pass in keywords, but this lets us use keywords everywhere else."
  [data]
  (letfn [(keyword->string [x]
            (if-not (keyword? x)
              x
              (name (u/->snake_case_en x))))]
    (walk/postwalk keyword->string data)))
(defn- stats-post-cleanup
  "Cleanup step for the stats run: calls `clear-translation-count!`."
  []
  (clear-translation-count!))
(defn phone-home-stats!
"Collect usage stats and phone them home"
[]
......@@ -881,12 +906,14 @@
{:keys [stats snowplow-stats]} (generate-instance-stats!)
end-time-ms (System/currentTimeMillis)
elapsed-secs (quot (- end-time-ms start-time-ms) 1000)
snowplow-data (assoc snowplow-stats
:metadata [{"key" "stats_export_time_seconds"
"value" elapsed-secs}])]
(assert (= #{:analytics_uuid :features :grouped_metrics :instance_attributes :metadata :metrics :settings}
snowplow-data (-> snowplow-stats
(assoc "metadata" [{"key" "stats_export_time_seconds"
"value" elapsed-secs}])
deep-string-keywords)]
(assert (= #{"analytics_uuid" "features" "grouped_metrics" "instance_attributes" "metadata" "metrics" "settings"}
(set (keys snowplow-data)))
(str "Missing required keys in snowplow-data. got:" (sort (keys snowplow-data))))
#_{:clj-kondo/ignore [:deprecated-var]}
(send-stats-deprecated! stats)
(snowplow/track-event! ::snowplow/instance_stats snowplow-data))))
(snowplow/track-event! ::snowplow/instance_stats snowplow-data)
(stats-post-cleanup))))
(ns metabase.eid-translation)
(def statuses
  "Possible statuses from an entity-id -> id translation:
   - `:ok`             the translation from entity-id -> id is successful.
   - `:not-found`      the id is not found.
   - `:invalid-format` the format of the entity-id is invalid."
  [:ok :not-found :invalid-format])
(def Status
"Malli enum for possible statuses for entity_id -> id translations."
[:enum :ok :not-found :invalid-format])
(into [:enum] statuses))
(def default-counter
"The empty counter for tracking the number of entity_id -> id translations."
(zipmap (rest Status) (repeat 0)))
(zipmap statuses (repeat 0)))
......@@ -461,3 +461,13 @@
;; make sure features are not duplicated
(is (= (count included-features) (count included-features-set))))))
(deftest snowplow-grouped-metric-info-test
  (testing "query_executions"
    ;; `->snowplow-grouped-metric-info` returns kebab-case keys (`:query-executions`,
    ;; `:query-executions-24h`). Destructuring snake_case names binds both to nil, which makes the
    ;; `doseq` below iterate over nothing and the test pass without checking anything.
    (let [{:keys [query-executions query-executions-24h]} (#'stats/->snowplow-grouped-metric-info)]
      ;; Guard against a vacuous pass: the zero-count defaults mean there is always at least one key.
      (is (seq query-executions)
          "Expected at least one query execution group to check.")
      (doseq [k (keys query-executions)]
        ;; The assertions live inside `testing` so a failure reports which key it was for.
        (testing (str "> key " k)
          (is (contains? query-executions-24h k))
          (is (not (< (get query-executions k)
                      (get query-executions-24h k)))
              "There are never more query executions in the 24h version than all-of-time."))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment