Skip to content
Snippets Groups Projects
Unverified Commit 8c66c473 authored by Ngoc Khuat's avatar Ngoc Khuat Committed by GitHub
Browse files

Add snowplow tracking for new records on TaskHistory (#23893)

* add snowplow tracking for new records on TaskHistory

* more tests and have a better way to figure out db_id, db_engine
parent c413a00d
No related merge requests found
......@@ -242,28 +242,26 @@ describeWithSnowplow("scenarios > setup", () => {
});
it("should send snowplow events", () => {
// 1 - new_instance_created
// 2 - pageview
// 1 - pageview
cy.visit(`/setup`);
// 3 - setup/step_seen
// 2 - setup/step_seen
cy.findByText("Welcome to Metabase");
cy.button("Let's get started").click();
// 4 - setup/step_seen
// 3 - setup/step_seen
cy.findByText("What's your preferred language?");
expectGoodSnowplowEvents(4);
expectGoodSnowplowEvents(3);
});
it("should ignore snowplow failures and work as normal", () => {
// 1 - new_instance_created
blockSnowplow();
cy.visit(`/setup`);
cy.findByText("Welcome to Metabase");
cy.button("Let's get started").click();
expectGoodSnowplowEvents(1);
expectGoodSnowplowEvents(0);
});
});
{
"$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#",
"description": "Task history",
"self": {
"vendor": "com.metabase",
"name": "task",
"format": "jsonschema",
"version": "1-0-0"
},
"type": "object",
"properties": {
"task_id": {
"description": "Unique identifier for the TaskHistory within the Metabase instance",
"type": "integer",
"minimum": 0,
"maximum": 2147483647
},
"task_name": {
"description": "TaskHistory name",
"type": "string",
"maxLength": 512
},
"db_id": {
"description": "Database that the task was executed against",
"type": [
"integer",
"null"
],
"minimum": 0,
"maximum": 2147483647
},
"db_engine": {
"description": "The engine of the Database that the task was executed against",
"type": [
"string",
"null"
],
"maxLength": 64
},
"started_at": {
"description": "The start time of the task",
"type": [
"string",
"null"
],
"format": "date-time",
"maxLength": 1024
},
"ended_at": {
"description": "The finished time of the task",
"type": [
"string",
"null"
],
"format": "date-time",
"maxLength": 1024
},
"duration": {
"description": "The duration of the task in milliseconds",
"type": "integer",
"minimum": 0,
"maximum": 2147483647
},
"task-details": {
"description": "the JSON-encoded details of the task",
"type": [
"string",
"null"
],
"maxLength": 2048
}
},
"required": [
"task_id",
"task_name",
"duration"
],
"additionalProperties": true
}
......@@ -118,7 +118,8 @@
::dashboard "1-0-0"
::database "1-0-0"
::instance "1-1-0"
::timeline "1-0-0"})
::timeline "1-0-0"
::task "1-0-0"})
(defn- context
"Common context included in every analytics event"
......@@ -160,7 +161,8 @@
::question-added-to-dashboard ::dashboard
::database-connection-successful ::database
::database-connection-failed ::database
::new-event-created ::timeline})
::new-event-created ::timeline
::new-task-history ::task})
(defn track-event!
"Send a single analytics event to the Snowplow collector, if tracking is enabled for this MB instance and a collector
......
(ns metabase.models.task-history
(:require [cheshire.generate :refer [add-encoder encode-map]]
(:require [cheshire.core :as json]
[cheshire.generate :refer [add-encoder encode-map]]
[clojure.tools.logging :as log]
[java-time :as t]
[metabase.analytics.snowplow :as snowplow]
[metabase.api.common :refer [*current-user-id*]]
[metabase.models.database :refer [Database]]
[metabase.models.interface :as mi]
[metabase.models.permissions :as perms]
[metabase.public-settings.premium-features :as premium-features]
[metabase.util :as u]
[metabase.util.date-2 :as u.date]
[metabase.util.i18n :refer [trs]]
[metabase.util.schema :as su]
[schema.core :as s]
......@@ -37,10 +42,30 @@
(perms/application-perms-path :monitoring)
"/")})
(defn- task->snowplow-event
[task]
(let [task-details (:task_details task)]
(merge {:task_id (:id task)
:task_name (:task task)
:duration (:duration task)
:task_details (json/generate-string task-details)
:started_at (u.date/format (:started_at task))
:ended_at (u.date/format (:ended_at task))}
(when-let [db-id (:db_id task)]
{:db_id db-id
:db_engine (db/select-one-field :engine Database :id db-id)}))))
(defn- post-insert
[task]
(u/prog1 task
(snowplow/track-event! ::snowplow/new-task-history *current-user-id* (task->snowplow-event <>))))
(u/strict-extend (class TaskHistory)
models/IModel
(merge models/IModelDefaults
{:types (constantly {:task_details :json})})
{:types (constantly {:task_details :json})
:post-insert post-insert})
mi/IObjectPermissions
(merge mi/IObjectPermissionsDefaults
{:can-read? (partial mi/current-user-has-full-permissions? :read)
......
......@@ -458,13 +458,17 @@
database :- i/DatabaseInstance
{:keys [steps] :as sync-md} :- SyncOperationMetadata]
(try
(db/insert-many! TaskHistory
(cons (create-task-history operation database sync-md)
(for [[step-name step-info] steps
:let [task-details (dissoc step-info :start-time :end-time :log-summary-fn)]]
(assoc (create-task-history step-name database step-info)
:task_details (when (seq task-details)
task-details)))))
(->> (for [[step-name step-info] steps
:let [task-details (dissoc step-info :start-time :end-time :log-summary-fn)]]
(assoc (create-task-history step-name database step-info)
:task_details (when (seq task-details)
task-details)))
(cons (create-task-history operation database sync-md))
;; Using `insert!` instead of `insert-many!` here to make sure
;; `post-insert` is triggered
(map #(db/insert! TaskHistory %))
(map :id)
doall)
(catch Throwable e
(log/warn e (trs "Error saving task history")))))
......
......@@ -49,7 +49,8 @@
(let [pulse-id->channels (group-by :pulse_id (pulse-channel/retrieve-scheduled-channels hour weekday monthday monthweek))]
(doseq [[pulse-id channels] pulse-id->channels]
(try
(task-history/with-task-history {:task (format "send-pulse %s" pulse-id)}
(task-history/with-task-history {:task "send-pulse"
:task_details {:pulse-id pulse-id}}
(log/debug (trs "Starting Pulse Execution: {0}" pulse-id))
(when-let [pulse (pulse/retrieve-notification pulse-id :archived false)]
(metabase.pulse/send-pulse! pulse :channel-ids (map :id channels)))
......
(ns metabase.models.task-history-test
(:require [clojure.test :refer :all]
(:require [cheshire.core :as json]
[clojure.test :refer :all]
[java-time :as t]
[metabase.models.task-history :as task-history :refer [TaskHistory]]
[metabase.analytics.snowplow-test :as snowplow-test]
[metabase.api.common :refer [*current-user-id*]]
[metabase.models :refer [Database TaskHistory]]
[metabase.models.task-history :as task-history]
[metabase.test :as mt]
[metabase.util :as u]
[toucan.db :as db]))
......@@ -67,3 +71,62 @@
(task-history/cleanup-task-history! 100)
(is (= #{task-1 task-2}
(set (map :task (TaskHistory)))))))))
(defn- insert-then-pop!
"Insert a task history and get the last snowplow event."
[task]
(db/insert! TaskHistory task)
(-> (snowplow-test/pop-event-data-and-user-id!)
last
mt/boolean-ids-and-timestamps
(update-in [:data "task_details"] json/parse-string)))
(deftest snowplow-tracking-test
(snowplow-test/with-fake-snowplow-collector
(let [t1 (t/zoned-date-time)]
(testing "inserting a task history should track a snowplow event"
(is (= {:data {"duration" 10
"ended_at" true
"started_at" true
"event" "new_task_history"
"task_details" {"apple" 40, "orange" 2}
"task_id" true
"task_name" "a fake task"}
:user-id nil}
(insert-then-pop! (assoc (make-10-millis-task t1)
:task "a fake task"
:task_details {:apple 40
:orange 2}))))
(testing "should have user id if *current-user-id* is bound"
(binding [*current-user-id* 1]
(is (= {:data {"duration" 10
"ended_at" true
"started_at" true
"event" "new_task_history"
"task_details" {"apple" 40, "orange" 2}
"task_id" true
"task_name" "a fake task"}
:user-id "1"}
(insert-then-pop! (assoc (make-10-millis-task t1)
:task "a fake task"
:task_details {:apple 40
:orange 2}))))))
(testing "infer db_engine if db_id exists"
(mt/with-temp Database [{db-id :id} {:engine "postgres"}]
(is (= {:data {"duration" 10
"ended_at" true
"started_at" true
"db_id" true
"db_engine" "postgres"
"event" "new_task_history"
"task_details" {"apple" 40, "orange" 2}
"task_id" true
"task_name" "a fake task"}
:user-id nil}
(insert-then-pop! (assoc (make-10-millis-task t1)
:task "a fake task"
:db_id db-id
:task_details {:apple 40
:orange 2}))))))))))
(ns metabase.sync.analyze-test
(:require [clojure.test :refer :all]
[metabase.analytics.snowplow-test :as snowplow-test]
[metabase.models.database :refer [Database]]
[metabase.models.field :as field :refer [Field]]
[metabase.models.table :refer [Table]]
......@@ -98,8 +99,8 @@
(defn- classified-semantic-type [values]
(let [field (field/map->FieldInstance {:base_type :type/Text})]
(:semantic_type (classifiers.text-fingerprint/infer-semantic-type
field
(transduce identity (fingerprinters/fingerprinter field) values)))))
field
(transduce identity (fingerprinters/fingerprinter field) values)))))
(deftest classify-json-test
(doseq [[group values->expected] {"When all the values are valid JSON dicts they're valid JSON"
......@@ -252,3 +253,24 @@
(is (= last-sync-time
(latest-sync-time table))
"sync time shouldn't change")))))
(deftest analyze-should-send-a-snowplow-event-test
(testing "the recorded event should include db-id and db-engine"
(snowplow-test/with-fake-snowplow-collector
(mt/with-temp* [Table [table (fake-table)]
Field [_field (fake-field table)]]
(analyze-table! table)
(is (= {:data {"task_id" true
"event" "new_task_history"
"started_at" true
"ended_at" true
"duration" true
"db_engine" (name (db/select-one-field :engine Database :id (mt/id)))
"db_id" true
"task_name" "classify-tables"}
:user-id nil}
(-> (snowplow-test/pop-event-data-and-user-id!)
last
mt/boolean-ids-and-timestamps
(update :data dissoc "task_details")
(update-in [:data "duration"] some?))))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment