diff --git a/frontend/test/metabase/scenarios/onboarding/setup/setup.cy.spec.js b/frontend/test/metabase/scenarios/onboarding/setup/setup.cy.spec.js index 8f6047f1ac06384aaa13beba3734ead14297f6a4..dc5af1847ee65ea62bcfe6b889477282cd884ecb 100644 --- a/frontend/test/metabase/scenarios/onboarding/setup/setup.cy.spec.js +++ b/frontend/test/metabase/scenarios/onboarding/setup/setup.cy.spec.js @@ -242,28 +242,26 @@ describeWithSnowplow("scenarios > setup", () => { }); it("should send snowplow events", () => { - // 1 - new_instance_created - // 2 - pageview + // 1 - pageview cy.visit(`/setup`); - // 3 - setup/step_seen + // 2 - setup/step_seen cy.findByText("Welcome to Metabase"); cy.button("Let's get started").click(); - // 4 - setup/step_seen + // 3 - setup/step_seen cy.findByText("What's your preferred language?"); - expectGoodSnowplowEvents(4); + expectGoodSnowplowEvents(3); }); it("should ignore snowplow failures and work as normal", () => { - // 1 - new_instance_created blockSnowplow(); cy.visit(`/setup`); cy.findByText("Welcome to Metabase"); cy.button("Let's get started").click(); - expectGoodSnowplowEvents(1); + expectGoodSnowplowEvents(0); }); }); diff --git a/snowplow/iglu-client-embedded/schemas/com.metabase/task/jsonschema/1-0-0 b/snowplow/iglu-client-embedded/schemas/com.metabase/task/jsonschema/1-0-0 new file mode 100644 index 0000000000000000000000000000000000000000..7c6b67c5edac9009ac5e318167e5645f290eb010 --- /dev/null +++ b/snowplow/iglu-client-embedded/schemas/com.metabase/task/jsonschema/1-0-0 @@ -0,0 +1,79 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Task history", + "self": { + "vendor": "com.metabase", + "name": "task", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "task_id": { + "description": "Unique identifier for the TaskHistory within the Metabase instance", + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "task_name": { + "description": "TaskHistory name", + "type": "string", + "maxLength": 512 + }, + "db_id": { + "description": "Database that the task was executed against", + "type": [ + "integer", + "null" + ], + "minimum": 0, + "maximum": 2147483647 + }, + "db_engine": { + "description": "The engine of the Database that the task was executed against", + "type": [ + "string", + "null" + ], + "maxLength": 64 + }, + "started_at": { + "description": "The start time of the task", + "type": [ + "string", + "null" + ], + "format": "date-time", + "maxLength": 1024 + }, + "ended_at": { + "description": "The finished time of the task", + "type": [ + "string", + "null" + ], + "format": "date-time", + "maxLength": 1024 + }, + "duration": { + "description": "The duration of the task in milliseconds", + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + }, + "task-details": { + "description": "the JSON-encoded details of the task", + "type": [ + "string", + "null" + ], + "maxLength": 2048 + } + }, + "required": [ + "task_id", + "task_name", + "duration" + ], + "additionalProperties": true +} diff --git a/src/metabase/analytics/snowplow.clj b/src/metabase/analytics/snowplow.clj index 0dd066997cbaeffae33a334d216a3a2b29e337de..d53519826295803c98746d3d5609646cc5ab1531 100644 --- a/src/metabase/analytics/snowplow.clj +++ b/src/metabase/analytics/snowplow.clj @@ -118,7 +118,8 @@ ::dashboard "1-0-0" ::database "1-0-0" ::instance "1-1-0" - ::timeline "1-0-0"}) + ::timeline "1-0-0" + ::task "1-0-0"}) (defn- context "Common context included in every analytics event" @@ -160,7 +161,8 @@ ::question-added-to-dashboard ::dashboard ::database-connection-successful ::database ::database-connection-failed ::database - ::new-event-created ::timeline}) + ::new-event-created ::timeline + ::new-task-history ::task}) (defn track-event! "Send a single analytics event to the Snowplow collector, if tracking is enabled for this MB instance and a collector diff --git a/src/metabase/models/task_history.clj b/src/metabase/models/task_history.clj index d443d549797d3a8ab6b6b1b6f7d957886a8fcf46..bbf450f28de22e5cfb5152656de1c547cd8e686a 100644 --- a/src/metabase/models/task_history.clj +++ b/src/metabase/models/task_history.clj @@ -1,11 +1,16 @@ (ns metabase.models.task-history - (:require [cheshire.generate :refer [add-encoder encode-map]] + (:require [cheshire.core :as json] + [cheshire.generate :refer [add-encoder encode-map]] [clojure.tools.logging :as log] [java-time :as t] + [metabase.analytics.snowplow :as snowplow] + [metabase.api.common :refer [*current-user-id*]] + [metabase.models.database :refer [Database]] [metabase.models.interface :as mi] [metabase.models.permissions :as perms] [metabase.public-settings.premium-features :as premium-features] [metabase.util :as u] + [metabase.util.date-2 :as u.date] [metabase.util.i18n :refer [trs]] [metabase.util.schema :as su] [schema.core :as s] @@ -37,10 +42,30 @@ (perms/application-perms-path :monitoring) "/")}) +(defn- task->snowplow-event + [task] + (let [task-details (:task_details task)] + (merge {:task_id (:id task) + :task_name (:task task) + :duration (:duration task) + :task_details (json/generate-string task-details) + :started_at (u.date/format (:started_at task)) + :ended_at (u.date/format (:ended_at task))} + (when-let [db-id (:db_id task)] + {:db_id db-id + :db_engine (db/select-one-field :engine Database :id db-id)})))) + +(defn- post-insert + [task] + (u/prog1 task + (snowplow/track-event! ::snowplow/new-task-history *current-user-id* (task->snowplow-event <>)))) + (u/strict-extend (class TaskHistory) models/IModel (merge models/IModelDefaults - {:types (constantly {:task_details :json})}) + {:types (constantly {:task_details :json}) + :post-insert post-insert}) + mi/IObjectPermissions (merge mi/IObjectPermissionsDefaults {:can-read? (partial mi/current-user-has-full-permissions? :read) diff --git a/src/metabase/sync/util.clj b/src/metabase/sync/util.clj index b0f997bdf63b05a368688446171045b041852e50..13d1bcf760449342d09d241fe994668b4d7d4d0e 100644 --- a/src/metabase/sync/util.clj +++ b/src/metabase/sync/util.clj @@ -458,13 +458,17 @@ database :- i/DatabaseInstance {:keys [steps] :as sync-md} :- SyncOperationMetadata] (try - (db/insert-many! TaskHistory - (cons (create-task-history operation database sync-md) - (for [[step-name step-info] steps - :let [task-details (dissoc step-info :start-time :end-time :log-summary-fn)]] - (assoc (create-task-history step-name database step-info) - :task_details (when (seq task-details) - task-details))))) + (->> (for [[step-name step-info] steps + :let [task-details (dissoc step-info :start-time :end-time :log-summary-fn)]] + (assoc (create-task-history step-name database step-info) + :task_details (when (seq task-details) + task-details))) + (cons (create-task-history operation database sync-md)) + ;; Using `insert!` instead of `insert-many!` here to make sure + ;; `post-insert` is triggered + (map #(db/insert! TaskHistory %)) + (map :id) + doall) (catch Throwable e (log/warn e (trs "Error saving task history"))))) diff --git a/src/metabase/task/send_pulses.clj b/src/metabase/task/send_pulses.clj index b27ef5edb4abaf7cdc81a219be4b4779a656ca56..5eeb6ab4f71e51c233a5a1b80d74edae5f90cb04 100644 --- a/src/metabase/task/send_pulses.clj +++ b/src/metabase/task/send_pulses.clj @@ -49,7 +49,8 @@ (let [pulse-id->channels (group-by :pulse_id (pulse-channel/retrieve-scheduled-channels hour weekday monthday monthweek))] (doseq [[pulse-id channels] pulse-id->channels] (try - (task-history/with-task-history {:task (format "send-pulse %s" pulse-id)} + (task-history/with-task-history {:task "send-pulse" + :task_details {:pulse-id pulse-id}} (log/debug (trs "Starting Pulse Execution: {0}" pulse-id)) (when-let [pulse (pulse/retrieve-notification pulse-id :archived false)] (metabase.pulse/send-pulse! pulse :channel-ids (map :id channels))) diff --git a/test/metabase/models/task_history_test.clj b/test/metabase/models/task_history_test.clj index b75b7ce258439e8a6cf322da8330d65f682537ea..fb69bfe4fefdf6379cad5abdf56ced908b9401e9 100644 --- a/test/metabase/models/task_history_test.clj +++ b/test/metabase/models/task_history_test.clj @@ -1,7 +1,11 @@ (ns metabase.models.task-history-test - (:require [clojure.test :refer :all] + (:require [cheshire.core :as json] + [clojure.test :refer :all] [java-time :as t] - [metabase.models.task-history :as task-history :refer [TaskHistory]] + [metabase.analytics.snowplow-test :as snowplow-test] + [metabase.api.common :refer [*current-user-id*]] + [metabase.models :refer [Database TaskHistory]] + [metabase.models.task-history :as task-history] [metabase.test :as mt] [metabase.util :as u] [toucan.db :as db])) @@ -67,3 +71,62 @@ (task-history/cleanup-task-history! 100) (is (= #{task-1 task-2} (set (map :task (TaskHistory))))))))) + +(defn- insert-then-pop! + "Insert a task history and get the last snowplow event." + [task] + (db/insert! TaskHistory task) + (-> (snowplow-test/pop-event-data-and-user-id!) + last + mt/boolean-ids-and-timestamps + (update-in [:data "task_details"] json/parse-string))) + +(deftest snowplow-tracking-test + (snowplow-test/with-fake-snowplow-collector + (let [t1 (t/zoned-date-time)] + (testing "inserting a task history should track a snowplow event" + (is (= {:data {"duration" 10 + "ended_at" true + "started_at" true + "event" "new_task_history" + "task_details" {"apple" 40, "orange" 2} + "task_id" true + "task_name" "a fake task"} + :user-id nil} + (insert-then-pop! (assoc (make-10-millis-task t1) + :task "a fake task" + :task_details {:apple 40 + :orange 2})))) + + (testing "should have user id if *current-user-id* is bound" + (binding [*current-user-id* 1] + (is (= {:data {"duration" 10 + "ended_at" true + "started_at" true + "event" "new_task_history" + "task_details" {"apple" 40, "orange" 2} + "task_id" true + "task_name" "a fake task"} + :user-id "1"} + (insert-then-pop! (assoc (make-10-millis-task t1) + :task "a fake task" + :task_details {:apple 40 + :orange 2})))))) + + (testing "infer db_engine if db_id exists" + (mt/with-temp Database [{db-id :id} {:engine "postgres"}] + (is (= {:data {"duration" 10 + "ended_at" true + "started_at" true + "db_id" true + "db_engine" "postgres" + "event" "new_task_history" + "task_details" {"apple" 40, "orange" 2} + "task_id" true + "task_name" "a fake task"} + :user-id nil} + (insert-then-pop! (assoc (make-10-millis-task t1) + :task "a fake task" + :db_id db-id + :task_details {:apple 40 + :orange 2})))))))))) diff --git a/test/metabase/sync/analyze_test.clj b/test/metabase/sync/analyze_test.clj index 0859d5c8fcab089002f99a4011cc3bb0aa09a888..1c5ec1b3f38b856f003a9a88d3d6c05c9da37a46 100644 --- a/test/metabase/sync/analyze_test.clj +++ b/test/metabase/sync/analyze_test.clj @@ -1,5 +1,6 @@ (ns metabase.sync.analyze-test (:require [clojure.test :refer :all] + [metabase.analytics.snowplow-test :as snowplow-test] [metabase.models.database :refer [Database]] [metabase.models.field :as field :refer [Field]] [metabase.models.table :refer [Table]] @@ -98,8 +99,8 @@ (defn- classified-semantic-type [values] (let [field (field/map->FieldInstance {:base_type :type/Text})] (:semantic_type (classifiers.text-fingerprint/infer-semantic-type - field - (transduce identity (fingerprinters/fingerprinter field) values))))) + field + (transduce identity (fingerprinters/fingerprinter field) values))))) (deftest classify-json-test (doseq [[group values->expected] {"When all the values are valid JSON dicts they're valid JSON" @@ -252,3 +253,24 @@ (is (= last-sync-time (latest-sync-time table)) "sync time shouldn't change"))))) + +(deftest analyze-should-send-a-snowplow-event-test + (testing "the recorded event should include db-id and db-engine" + (snowplow-test/with-fake-snowplow-collector + (mt/with-temp* [Table [table (fake-table)] + Field [_field (fake-field table)]] + (analyze-table! table) + (is (= {:data {"task_id" true + "event" "new_task_history" + "started_at" true + "ended_at" true + "duration" true + "db_engine" (name (db/select-one-field :engine Database :id (mt/id))) + "db_id" true + "task_name" "classify-tables"} + :user-id nil} + (-> (snowplow-test/pop-event-data-and-user-id!) + last + mt/boolean-ids-and-timestamps + (update :data dissoc "task_details") + (update-in [:data "duration"] some?))))))))