From 8c66c47361d11a0789a3e4a77bf96223e34074e5 Mon Sep 17 00:00:00 2001
From: Ngoc Khuat <qn.khuat@gmail.com>
Date: Mon, 18 Jul 2022 20:41:58 +0700
Subject: [PATCH] Add snowplow tracking for new records on TaskHistory (#23893)

* add snowplow tracking for new records on TaskHistory

* more tests and have a better way to figure out db_id, db_engine
---
 .../onboarding/setup/setup.cy.spec.js         | 12 ++-
 .../com.metabase/task/jsonschema/1-0-0        | 79 +++++++++++++++++++
 src/metabase/analytics/snowplow.clj           |  6 +-
 src/metabase/models/task_history.clj          | 29 ++++++-
 src/metabase/sync/util.clj                    | 18 +++--
 src/metabase/task/send_pulses.clj             |  3 +-
 test/metabase/models/task_history_test.clj    | 67 +++++++++++++++-
 test/metabase/sync/analyze_test.clj           | 26 +++++-
 8 files changed, 217 insertions(+), 23 deletions(-)
 create mode 100644 snowplow/iglu-client-embedded/schemas/com.metabase/task/jsonschema/1-0-0

diff --git a/frontend/test/metabase/scenarios/onboarding/setup/setup.cy.spec.js b/frontend/test/metabase/scenarios/onboarding/setup/setup.cy.spec.js
index 8f6047f1ac0..dc5af1847ee 100644
--- a/frontend/test/metabase/scenarios/onboarding/setup/setup.cy.spec.js
+++ b/frontend/test/metabase/scenarios/onboarding/setup/setup.cy.spec.js
@@ -242,28 +242,26 @@ describeWithSnowplow("scenarios > setup", () => {
   });
 
   it("should send snowplow events", () => {
-    // 1 - new_instance_created
-    // 2 - pageview
+    // 1 - pageview
     cy.visit(`/setup`);
 
-    // 3 - setup/step_seen
+    // 2 - setup/step_seen
     cy.findByText("Welcome to Metabase");
     cy.button("Let's get started").click();
 
-    // 4 - setup/step_seen
+    // 3 - setup/step_seen
     cy.findByText("What's your preferred language?");
 
-    expectGoodSnowplowEvents(4);
+    expectGoodSnowplowEvents(3);
   });
 
   it("should ignore snowplow failures and work as normal", () => {
-    // 1 - new_instance_created
     blockSnowplow();
     cy.visit(`/setup`);
 
     cy.findByText("Welcome to Metabase");
     cy.button("Let's get started").click();
 
-    expectGoodSnowplowEvents(1);
+    expectGoodSnowplowEvents(0);
   });
 });
diff --git a/snowplow/iglu-client-embedded/schemas/com.metabase/task/jsonschema/1-0-0 b/snowplow/iglu-client-embedded/schemas/com.metabase/task/jsonschema/1-0-0
new file mode 100644
index 00000000000..7c6b67c5eda
--- /dev/null
+++ b/snowplow/iglu-client-embedded/schemas/com.metabase/task/jsonschema/1-0-0
@@ -0,0 +1,79 @@
+{
+  "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#",
+  "description": "Task history",
+  "self": {
+    "vendor": "com.metabase",
+    "name": "task",
+    "format": "jsonschema",
+    "version": "1-0-0"
+  },
+  "type": "object",
+  "properties": {
+    "task_id": {
+      "description": "Unique identifier for the TaskHistory within the Metabase instance",
+      "type": "integer",
+      "minimum": 0,
+      "maximum": 2147483647
+    },
+    "task_name": {
+      "description": "TaskHistory name",
+      "type": "string",
+      "maxLength": 512
+    },
+    "db_id": {
+      "description": "Database that the task was executed against",
+      "type": [
+        "integer",
+        "null"
+      ],
+      "minimum": 0,
+      "maximum": 2147483647
+    },
+    "db_engine": {
+      "description": "The engine of the Database that the task was executed against",
+      "type": [
+        "string",
+        "null"
+      ],
+      "maxLength": 64
+    },
+    "started_at": {
+      "description": "The start time of the task",
+      "type": [
+        "string",
+        "null"
+      ],
+      "format": "date-time",
+      "maxLength": 1024
+    },
+    "ended_at": {
+      "description": "The finished time of the task",
+      "type": [
+        "string",
+        "null"
+      ],
+      "format": "date-time",
+      "maxLength": 1024
+    },
+    "duration": {
+      "description": "The duration of the task in milliseconds",
+      "type": "integer",
+      "minimum": 0,
+      "maximum": 2147483647
+    },
+    "task-details": {
+      "description": "the JSON-encoded details of the task",
+      "type": [
+        "string",
+        "null"
+      ],
+      "maxLength": 2048
+    }
+  },
+  "required": [
+    "task_id",
+    "task_name",
+    "duration"
+  ],
+  "additionalProperties": true
+}
diff --git a/src/metabase/analytics/snowplow.clj b/src/metabase/analytics/snowplow.clj
index 0dd066997cb..d5351982629 100644
--- a/src/metabase/analytics/snowplow.clj
+++ b/src/metabase/analytics/snowplow.clj
@@ -118,7 +118,8 @@
    ::dashboard "1-0-0"
    ::database  "1-0-0"
    ::instance  "1-1-0"
-   ::timeline  "1-0-0"})
+   ::timeline  "1-0-0"
+   ::task      "1-0-0"})
 
 (defn- context
   "Common context included in every analytics event"
@@ -160,7 +161,8 @@
    ::question-added-to-dashboard    ::dashboard
    ::database-connection-successful ::database
    ::database-connection-failed     ::database
-   ::new-event-created              ::timeline})
+   ::new-event-created              ::timeline
+   ::new-task-history               ::task})
 
 (defn track-event!
   "Send a single analytics event to the Snowplow collector, if tracking is enabled for this MB instance and a collector
diff --git a/src/metabase/models/task_history.clj b/src/metabase/models/task_history.clj
index d443d549797..bbf450f28de 100644
--- a/src/metabase/models/task_history.clj
+++ b/src/metabase/models/task_history.clj
@@ -1,11 +1,16 @@
 (ns metabase.models.task-history
-  (:require [cheshire.generate :refer [add-encoder encode-map]]
+  (:require [cheshire.core :as json]
+            [cheshire.generate :refer [add-encoder encode-map]]
             [clojure.tools.logging :as log]
             [java-time :as t]
+            [metabase.analytics.snowplow :as snowplow]
+            [metabase.api.common :refer [*current-user-id*]]
+            [metabase.models.database :refer [Database]]
             [metabase.models.interface :as mi]
             [metabase.models.permissions :as perms]
             [metabase.public-settings.premium-features :as premium-features]
             [metabase.util :as u]
+            [metabase.util.date-2 :as u.date]
             [metabase.util.i18n :refer [trs]]
             [metabase.util.schema :as su]
             [schema.core :as s]
@@ -37,10 +42,30 @@
       (perms/application-perms-path :monitoring)
       "/")})
 
+(defn- task->snowplow-event
+  [task]
+  (let [task-details (:task_details task)]
+   (merge {:task_id      (:id task)
+           :task_name    (:task task)
+           :duration     (:duration task)
+           :task_details (json/generate-string task-details)
+           :started_at   (u.date/format (:started_at task))
+           :ended_at     (u.date/format (:ended_at task))}
+          (when-let [db-id (:db_id task)]
+            {:db_id     db-id
+             :db_engine (db/select-one-field :engine Database :id db-id)}))))
+
+(defn- post-insert
+  [task]
+  (u/prog1 task
+    (snowplow/track-event! ::snowplow/new-task-history *current-user-id* (task->snowplow-event <>))))
+
 (u/strict-extend (class TaskHistory)
   models/IModel
   (merge models/IModelDefaults
-         {:types (constantly {:task_details :json})})
+         {:types      (constantly {:task_details :json})
+          :post-insert post-insert})
+
   mi/IObjectPermissions
   (merge mi/IObjectPermissionsDefaults
          {:can-read?         (partial mi/current-user-has-full-permissions? :read)
diff --git a/src/metabase/sync/util.clj b/src/metabase/sync/util.clj
index b0f997bdf63..13d1bcf7604 100644
--- a/src/metabase/sync/util.clj
+++ b/src/metabase/sync/util.clj
@@ -458,13 +458,17 @@
    database  :- i/DatabaseInstance
    {:keys [steps] :as sync-md} :- SyncOperationMetadata]
   (try
-    (db/insert-many! TaskHistory
-      (cons (create-task-history operation database sync-md)
-            (for [[step-name step-info] steps
-                  :let                  [task-details (dissoc step-info :start-time :end-time :log-summary-fn)]]
-              (assoc (create-task-history step-name database step-info)
-                :task_details (when (seq task-details)
-                                task-details)))))
+    (->> (for [[step-name step-info] steps
+               :let                  [task-details (dissoc step-info :start-time :end-time :log-summary-fn)]]
+           (assoc (create-task-history step-name database step-info)
+                  :task_details (when (seq task-details)
+                                  task-details)))
+         (cons (create-task-history operation database sync-md))
+         ;; Using `insert!` instead of `insert-many!` here to make sure
+         ;; `post-insert` is triggered
+         (map #(db/insert! TaskHistory %))
+         (map :id)
+         doall)
     (catch Throwable e
       (log/warn e (trs "Error saving task history")))))
 
diff --git a/src/metabase/task/send_pulses.clj b/src/metabase/task/send_pulses.clj
index b27ef5edb4a..5eeb6ab4f71 100644
--- a/src/metabase/task/send_pulses.clj
+++ b/src/metabase/task/send_pulses.clj
@@ -49,7 +49,8 @@
    (let [pulse-id->channels (group-by :pulse_id (pulse-channel/retrieve-scheduled-channels hour weekday monthday monthweek))]
      (doseq [[pulse-id channels] pulse-id->channels]
        (try
-         (task-history/with-task-history {:task (format "send-pulse %s" pulse-id)}
+         (task-history/with-task-history {:task         "send-pulse"
+                                          :task_details {:pulse-id pulse-id}}
            (log/debug (trs "Starting Pulse Execution: {0}" pulse-id))
            (when-let [pulse (pulse/retrieve-notification pulse-id :archived false)]
              (metabase.pulse/send-pulse! pulse :channel-ids (map :id channels)))
diff --git a/test/metabase/models/task_history_test.clj b/test/metabase/models/task_history_test.clj
index b75b7ce2584..fb69bfe4fef 100644
--- a/test/metabase/models/task_history_test.clj
+++ b/test/metabase/models/task_history_test.clj
@@ -1,7 +1,11 @@
 (ns metabase.models.task-history-test
-  (:require [clojure.test :refer :all]
+  (:require [cheshire.core :as json]
+            [clojure.test :refer :all]
             [java-time :as t]
-            [metabase.models.task-history :as task-history :refer [TaskHistory]]
+            [metabase.analytics.snowplow-test :as snowplow-test]
+            [metabase.api.common :refer [*current-user-id*]]
+            [metabase.models :refer [Database TaskHistory]]
+            [metabase.models.task-history :as task-history]
             [metabase.test :as mt]
             [metabase.util :as u]
             [toucan.db :as db]))
@@ -67,3 +71,62 @@
         (task-history/cleanup-task-history! 100)
         (is (= #{task-1 task-2}
                (set (map :task (TaskHistory)))))))))
+
+(defn- insert-then-pop!
+  "Insert a task history and get the last snowplow event."
+  [task]
+  (db/insert! TaskHistory task)
+  (-> (snowplow-test/pop-event-data-and-user-id!)
+      last
+      mt/boolean-ids-and-timestamps
+      (update-in [:data "task_details"] json/parse-string)))
+
+(deftest snowplow-tracking-test
+  (snowplow-test/with-fake-snowplow-collector
+    (let [t1 (t/zoned-date-time)]
+      (testing "inserting a task history should track a snowplow event"
+        (is (= {:data   {"duration"     10
+                         "ended_at"     true
+                         "started_at"   true
+                         "event"        "new_task_history"
+                         "task_details" {"apple" 40, "orange" 2}
+                         "task_id"      true
+                         "task_name"   "a fake task"}
+                :user-id nil}
+               (insert-then-pop! (assoc (make-10-millis-task t1)
+                                        :task         "a fake task"
+                                        :task_details {:apple  40
+                                                       :orange 2}))))
+
+        (testing "should have user id if *current-user-id* is bound"
+          (binding [*current-user-id* 1]
+            (is (= {:data    {"duration"     10
+                              "ended_at"     true
+                              "started_at"   true
+                              "event"        "new_task_history"
+                              "task_details" {"apple" 40, "orange" 2}
+                              "task_id"      true
+                              "task_name"   "a fake task"}
+                    :user-id "1"}
+                   (insert-then-pop! (assoc (make-10-millis-task t1)
+                                            :task         "a fake task"
+                                            :task_details {:apple  40
+                                                           :orange 2}))))))
+
+        (testing "infer db_engine if db_id exists"
+          (mt/with-temp Database [{db-id :id} {:engine "postgres"}]
+           (is (= {:data    {"duration"     10
+                             "ended_at"     true
+                             "started_at"   true
+                             "db_id"        true
+                             "db_engine"    "postgres"
+                             "event"        "new_task_history"
+                             "task_details" {"apple" 40, "orange" 2}
+                             "task_id"      true
+                             "task_name"   "a fake task"}
+                   :user-id nil}
+                  (insert-then-pop! (assoc (make-10-millis-task t1)
+                                           :task         "a fake task"
+                                           :db_id        db-id
+                                           :task_details {:apple  40
+                                                          :orange 2}))))))))))
diff --git a/test/metabase/sync/analyze_test.clj b/test/metabase/sync/analyze_test.clj
index 0859d5c8fca..1c5ec1b3f38 100644
--- a/test/metabase/sync/analyze_test.clj
+++ b/test/metabase/sync/analyze_test.clj
@@ -1,5 +1,6 @@
 (ns metabase.sync.analyze-test
   (:require [clojure.test :refer :all]
+            [metabase.analytics.snowplow-test :as snowplow-test]
             [metabase.models.database :refer [Database]]
             [metabase.models.field :as field :refer [Field]]
             [metabase.models.table :refer [Table]]
@@ -98,8 +99,8 @@
 (defn- classified-semantic-type [values]
   (let [field (field/map->FieldInstance {:base_type :type/Text})]
     (:semantic_type (classifiers.text-fingerprint/infer-semantic-type
-                    field
-                    (transduce identity (fingerprinters/fingerprinter field) values)))))
+                     field
+                     (transduce identity (fingerprinters/fingerprinter field) values)))))
 
 (deftest classify-json-test
   (doseq [[group values->expected] {"When all the values are valid JSON dicts they're valid JSON"
@@ -252,3 +253,24 @@
         (is (= last-sync-time
                (latest-sync-time table))
             "sync time shouldn't change")))))
+
+(deftest analyze-should-send-a-snowplow-event-test
+  (testing "the recorded event should include db-id and db-engine"
+    (snowplow-test/with-fake-snowplow-collector
+      (mt/with-temp* [Table [table  (fake-table)]
+                      Field [_field (fake-field table)]]
+        (analyze-table! table)
+        (is (= {:data {"task_id"    true
+                       "event"      "new_task_history"
+                       "started_at" true
+                       "ended_at"   true
+                       "duration"   true
+                       "db_engine"  (name (db/select-one-field :engine Database :id (mt/id)))
+                       "db_id"      true
+                       "task_name"  "classify-tables"}
+                :user-id nil}
+               (-> (snowplow-test/pop-event-data-and-user-id!)
+                   last
+                   mt/boolean-ids-and-timestamps
+                   (update :data dissoc "task_details")
+                   (update-in [:data "duration"] some?))))))))
-- 
GitLab