Skip to content
Snippets Groups Projects
Unverified Commit 09123258 authored by Ngoc Khuat's avatar Ngoc Khuat Committed by GitHub
Browse files

Sun set snowplow tracking for task history (#31182)

parent 5fdce46a
No related merge requests found
......@@ -287,19 +287,20 @@ describeWithSnowplow("scenarios > setup", () => {
});
it("should send snowplow events", () => {
// 1 - pageview
// 1 - new_instance_created
// 2 - pageview
cy.visit(`/setup`);
// 2 - setup/step_seen
// 3 - setup/step_seen
// eslint-disable-next-line no-unscoped-text-selectors -- deprecated usage
cy.findByText("Welcome to Metabase");
cy.button("Let's get started").click();
// 3 - setup/step_seen
// 4 - setup/step_seen
// eslint-disable-next-line no-unscoped-text-selectors -- deprecated usage
cy.findByText("What's your preferred language?");
expectGoodSnowplowEvents(3);
expectGoodSnowplowEvents(4);
});
it("should ignore snowplow failures and work as normal", () => {
......@@ -310,6 +311,6 @@ describeWithSnowplow("scenarios > setup", () => {
cy.findByText("Welcome to Metabase");
cy.button("Let's get started").click();
expectGoodSnowplowEvents(0);
expectGoodSnowplowEvents(1);
});
});
(ns metabase.models.task-history
(:require
[cheshire.core :as json]
[cheshire.generate :refer [add-encoder encode-map]]
[java-time :as t]
[metabase.analytics.snowplow :as snowplow]
[metabase.api.common :refer [*current-user-id*]]
[metabase.models.database :refer [Database]]
[metabase.models.interface :as mi]
[metabase.models.permissions :as perms]
[metabase.public-settings.premium-features :as premium-features]
[metabase.util :as u]
[metabase.util.date-2 :as u.date]
[metabase.util.i18n :refer [trs]]
[metabase.util.log :as log]
[metabase.util.schema :as su]
......@@ -48,58 +43,9 @@
(perms/application-perms-path :monitoring)
"/")})
(defn- task-details-for-snowplow
"Ensure task_details is less than 2048 characters.
2048 is the length limit for task_details in our snowplow schema, if exceeds this limit,
the event will be considered a bad rows and ignored.
Most of the times it's < 200 characters, but there are cases task-details contains an exception.
In those case, we want to make sure the stacktrace are ignored from the task-details.
Return nil if After trying to strip out the stacktraces and the stringified task-details
still has more than 2048 chars."
[task-details]
(let [;; task-details is {:throwable e} during sync
;; check [[metabase.sync.util/run-step-with-metadata]]
task-details (cond-> task-details
(some? (:throwable task-details))
(update :throwable dissoc :trace :via)
;; if task-history is created via `with-task-history
;; the exception is manually caught and includes a stacktrace
true
(dissoc :stacktrace)
true
(dissoc :trace :via))
as-string (json/generate-string task-details)]
(if (>= (count as-string) 2048)
nil
as-string)))
(defn- task->snowplow-event
[task]
(let [task-details (:task_details task)]
(merge {:task_id (:id task)
:task_name (:task task)
:duration (:duration task)
:task_details (task-details-for-snowplow task-details)
:started_at (u.date/format-rfc3339 (:started_at task))
:ended_at (u.date/format-rfc3339 (:ended_at task))}
(when-let [db-id (:db_id task)]
{:db_id db-id
:db_engine (t2/select-one-fn :engine Database :id db-id)}))))
(defn- post-insert
[task]
(u/prog1 task
(snowplow/track-event! ::snowplow/new-task-history *current-user-id* (task->snowplow-event <>))))
(mi/define-methods
TaskHistory
{:types (constantly {:task_details :json})
:post-insert post-insert})
{:types (constantly {:task_details :json})})
(s/defn all
"Return all TaskHistory entries, applying `limit` and `offset` if not nil"
......
(ns metabase.models.task-history-test
(:require
[cheshire.core :as json]
[clojure.test :refer :all]
[java-time :as t]
[metabase.analytics.snowplow-test :as snowplow-test]
[metabase.api.common :refer [*current-user-id*]]
[metabase.models :refer [Database TaskHistory]]
[metabase.models :refer [TaskHistory]]
[metabase.models.task-history :as task-history]
[metabase.test :as mt]
[metabase.util :as u]
......@@ -74,124 +71,3 @@
(task-history/cleanup-task-history! 100)
(is (= #{task-1 task-2}
(set (map :task (t2/select TaskHistory)))))))))
(defn- last-snowplow-event
[]
(-> (snowplow-test/pop-event-data-and-user-id!)
last
mt/boolean-ids-and-timestamps
(update-in [:data "task_details"] json/parse-string)))
(defn- insert-then-pop!
"Insert a task history then returns the last snowplow event."
[task]
(t2/insert! TaskHistory task)
(last-snowplow-event))
(deftest snowplow-tracking-test
(snowplow-test/with-fake-snowplow-collector
(let [t (t/zoned-date-time)]
(testing "inserting a task history should track a snowplow event"
(is (= {:data {"duration" 10
"ended_at" true
"started_at" true
"event" "new_task_history"
"task_details" {"apple" 40, "orange" 2}
"task_id" true
"task_name" "a fake task"}
:user-id nil}
(insert-then-pop! (assoc (make-10-millis-task t)
:task "a fake task"
:task_details {:apple 40
:orange 2}))))
(testing "should have user id if *current-user-id* is bound"
(binding [*current-user-id* 1]
(is (= {:data {"duration" 10
"ended_at" true
"started_at" true
"event" "new_task_history"
"task_details" {"apple" 40, "orange" 2}
"task_id" true
"task_name" "a fake task"}
:user-id "1"}
(insert-then-pop! (assoc (make-10-millis-task t)
:task "a fake task"
:task_details {:apple 40
:orange 2}))))))
(testing "infer db_engine if db_id exists"
(mt/with-temp Database [{db-id :id} {:engine "postgres"}]
(is (= {:data {"duration" 10
"ended_at" true
"started_at" true
"db_id" true
"db_engine" "postgres"
"event" "new_task_history"
"task_details" {"apple" 40, "orange" 2}
"task_id" true
"task_name" "a fake task"}
:user-id nil}
(insert-then-pop! (assoc (make-10-millis-task t)
:task "a fake task"
:db_id db-id
:task_details {:apple 40
:orange 2}))))))
(testing "date-time properties should be correctly formatted"
(t2/insert! TaskHistory (assoc (make-10-millis-task t) :task "a fake task"))
(let [event (:data (first (snowplow-test/pop-event-data-and-user-id!)))]
(is (snowplow-test/valid-datetime-for-snowplow? (get event "started_at")))
(is (snowplow-test/valid-datetime-for-snowplow? (get event "ended_at")))))))))
(deftest ensure-no-stacktrace-send-to-snowplow
;; The snowplow schema for task history has a limit of 2048 chars for task_details.
;; Most of the time the task_details < 200 chars, but when an exception happens it could exceed the limit.
;; And the stack trace is the longest part of the exception, so we need to make sure no stacktrace
;; is serialized before sending it to snowplow (#28006)
(snowplow-test/with-fake-snowplow-collector
(testing "Filter out exceptions's stacktrace when send to snowplow"
(testing "when using `with-task-history`"
;; register a task history with exception
(u/ignore-exceptions
(task-history/with-task-history {:task "test-exception"}
(throw (ex-info "Oops" {:oops true}))))
(is (= {"ex-data" {"oops" true}
"exception" "class clojure.lang.ExceptionInfo"
"message" "Oops"
"original-info" nil
"status" "failed"}
(get-in (last-snowplow-event) [:data "task_details"]))))
(testing "when task_details is an exception from sync task"
(is (= {"throwable" {"cause" "Oops", "data" {"oops" true}}}
(-> (assoc (make-10-millis-task (t/local-date-time))
:task "a fake task"
;; sync task include the exception as {:throwable e}
:task_details {:throwable (ex-info "Oops" {:oops true})})
insert-then-pop!
(get-in [:data "task_details"])))))
(testing "when task_details is an exception"
(is (= {"cause" "Oops", "data" {"oops" true}}
(-> (assoc (make-10-millis-task (t/local-date-time))
:task "a fake task"
;; sync task include the exception as {:throwable e}
:task_details (ex-info "Oops" {:oops true}))
insert-then-pop!
(get-in [:data "task_details"]))))))
(testing "if task_details is more than 2048 chars, ignore it but still send the infos"
(let [task-details {:a (apply str (repeat 2048 "a"))}]
(is (= {"duration" 10
"ended_at" true
"started_at" true
"event" "new_task_history"
"task_details" nil
"task_id" true
"task_name" "a fake task"}
(-> (assoc (make-10-millis-task (t/local-date-time))
:task "a fake task"
:task_details task-details)
insert-then-pop!
(get-in [:data]))))))))
(ns ^:mb/once metabase.sync.analyze-test
(:require
[clojure.test :refer :all]
[metabase.analytics.snowplow-test :as snowplow-test]
[metabase.models.database :refer [Database]]
[metabase.models.field :as field :refer [Field]]
[metabase.models.interface :as mi]
......@@ -255,24 +254,3 @@
(is (= last-sync-time
(latest-sync-time table))
"sync time shouldn't change")))))
(deftest analyze-should-send-a-snowplow-event-test
(testing "the recorded event should include db-id and db-engine"
(snowplow-test/with-fake-snowplow-collector
(mt/with-temp* [Table [table (fake-table)]
Field [_field (fake-field table)]]
(analyze-table! table)
(is (= {:data {"task_id" true
"event" "new_task_history"
"started_at" true
"ended_at" true
"duration" true
"db_engine" (name (t2/select-one-fn :engine Database :id (mt/id)))
"db_id" true
"task_name" "classify-tables"}
:user-id nil}
(-> (snowplow-test/pop-event-data-and-user-id!)
last
mt/boolean-ids-and-timestamps
(update :data dissoc "task_details")
(update-in [:data "duration"] some?))))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment