diff --git a/e2e/test/scenarios/onboarding/setup/setup.cy.spec.js b/e2e/test/scenarios/onboarding/setup/setup.cy.spec.js index ad4a6e4855df36536d823b50864820034d82188d..e17c99c1afc89c9492cb33eeb874247979734dce 100644 --- a/e2e/test/scenarios/onboarding/setup/setup.cy.spec.js +++ b/e2e/test/scenarios/onboarding/setup/setup.cy.spec.js @@ -287,19 +287,20 @@ describeWithSnowplow("scenarios > setup", () => { }); it("should send snowplow events", () => { - // 1 - pageview + // 1 - new_instance_created + // 2 - pageview cy.visit(`/setup`); - // 2 - setup/step_seen + // 3 - setup/step_seen // eslint-disable-next-line no-unscoped-text-selectors -- deprecated usage cy.findByText("Welcome to Metabase"); cy.button("Let's get started").click(); - // 3 - setup/step_seen + // 4 - setup/step_seen // eslint-disable-next-line no-unscoped-text-selectors -- deprecated usage cy.findByText("What's your preferred language?"); - expectGoodSnowplowEvents(3); + expectGoodSnowplowEvents(4); }); it("should ignore snowplow failures and work as normal", () => { @@ -310,6 +311,6 @@ describeWithSnowplow("scenarios > setup", () => { cy.findByText("Welcome to Metabase"); cy.button("Let's get started").click(); - expectGoodSnowplowEvents(0); + expectGoodSnowplowEvents(1); }); }); diff --git a/src/metabase/models/task_history.clj b/src/metabase/models/task_history.clj index 3f23978a0dd5b21f5742c70c6e6216d8203be9ce..568125065d7155f7ac6da72c08d7da7acca4f3db 100644 --- a/src/metabase/models/task_history.clj +++ b/src/metabase/models/task_history.clj @@ -1,16 +1,11 @@ (ns metabase.models.task-history (:require - [cheshire.core :as json] [cheshire.generate :refer [add-encoder encode-map]] [java-time :as t] - [metabase.analytics.snowplow :as snowplow] - [metabase.api.common :refer [*current-user-id*]] - [metabase.models.database :refer [Database]] [metabase.models.interface :as mi] [metabase.models.permissions :as perms] [metabase.public-settings.premium-features :as premium-features] [metabase.util :as u] - [metabase.util.date-2 :as u.date] [metabase.util.i18n :refer [trs]] [metabase.util.log :as log] [metabase.util.schema :as su] @@ -48,58 +43,9 @@ (perms/application-perms-path :monitoring) "/")}) -(defn- task-details-for-snowplow - "Ensure task_details is less than 2048 characters. - - 2048 is the length limit for task_details in our snowplow schema, if exceeds this limit, - the event will be considered a bad rows and ignored. - - Most of the times it's < 200 characters, but there are cases task-details contains an exception. - In those case, we want to make sure the stacktrace are ignored from the task-details. - - Return nil if After trying to strip out the stacktraces and the stringified task-details - still has more than 2048 chars." - [task-details] - (let [;; task-details is {:throwable e} during sync - ;; check [[metabase.sync.util/run-step-with-metadata]] - task-details (cond-> task-details - (some? (:throwable task-details)) - (update :throwable dissoc :trace :via) - - ;; if task-history is created via `with-task-history - ;; the exception is manually caught and includes a stacktrace - true - (dissoc :stacktrace) - - true - (dissoc :trace :via)) - as-string (json/generate-string task-details)] - (if (>= (count as-string) 2048) - nil - as-string))) - -(defn- task->snowplow-event - [task] - (let [task-details (:task_details task)] - (merge {:task_id (:id task) - :task_name (:task task) - :duration (:duration task) - :task_details (task-details-for-snowplow task-details) - :started_at (u.date/format-rfc3339 (:started_at task)) - :ended_at (u.date/format-rfc3339 (:ended_at task))} - (when-let [db-id (:db_id task)] - {:db_id db-id - :db_engine (t2/select-one-fn :engine Database :id db-id)})))) - -(defn- post-insert - [task] - (u/prog1 task - (snowplow/track-event! ::snowplow/new-task-history *current-user-id* (task->snowplow-event <>)))) - (mi/define-methods TaskHistory - {:types (constantly {:task_details :json}) - :post-insert post-insert}) + {:types (constantly {:task_details :json})}) (s/defn all "Return all TaskHistory entries, applying `limit` and `offset` if not nil" diff --git a/test/metabase/models/task_history_test.clj b/test/metabase/models/task_history_test.clj index 2319b80c84bb0e389b9ef2e9359df3d1b5e4e6b6..a85daac9305609ddc1b7b5dab2e843e6a6857548 100644 --- a/test/metabase/models/task_history_test.clj +++ b/test/metabase/models/task_history_test.clj @@ -1,11 +1,8 @@ (ns metabase.models.task-history-test (:require - [cheshire.core :as json] [clojure.test :refer :all] [java-time :as t] - [metabase.analytics.snowplow-test :as snowplow-test] - [metabase.api.common :refer [*current-user-id*]] - [metabase.models :refer [Database TaskHistory]] + [metabase.models :refer [TaskHistory]] [metabase.models.task-history :as task-history] [metabase.test :as mt] [metabase.util :as u] @@ -74,124 +71,3 @@ (task-history/cleanup-task-history! 100) (is (= #{task-1 task-2} (set (map :task (t2/select TaskHistory))))))))) - -(defn- last-snowplow-event - [] - (-> (snowplow-test/pop-event-data-and-user-id!) - last - mt/boolean-ids-and-timestamps - (update-in [:data "task_details"] json/parse-string))) - -(defn- insert-then-pop! - "Insert a task history then returns the last snowplow event." - [task] - (t2/insert! TaskHistory task) - (last-snowplow-event)) - -(deftest snowplow-tracking-test - (snowplow-test/with-fake-snowplow-collector - (let [t (t/zoned-date-time)] - (testing "inserting a task history should track a snowplow event" - (is (= {:data {"duration" 10 - "ended_at" true - "started_at" true - "event" "new_task_history" - "task_details" {"apple" 40, "orange" 2} - "task_id" true - "task_name" "a fake task"} - :user-id nil} - (insert-then-pop! (assoc (make-10-millis-task t) - :task "a fake task" - :task_details {:apple 40 - :orange 2})))) - - (testing "should have user id if *current-user-id* is bound" - (binding [*current-user-id* 1] - (is (= {:data {"duration" 10 - "ended_at" true - "started_at" true - "event" "new_task_history" - "task_details" {"apple" 40, "orange" 2} - "task_id" true - "task_name" "a fake task"} - :user-id "1"} - (insert-then-pop! (assoc (make-10-millis-task t) - :task "a fake task" - :task_details {:apple 40 - :orange 2})))))) - - (testing "infer db_engine if db_id exists" - (mt/with-temp Database [{db-id :id} {:engine "postgres"}] - (is (= {:data {"duration" 10 - "ended_at" true - "started_at" true - "db_id" true - "db_engine" "postgres" - "event" "new_task_history" - "task_details" {"apple" 40, "orange" 2} - "task_id" true - "task_name" "a fake task"} - :user-id nil} - (insert-then-pop! (assoc (make-10-millis-task t) - :task "a fake task" - :db_id db-id - :task_details {:apple 40 - :orange 2})))))) - (testing "date-time properties should be correctly formatted" - (t2/insert! TaskHistory (assoc (make-10-millis-task t) :task "a fake task")) - (let [event (:data (first (snowplow-test/pop-event-data-and-user-id!)))] - (is (snowplow-test/valid-datetime-for-snowplow? (get event "started_at"))) - (is (snowplow-test/valid-datetime-for-snowplow? (get event "ended_at"))))))))) - -(deftest ensure-no-stacktrace-send-to-snowplow - ;; The snowplow schema for task history has a limit of 2048 chars for task_details. - ;; Most of the time the task_details < 200 chars, but when an exception happens it could exceed the limit. - ;; And the stack trace is the longest part of the exception, so we need to make sure no stacktrace - ;; is serialized before sending it to snowplow (#28006) - (snowplow-test/with-fake-snowplow-collector - (testing "Filter out exceptions's stacktrace when send to snowplow" - (testing "when using `with-task-history`" - ;; register a task history with exception - (u/ignore-exceptions - (task-history/with-task-history {:task "test-exception"} - (throw (ex-info "Oops" {:oops true})))) - - (is (= {"ex-data" {"oops" true} - "exception" "class clojure.lang.ExceptionInfo" - "message" "Oops" - "original-info" nil - "status" "failed"} - (get-in (last-snowplow-event) [:data "task_details"])))) - - (testing "when task_details is an exception from sync task" - (is (= {"throwable" {"cause" "Oops", "data" {"oops" true}}} - (-> (assoc (make-10-millis-task (t/local-date-time)) - :task "a fake task" - ;; sync task include the exception as {:throwable e} - :task_details {:throwable (ex-info "Oops" {:oops true})}) - insert-then-pop! - (get-in [:data "task_details"]))))) - - (testing "when task_details is an exception" - (is (= {"cause" "Oops", "data" {"oops" true}} - (-> (assoc (make-10-millis-task (t/local-date-time)) - :task "a fake task" - ;; sync task include the exception as {:throwable e} - :task_details (ex-info "Oops" {:oops true})) - insert-then-pop! - (get-in [:data "task_details"])))))) - - (testing "if task_details is more than 2048 chars, ignore it but still send the infos" - (let [task-details {:a (apply str (repeat 2048 "a"))}] - (is (= {"duration" 10 - "ended_at" true - "started_at" true - "event" "new_task_history" - "task_details" nil - "task_id" true - "task_name" "a fake task"} - (-> (assoc (make-10-millis-task (t/local-date-time)) - :task "a fake task" - :task_details task-details) - insert-then-pop! - (get-in [:data])))))))) diff --git a/test/metabase/sync/analyze_test.clj b/test/metabase/sync/analyze_test.clj index 86053681c0925da04a74a11c0c3dc9cd6e868d11..605291661a6fd39945a46d422f3265f9d9526acb 100644 --- a/test/metabase/sync/analyze_test.clj +++ b/test/metabase/sync/analyze_test.clj @@ -1,7 +1,6 @@ (ns ^:mb/once metabase.sync.analyze-test (:require [clojure.test :refer :all] - [metabase.analytics.snowplow-test :as snowplow-test] [metabase.models.database :refer [Database]] [metabase.models.field :as field :refer [Field]] [metabase.models.interface :as mi] @@ -255,24 +254,3 @@ (is (= last-sync-time (latest-sync-time table)) "sync time shouldn't change"))))) - -(deftest analyze-should-send-a-snowplow-event-test - (testing "the recorded event should include db-id and db-engine" - (snowplow-test/with-fake-snowplow-collector - (mt/with-temp* [Table [table (fake-table)] - Field [_field (fake-field table)]] - (analyze-table! table) - (is (= {:data {"task_id" true - "event" "new_task_history" - "started_at" true - "ended_at" true - "duration" true - "db_engine" (name (t2/select-one-fn :engine Database :id (mt/id))) - "db_id" true - "task_name" "classify-tables"} - :user-id nil} - (-> (snowplow-test/pop-event-data-and-user-id!) - last - mt/boolean-ids-and-timestamps - (update :data dissoc "task_details") - (update-in [:data "duration"] some?))))))))