Skip to content
Snippets Groups Projects
Unverified Commit 3bfa4e16 authored by Ngoc Khuat's avatar Ngoc Khuat Committed by GitHub
Browse files

makes sure snowplow tracking for taskhistory does not exceeds the chars limit (#28019)

* makes sure snowplow tracking for taskhistory does not exceeds the 2048 limit

* explains why 2048 limit is needed and return nil instead of empty string
parent e45c5939
No related branches found
No related tags found
No related merge requests found
......@@ -46,13 +46,43 @@
(perms/application-perms-path :monitoring)
"/")})
(defn- task-details-for-snowplow
"Ensure task_details is less than 2048 characters.
2048 is the length limit for task_details in our snowplow schema, if exceeds this limit,
the event will be considered a bad rows and ignored.
Most of the times it's < 200 characters, but there are cases task-details contains an exception.
In those case, we want to make sure the stacktrace are ignored from the task-details.
Return nil if After trying to strip out the stacktraces and the stringified task-details
still has more than 2048 chars."
[task-details]
(let [;; task-details is {:throwable e} during sync
;; check [[metabase.sync.util/run-step-with-metadata]]
task-details (cond-> task-details
(some? (:throwable task-details))
(update :throwable dissoc :trace :via)
;; if task-history is created via `with-task-history
;; the exception is manually caught and includes a stacktrace
true
(dissoc :stacktrace)
true
(dissoc :trace :via))
as-string (json/generate-string task-details)]
(if (>= (count as-string) 2048)
nil
as-string)))
(defn- task->snowplow-event
[task]
(let [task-details (:task_details task)]
(merge {:task_id (:id task)
:task_name (:task task)
:duration (:duration task)
:task_details (json/generate-string task-details)
:task_details (task-details-for-snowplow task-details)
:started_at (u.date/format-rfc3339 (:started_at task))
:ended_at (u.date/format-rfc3339 (:ended_at task))}
(when-let [db-id (:db_id task)]
......
......@@ -73,15 +73,19 @@
(is (= #{task-1 task-2}
(set (map :task (db/select TaskHistory)))))))))
(defn- insert-then-pop!
"Insert a task history and get the last snowplow event."
[task]
(db/insert! TaskHistory task)
(defn- last-snowplow-event
[]
(-> (snowplow-test/pop-event-data-and-user-id!)
last
mt/boolean-ids-and-timestamps
(update-in [:data "task_details"] json/parse-string)))
(defn- insert-then-pop!
"Insert a task history then returns the last snowplow event."
[task]
(db/insert! TaskHistory task)
(last-snowplow-event))
(deftest snowplow-tracking-test
(snowplow-test/with-fake-snowplow-collector
(let [t (t/zoned-date-time)]
......@@ -136,3 +140,56 @@
(let [event (:data (first (snowplow-test/pop-event-data-and-user-id!)))]
(is (snowplow-test/valid-datetime-for-snowplow? (get event "started_at")))
(is (snowplow-test/valid-datetime-for-snowplow? (get event "ended_at")))))))))
(deftest ensure-no-stacktrace-send-to-snowplow
;; The snowplow schema for task history has a limit of 2048 chars for task_details.
;; Most of the time the task_details < 200 chars, but when an exception happens it could exceed the limit.
;; And the stack trace is the longest part of the exception, so we need to make sure no stacktrace
;; is serialized before sending it to snowplow (#28006)
(snowplow-test/with-fake-snowplow-collector
(testing "Filter out exceptions's stacktrace when send to snowplow"
(testing "when using `with-task-history`"
;; register a task history with exception
(u/ignore-exceptions
(task-history/with-task-history {:task "test-exception"}
(throw (ex-info "Oops" {:oops true}))))
(is (= {"ex-data" {"oops" true}
"exception" "class clojure.lang.ExceptionInfo"
"message" "Oops"
"original-info" nil
"status" "failed"}
(get-in (last-snowplow-event) [:data "task_details"]))))
(testing "when task_details is an exception from sync task"
(is (= {"throwable" {"cause" "Oops", "data" {"oops" true}}}
(-> (assoc (make-10-millis-task (t/local-date-time))
:task "a fake task"
;; sync task include the exception as {:throwable e}
:task_details {:throwable (ex-info "Oops" {:oops true})})
insert-then-pop!
(get-in [:data "task_details"])))))
(testing "when task_details is an exception"
(is (= {"cause" "Oops", "data" {"oops" true}}
(-> (assoc (make-10-millis-task (t/local-date-time))
:task "a fake task"
;; sync task include the exception as {:throwable e}
:task_details (ex-info "Oops" {:oops true}))
insert-then-pop!
(get-in [:data "task_details"]))))))
(testing "if task_details is more than 2048 chars, ignore it but still send the infos"
(let [task-details {:a (apply str (repeat 2048 "a"))}]
(is (= {"duration" 10
"ended_at" true
"started_at" true
"event" "new_task_history"
"task_details" nil
"task_id" true
"task_name" "a fake task"}
(-> (assoc (make-10-millis-task (t/local-date-time))
:task "a fake task"
:task_details task-details)
insert-then-pop!
(get-in [:data]))))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment