From 8a02e75ffd3a8c21bcb219362442e24d80337bc3 Mon Sep 17 00:00:00 2001 From: adam-james <21064735+adam-james-v@users.noreply.github.com> Date: Thu, 5 Sep 2024 11:57:53 -0700 Subject: [PATCH] Catch Errors in Download process and only write error message (#47482) * walk the error to remove any visualization settings Thanks Dan for the patch, this approach is nice because it doesn't require changing all 3 streaming-response-writer implementations (json csv and xlsx). It keeps behaviour basically the same, but just cleans up the output to not include potentially sensitive column/dataset details. * add a test that confirms json/csv errors are cleaned * walk the error to remove any visualization settings Thanks Dan for the patch, this approach is nice because it doesn't require changing all 3 streaming-response-writer implementations (json csv and xlsx). It keeps behaviour basically the same, but just cleans up the output to not include potentially sensitive column/dataset details. * add a test that confirms json/csv errors are cleaned * fix dataset error test * cljfmt * try to be a little more surgical with what we take out of the error * formatting again, oops * pass export format, only modify the obj for file exports In the case of errors being saved to files, the obj is a map with a :status :failed key assoc'd by the qp somewhere along the way. The format-exception cond branch is not used in this case, so we can't rely on modifying `format-exception` to just dissoc ex-data. Instead, I've kept the obj modification, but it excludes the query and the preprocessed keys, which are the only 2 keys where we see :viz-settings. This doesn't eliminate the problem of needing to exclude other keys in the future, but it does improve the surface area - the query and viz settings are the most likely candidates for holding potentially sensitive info. I'd prefer to stick with this solution, as we can get a fix for the security concern in quicker, and perhaps a non-security bug can be opened regarding the most correct way to indicate errors to users whose downloads have failed. * keep format-exception the same * cleaner passing of export-format to write-error! --- src/metabase/async/streaming_response.clj | 20 ++++++--- src/metabase/query_processor/streaming.clj | 2 +- test/metabase/api/downloads_exports_test.clj | 41 +++++++++++++++++++ .../query_processor/streaming/csv_test.clj | 23 ++++++++++- 4 files changed, 79 insertions(+), 7 deletions(-) diff --git a/src/metabase/async/streaming_response.clj b/src/metabase/async/streaming_response.clj index 76050dd3993..015d682bfc0 100644 --- a/src/metabase/async/streaming_response.clj +++ b/src/metabase/async/streaming_response.clj @@ -2,6 +2,7 @@ (:require [cheshire.core :as json] [clojure.core.async :as a] + [clojure.walk :as walk] [compojure.response] [metabase.async.streaming-response.thread-pool :as thread-pool] [metabase.async.util :as async.u] @@ -44,21 +45,30 @@ (defn write-error! "Write an error to the output stream, formatting it nicely. Closes output stream afterwards." - [^OutputStream os obj] + [^OutputStream os obj export-format] (cond (some #(instance? % obj) [InterruptedException EofException]) (log/trace "Error is an InterruptedException or EofException, not writing to output stream") (instance? Throwable obj) - (recur os (format-exception obj)) + (recur os (format-exception obj) export-format) :else (with-open [os os] (log/trace (u/pprint-to-str (list 'write-error! obj))) (try - (with-open [writer (BufferedWriter. (OutputStreamWriter. os StandardCharsets/UTF_8))] - (json/generate-stream obj writer)) + (let [obj (-> (if (not= :api export-format) + (walk/prewalk + (fn [x] + (if (map? x) + (apply dissoc x [:json_query :preprocessed]) + x)) + obj) + obj) + (dissoc :export-format))] + (with-open [writer (BufferedWriter. (OutputStreamWriter. os StandardCharsets/UTF_8))] + (json/generate-stream obj writer))) (catch EofException _) (catch Throwable e (log/error e "Error writing error to output stream" obj)))))) @@ -74,7 +84,7 @@ nil) (catch Throwable e (log/error e "Caught unexpected Exception in streaming response body") - (write-error! os e) + (write-error! os e nil) nil))) (defn- do-f-async diff --git a/src/metabase/query_processor/streaming.clj b/src/metabase/query_processor/streaming.clj index 7af805e9490..5da0b115f54 100644 --- a/src/metabase/query_processor/streaming.clj +++ b/src/metabase/query_processor/streaming.clj @@ -181,7 +181,7 @@ (assert (not (instance? ManyToManyChannel result)) "QP should not return a core.async channel.") (when (or (instance? Throwable result) (= (:status result) :failed)) - (streaming-response/write-error! os result))))))) + (streaming-response/write-error! os result export-format))))))) (defmacro streaming-response "Return results of processing a query as a streaming response. This response implements the appropriate Ring/Compojure diff --git a/test/metabase/api/downloads_exports_test.clj b/test/metabase/api/downloads_exports_test.clj index 28feac1ddb6..6529d9e3eb1 100644 --- a/test/metabase/api/downloads_exports_test.clj +++ b/test/metabase/api/downloads_exports_test.clj @@ -13,8 +13,10 @@ [clojure.data.csv :as csv] [clojure.java.io :as io] [clojure.set :as set] + [clojure.string :as str] [clojure.test :refer :all] [dk.ative.docjure.spreadsheet :as spreadsheet] + [metabase.formatter :as formatter] [metabase.public-settings :as public-settings] [metabase.pulse :as pulse] [metabase.pulse.test-util :as pulse.test-util] @@ -742,3 +744,42 @@ ;; the [$$] part will appear as $ when you open the Excel file in a spreadsheet app (is (= [["Discount"] ["[$$]6.42"]] (-> (card-download card :xlsx true))))))))) + +(deftest clean-errors-test + (testing "Queries that error should not include visualization settings (metabase-private #233)" + (with-redefs [formatter/number-formatter (fn [& _args] (fn [_] (throw (Exception. "Test Exception"))))] + (mt/with-temp [:model/Card {card-id :id} {:display :table + :type :model + :dataset_query {:database (mt/id) + :type :query + :query {:source-table (mt/id :orders) + :filter [:not-null [:field (mt/id :orders :discount) {:base-type :type/Float}]] + :limit 1}} + :visualization_settings {:table.columns + [{:name "ID" :enabled false} + {:name "USER_ID" :enabled false} + {:name "PRODUCT_ID" :enabled false} + {:name "SUBTOTAL" :enabled false} + {:name "TAX" :enabled false} + {:name "TOTAL" :enabled false} + {:name "DISCOUNT" :enabled true} + {:name "CREATED_AT" :enabled false} + {:name "QUANTITY" :enabled false}] + :table.cell_column "SUBTOTAL" + :column_settings {(format "[\"ref\",[\"field\",%s,null]]" (mt/id :orders :discount)) + {:currency_in_header false}}}}] + (let [illegal-strings ["visualization-settings" ":viz-settings" "visualization_settings"]] + (doseq [export-format ["csv" "json" #_"xlsx"]] + ;; for now, don't try to read xlsx back in, it will not be correct since we end up writing + ;; a json blob to the output stream, it creates an invalid xlsx anyway. + ;; This is not new behaviour, we'll just fix it when a better solution to 'errors in downloaded files' comes along + (let [results (mt/user-http-request :rasta :post 200 (format "card/%d/query/%s?format_rows=true" card-id export-format)) + results-string (if (= "xlsx" export-format) + (read-xlsx results) + (str results))] + (testing (format "Testing export format: %s" export-format) + (doseq [illegal illegal-strings] + (is (false? (str/blank? results-string))) + (is (true? (str/includes? results-string "Test Exception"))) + (testing (format "String \"%s\" is not in the error message." illegal) + (is (false? (str/includes? results-string illegal))))))))))))) diff --git a/test/metabase/query_processor/streaming/csv_test.clj b/test/metabase/query_processor/streaming/csv_test.clj index aca1ee75751..155dd29ffac 100644 --- a/test/metabase/query_processor/streaming/csv_test.clj +++ b/test/metabase/query_processor/streaming/csv_test.clj @@ -2,12 +2,16 @@ (:require [cheshire.core :as json] [clojure.data.csv :as csv] + [clojure.string :as str] [clojure.test :refer :all] [metabase.driver :as driver] + [metabase.models.data-permissions :as data-perms] + [metabase.models.permissions-group :as perms-group] [metabase.query-processor :as qp] [metabase.query-processor.streaming.interface :as qp.si] [metabase.test :as mt] - [metabase.test.data.dataset-definitions :as defs]) + [metabase.test.data.dataset-definitions :as defs] + [metabase.util :as u]) (:import (java.io BufferedOutputStream ByteArrayOutputStream))) @@ -46,6 +50,23 @@ (json/generate-string (mt/mbql-query checkins {:order-by [[:asc $id]], :limit 5})))] (take 5 (parse-and-sort-csv result))))))) +(deftest errors-not-include-visualization-settings + (testing "Queries that error should not include visualization settings" + (mt/with-temp [:model/Card {card-id :id} {:dataset_query (mt/mbql-query orders + {:order-by [[:asc $id]], :limit 5}) + :visualization_settings {:column_settings {} + :notvisiblekey :notvisiblevalue}}] + (mt/with-no-data-perms-for-all-users! + (data-perms/set-database-permission! (perms-group/all-users) + (u/the-id (mt/db)) + :perms/create-queries :query-builder) + (let [results (mt/user-http-request :rasta :post 200 (format "card/%d/query/csv" card-id)) + results-string (str results) + illegal-strings ["notvisiblekey" "notvisiblevalue" "column_settings" + "visualization-settings" "viz-settings"]] + (doseq [illegal illegal-strings] + (is (false? (str/includes? results-string illegal))))))))) + (deftest check-an-empty-date-column (testing "NULL values should be written correctly" (mt/dataset defs/test-data-null-date -- GitLab