From 47d9fc19c58034b7522e63628ce7669d3b3e73c2 Mon Sep 17 00:00:00 2001 From: adam-james <21064735+adam-james-v@users.noreply.github.com> Date: Mon, 12 Aug 2024 07:57:52 -0700 Subject: [PATCH] Dowload Row Limit Env Var (#46401) * Dowload Row Limit Env Var Adds `MB_DOWNLOAD_ROW_LIMIT` to enable changing the row limit on downloads and alert/subscription attachments (not the rendered tables, but the .csv, etc.). Based on: #44982 (Thanks, @r-kot) Partially implements: #28144 The difference in this PR compared to #44982 is that the download limit applies to all downloads, the only exception being when the limit is above `qp.i/absolute-max-results` (1048575, based on Excel's limitation); in such a case, the user supplied limit is only used if the download is csv or json, and `qp.i/absolute-max-results` is used for xlsx. This PR also fixes alert/subscription attachment limits; prior to this, they were set to the in-app limit of 2000 rows, but now they will follow the user supplied download-row-limit. This PR also adds a test to the downloads-and-exports test namespace, confirming that they follow the supplied limit, or the max limit if none is supplied. * add test confirming the default limit works * fix test to use the download-row-limit * address review feedback * Update src/metabase/public_settings.clj Co-authored-by: Cal Herries <39073188+calherries@users.noreply.github.com> * add test covering case where download-row-limit is unset --------- Co-authored-by: Cal Herries <39073188+calherries@users.noreply.github.com> --- src/metabase/public_settings.clj | 7 +++ src/metabase/pulse/util.clj | 37 ++++++------ .../query_processor/middleware/limit.clj | 18 +++++- test/metabase/api/downloads_exports_test.clj | 37 ++++++++++++ test/metabase/pulse_test.clj | 5 +- .../query_processor/middleware/limit_test.clj | 56 +++++++++++++++++++ 6 files changed, 137 insertions(+), 23 deletions(-) diff --git a/src/metabase/public_settings.clj b/src/metabase/public_settings.clj index 8e41b4eb61e..7f96368d0f9 100644 --- a/src/metabase/public_settings.clj +++ b/src/metabase/public_settings.clj @@ -942,3 +942,10 @@ See [fonts](../configuring-metabase/fonts.md).") :export? false :default true :type :boolean) + +(defsetting download-row-limit + (deferred-tru "Exports row limit excluding the header. xlsx downloads are limited to 1048575 rows even if this limit is higher.") + :visibility :internal + :export? true + :type :integer + :doc false) diff --git a/src/metabase/pulse/util.clj b/src/metabase/pulse/util.clj index 68f268e904a..dc867655939 100644 --- a/src/metabase/pulse/util.clj +++ b/src/metabase/pulse/util.clj @@ -27,10 +27,11 @@ process-query (fn [] (binding [qp.perms/*card-id* card-id] (qp/process-query - (qp/userland-query-with-default-constraints - (assoc query :middleware {:skip-results-metadata? true - :process-viz-settings? true - :js-int-to-string? false}) + (qp/userland-query + (assoc query :middleware {:skip-results-metadata? true + :process-viz-settings? true + :js-int-to-string? false + :add-default-userland-constraints? false}) (merge (cond-> {:executed-by pulse-creator-id :context :pulse :card-id card-id} @@ -75,19 +76,21 @@ :dashcard dashcard :type :card :result (qp.dashboard/process-query-for-dashcard - :dashboard-id dashboard-id - :card-id card-id - :dashcard-id (u/the-id dashcard) - :context :dashboard-subscription - :export-format :api - :parameters parameters - :middleware {:process-viz-settings? true - :js-int-to-string? false} - :make-run (fn make-run [qp _export-format] - (^:once fn* [query info] - (qp - (qp/userland-query-with-default-constraints query info) - nil))))}) + :dashboard-id dashboard-id + :card-id card-id + :dashcard-id (u/the-id dashcard) + :context :dashboard-subscription + :export-format :api + :parameters parameters + :constraints {} + :middleware {:process-viz-settings? true + :js-int-to-string? false + :add-default-userland-constraints? false} + :make-run (fn make-run [qp _export-format] + (^:once fn* [query info] + (qp + (qp/userland-query query info) + nil))))}) result (result-fn card-id) series-results (map (comp result-fn :id) multi-cards)] (when-not (and (get-in dashcard [:visualization_settings :card.hide_empty]) diff --git a/src/metabase/query_processor/middleware/limit.clj b/src/metabase/query_processor/middleware/limit.clj index 45c9bb8d612..9027554fa11 100644 --- a/src/metabase/query_processor/middleware/limit.clj +++ b/src/metabase/query_processor/middleware/limit.clj @@ -2,6 +2,7 @@ "Middleware that handles limiting the maximum number of rows returned by a query." (:require [metabase.legacy-mbql.util :as mbql.u] + [metabase.public-settings :as public-settings] [metabase.query-processor.interface :as qp.i] [metabase.query-processor.util :as qp.util])) @@ -23,14 +24,25 @@ (qp.util/query-without-aggregations-or-limits? query)) (update :query assoc :limit max-rows, ::original-limit original-limit))) +(defn- xlsx-export? + [& {info :info}] + (let [context (:context info)] + (= context :xlsx-download))) + (defn determine-query-max-rows "Given a `query`, return the max rows that should be returned. This is either: 1. the output of [[metabase.legacy-mbql.util/query->max-rows-limit]] when called on the given query - 2. [[metabase.query-processor.interface/absolute-max-results]] (a constant, non-nil backstop value)" + 2. the value of `pubic-settings/download-row-limit` + a. if it is less than [[metabase.query-processor.interface/absolute-max-results]] or + b. if it is greater and the export is NOT xlsx otherwise, + 3. [[metabase.query-processor.interface/absolute-max-results]] (a constant, non-nil backstop value)" [query] (when-not (disable-max-results? query) - (or (mbql.u/query->max-rows-limit query) - qp.i/absolute-max-results))) + (cond-> (or (mbql.u/query->max-rows-limit query) + (public-settings/download-row-limit) + qp.i/absolute-max-results) + (xlsx-export? query) + (min qp.i/absolute-max-results)))) (defn add-default-limit "Pre-processing middleware. Add default `:limit` to MBQL queries without any aggregations." diff --git a/test/metabase/api/downloads_exports_test.clj b/test/metabase/api/downloads_exports_test.clj index bd5bb8aef91..a921a43d9aa 100644 --- a/test/metabase/api/downloads_exports_test.clj +++ b/test/metabase/api/downloads_exports_test.clj @@ -15,6 +15,7 @@ [clojure.set :as set] [clojure.test :refer :all] [dk.ative.docjure.spreadsheet :as spreadsheet] + [metabase.public-settings :as public-settings] [metabase.pulse :as pulse] [metabase.pulse.test-util :as pulse.test-util] [metabase.query-processor.streaming.csv :as qp.csv] @@ -151,6 +152,15 @@ :user_id (mt/user->id :rasta)}] (subscription-attachment* pulse))))) +(defn all-outputs! + [card-or-dashcard export-format format-rows?] + (merge + (when-not (contains? card-or-dashcard :dashboard_id) + {:card-download (card-download card-or-dashcard export-format format-rows?) + :alert-attachment (alert-attachment! card-or-dashcard export-format format-rows?)}) + {:dashcard-download (card-download card-or-dashcard export-format format-rows?) + :subscription-attachment (subscription-attachment! card-or-dashcard export-format format-rows?)})) + (set! *warn-on-reflection* true) (def ^:private pivot-rows-query @@ -618,6 +628,33 @@ {:alert-attachment (first alert-result) :subscription-attachment (first subscription-result)})))))))) +(deftest downloads-row-limit-test + (testing "Downloads row limit works." + (mt/with-temporary-setting-values [public-settings/download-row-limit 1050000] + (mt/dataset test-data + (mt/with-temp [:model/Card card {:display :table + :dataset_query {:database (mt/id) + :type :native + :native {:query "SELECT 1 as A FROM generate_series(1,1100000);"}}}] + (let [results (all-outputs! card :csv true)] + (is (= {:card-download 1050001 + :alert-attachment 1050001 + :dashcard-download 1050001 + :subscription-attachment 1050001} + (update-vals results count)))))))) + (testing "Downloads row limit default works." + (mt/dataset test-data + (mt/with-temp [:model/Card card {:display :table + :dataset_query {:database (mt/id) + :type :native + :native {:query "SELECT 1 as A FROM generate_series(1,1100000);"}}}] + (let [results (all-outputs! card :csv true)] + (is (= {:card-download 1048576 + :alert-attachment 1048576 + :dashcard-download 1048576 + :subscription-attachment 1048576} + (update-vals results count)))))))) + (deftest ^:parallel model-viz-settings-downloads-test (testing "A model's visualization settings are respected in downloads." (testing "for csv" diff --git a/test/metabase/pulse_test.clj b/test/metabase/pulse_test.clj index bb2d63dc3a4..724a065a359 100644 --- a/test/metabase/pulse_test.clj +++ b/test/metabase/pulse_test.clj @@ -12,11 +12,11 @@ [metabase.models.permissions :as perms] [metabase.models.permissions-group :as perms-group] [metabase.models.pulse :as pulse] + [metabase.public-settings :as public-settings] [metabase.pulse] [metabase.pulse.render :as render] [metabase.pulse.render.body :as body] [metabase.pulse.test-util :as pulse.test-util] - [metabase.query-processor.middleware.constraints :as qp.constraints] [metabase.test :as mt] [metabase.test.util :as tu] [metabase.util :as u] @@ -325,8 +325,7 @@ :fixture (fn [_ thunk] - (with-redefs [qp.constraints/default-query-constraints (constantly {:max-results 10000 - :max-results-bare-rows 30})] + (mt/with-temporary-setting-values [public-settings/download-row-limit 30] (thunk))) :pulse-card {:include_csv true} :assert diff --git a/test/metabase/query_processor/middleware/limit_test.clj b/test/metabase/query_processor/middleware/limit_test.clj index ea03e7f739a..e70f4a925da 100644 --- a/test/metabase/query_processor/middleware/limit_test.clj +++ b/test/metabase/query_processor/middleware/limit_test.clj @@ -60,3 +60,59 @@ ::limit/original-limit nil}} (#'limit/add-default-limit query)) "Preprocessed query should have :limit added to it")))) + +(deftest ^:parallel download-row-max-results-test + (testing "Apply `absolute-max-results` when `download-row-limit` is not set." + (let [query {:type :query + :query {} + :info {:context :csv-download}}] + (is (= {:type :query + :query {:limit qp.i/absolute-max-results + ::limit/original-limit nil} + :info {:context :csv-download}} + (limit/add-default-limit query)))))) + +(deftest download-row-limit-test + (testing "Apply custom download row limits when" + (doseq [[limit expected context] [[1000 1000 :csv-download] + [1000 1000 :json-download] + [1000 1000 :xlsx-download] + [1100000 1100000 :csv-download] + [1100000 1100000 :json-download] + [1100000 qp.i/absolute-max-results :xlsx-download] + [nil qp.i/absolute-max-results :csv-download] + [nil qp.i/absolute-max-results :json-download] + [nil qp.i/absolute-max-results :xlsx-download]]] + (testing (format "%s the absolute limit for %s" + (if (< expected qp.i/absolute-max-results) + "below" + "above") + context) + (mt/with-temp-env-var-value! [mb-download-row-limit limit] + (is (= expected + (get-in (limit/add-default-limit + {:type :query + :query {} + :info {:context context}}) + [:query :limit]))))))) + (testing "Apply appropriate maximum when download-row-limit is unset, but `(mbql.u/query->max-rows-limit query)` returns a value above absolute-max-results" + (doseq [[limit expected context] [[1000 1000 :csv-download] + [1000 1000 :json-download] + [1000 1000 :xlsx-download] + [1100000 1100000 :csv-download] + [1100000 1100000 :json-download] + [1100000 qp.i/absolute-max-results :xlsx-download]]] + (testing (format "%s the absolute limit for %s" + (if (< expected qp.i/absolute-max-results) + "below" + "above") + context) + (is (= expected + (get-in (limit/add-default-limit + {:type :query + :query {} + ;; setting a constraint here will result in `(mbql.u/query->max-rows-limit query)` returning that limit + ;; so we can use this to check the behaviour of `limit/add-default-limit` when download-row-limit is unset + :constraints (when limit {:max-results-bare-rows limit}) + :info {:context context}}) + [:query :limit]))))))) -- GitLab