From 47d9fc19c58034b7522e63628ce7669d3b3e73c2 Mon Sep 17 00:00:00 2001
From: adam-james <21064735+adam-james-v@users.noreply.github.com>
Date: Mon, 12 Aug 2024 07:57:52 -0700
Subject: [PATCH] Dowload Row Limit Env Var (#46401)

* Dowload Row Limit Env Var

Adds `MB_DOWNLOAD_ROW_LIMIT` to enable changing the row limit on downloads and alert/subscription attachments (not the
rendered tables, but the .csv, etc.).

Based on: #44982 (Thanks, @r-kot)

Partially implements: #28144

The difference in this PR compared to #44982 is that the download limit applies to all downloads, the only exception
being when the limit is above `qp.i/absolute-max-results` (1048575, based on Excel's limitation); in such a case, the
user supplied limit is only used if the download is csv or json, and `qp.i/absolute-max-results` is used for xlsx.

This PR also fixes alert/subscription attachment limits; prior to this, they were set to the in-app limit of 2000
rows, but now they will follow the user supplied download-row-limit.

This PR also adds a test to the downloads-and-exports test namespace, confirming that they follow the supplied limit,
or the max limit if none is supplied.

* add test confirming the default limit works

* fix test to use the download-row-limit

* address review feedback

* Update src/metabase/public_settings.clj

Co-authored-by: Cal Herries <39073188+calherries@users.noreply.github.com>

* add test covering case where download-row-limit is unset

---------

Co-authored-by: Cal Herries <39073188+calherries@users.noreply.github.com>
---
 src/metabase/public_settings.clj              |  7 +++
 src/metabase/pulse/util.clj                   | 37 ++++++------
 .../query_processor/middleware/limit.clj      | 18 +++++-
 test/metabase/api/downloads_exports_test.clj  | 37 ++++++++++++
 test/metabase/pulse_test.clj                  |  5 +-
 .../query_processor/middleware/limit_test.clj | 56 +++++++++++++++++++
 6 files changed, 137 insertions(+), 23 deletions(-)

diff --git a/src/metabase/public_settings.clj b/src/metabase/public_settings.clj
index 8e41b4eb61e..7f96368d0f9 100644
--- a/src/metabase/public_settings.clj
+++ b/src/metabase/public_settings.clj
@@ -942,3 +942,10 @@ See [fonts](../configuring-metabase/fonts.md).")
   :export?    false
   :default    true
   :type       :boolean)
+
+(defsetting download-row-limit
+  (deferred-tru "Exports row limit excluding the header. xlsx downloads are limited to 1048575 rows even if this limit is higher.")
+  :visibility :internal
+  :export?    true
+  :type       :integer
+  :doc false)
diff --git a/src/metabase/pulse/util.clj b/src/metabase/pulse/util.clj
index 68f268e904a..dc867655939 100644
--- a/src/metabase/pulse/util.clj
+++ b/src/metabase/pulse/util.clj
@@ -27,10 +27,11 @@
               process-query (fn []
                               (binding [qp.perms/*card-id* card-id]
                                 (qp/process-query
-                                 (qp/userland-query-with-default-constraints
-                                  (assoc query :middleware {:skip-results-metadata? true
-                                                            :process-viz-settings?  true
-                                                            :js-int-to-string?      false})
+                                 (qp/userland-query
+                                  (assoc query :middleware {:skip-results-metadata?            true
+                                                            :process-viz-settings?             true
+                                                            :js-int-to-string?                 false
+                                                            :add-default-userland-constraints? false})
                                   (merge (cond-> {:executed-by pulse-creator-id
                                                   :context     :pulse
                                                   :card-id     card-id}
@@ -75,19 +76,21 @@
                                           :dashcard dashcard
                                           :type     :card
                                           :result   (qp.dashboard/process-query-for-dashcard
-                                                      :dashboard-id  dashboard-id
-                                                      :card-id       card-id
-                                                      :dashcard-id   (u/the-id dashcard)
-                                                      :context       :dashboard-subscription
-                                                      :export-format :api
-                                                      :parameters    parameters
-                                                      :middleware    {:process-viz-settings? true
-                                                                      :js-int-to-string?     false}
-                                                      :make-run      (fn make-run [qp _export-format]
-                                                                       (^:once fn* [query info]
-                                                                               (qp
-                                                                                 (qp/userland-query-with-default-constraints query info)
-                                                                                 nil))))})
+                                                     :dashboard-id  dashboard-id
+                                                     :card-id       card-id
+                                                     :dashcard-id   (u/the-id dashcard)
+                                                     :context       :dashboard-subscription
+                                                     :export-format :api
+                                                     :parameters    parameters
+                                                     :constraints   {}
+                                                     :middleware    {:process-viz-settings?             true
+                                                                     :js-int-to-string?                 false
+                                                                     :add-default-userland-constraints? false}
+                                                     :make-run      (fn make-run [qp _export-format]
+                                                                      (^:once fn* [query info]
+                                                                       (qp
+                                                                        (qp/userland-query query info)
+                                                                        nil))))})
           result                       (result-fn card-id)
           series-results               (map (comp result-fn :id) multi-cards)]
       (when-not (and (get-in dashcard [:visualization_settings :card.hide_empty])
diff --git a/src/metabase/query_processor/middleware/limit.clj b/src/metabase/query_processor/middleware/limit.clj
index 45c9bb8d612..9027554fa11 100644
--- a/src/metabase/query_processor/middleware/limit.clj
+++ b/src/metabase/query_processor/middleware/limit.clj
@@ -2,6 +2,7 @@
   "Middleware that handles limiting the maximum number of rows returned by a query."
   (:require
    [metabase.legacy-mbql.util :as mbql.u]
+   [metabase.public-settings :as public-settings]
    [metabase.query-processor.interface :as qp.i]
    [metabase.query-processor.util :as qp.util]))
 
@@ -23,14 +24,25 @@
          (qp.util/query-without-aggregations-or-limits? query))
     (update :query assoc :limit max-rows, ::original-limit original-limit)))
 
+(defn- xlsx-export?
+  [& {info :info}]
+  (let [context (:context info)]
+    (= context :xlsx-download)))
+
 (defn determine-query-max-rows
   "Given a `query`, return the max rows that should be returned. This is either:
   1. the output of [[metabase.legacy-mbql.util/query->max-rows-limit]] when called on the given query
-  2. [[metabase.query-processor.interface/absolute-max-results]] (a constant, non-nil backstop value)"
+  2. the value of `pubic-settings/download-row-limit`
+     a. if it is less than [[metabase.query-processor.interface/absolute-max-results]] or
+     b. if it is greater and the export is NOT xlsx otherwise,
+  3. [[metabase.query-processor.interface/absolute-max-results]] (a constant, non-nil backstop value)"
   [query]
   (when-not (disable-max-results? query)
-    (or (mbql.u/query->max-rows-limit query)
-        qp.i/absolute-max-results)))
+    (cond-> (or (mbql.u/query->max-rows-limit query)
+                (public-settings/download-row-limit)
+                qp.i/absolute-max-results)
+      (xlsx-export? query)
+      (min qp.i/absolute-max-results))))
 
 (defn add-default-limit
   "Pre-processing middleware. Add default `:limit` to MBQL queries without any aggregations."
diff --git a/test/metabase/api/downloads_exports_test.clj b/test/metabase/api/downloads_exports_test.clj
index bd5bb8aef91..a921a43d9aa 100644
--- a/test/metabase/api/downloads_exports_test.clj
+++ b/test/metabase/api/downloads_exports_test.clj
@@ -15,6 +15,7 @@
    [clojure.set :as set]
    [clojure.test :refer :all]
    [dk.ative.docjure.spreadsheet :as spreadsheet]
+   [metabase.public-settings :as public-settings]
    [metabase.pulse :as pulse]
    [metabase.pulse.test-util :as pulse.test-util]
    [metabase.query-processor.streaming.csv :as qp.csv]
@@ -151,6 +152,15 @@
                                                      :user_id          (mt/user->id :rasta)}]
         (subscription-attachment* pulse)))))
 
+(defn all-outputs!
+  [card-or-dashcard export-format format-rows?]
+  (merge
+   (when-not (contains? card-or-dashcard :dashboard_id)
+     {:card-download    (card-download card-or-dashcard export-format format-rows?)
+      :alert-attachment (alert-attachment! card-or-dashcard export-format format-rows?)})
+   {:dashcard-download       (card-download card-or-dashcard export-format format-rows?)
+    :subscription-attachment (subscription-attachment! card-or-dashcard export-format format-rows?)}))
+
 (set! *warn-on-reflection* true)
 
 (def ^:private pivot-rows-query
@@ -618,6 +628,33 @@
                    {:alert-attachment        (first alert-result)
                     :subscription-attachment (first subscription-result)}))))))))
 
+(deftest downloads-row-limit-test
+  (testing "Downloads row limit works."
+    (mt/with-temporary-setting-values [public-settings/download-row-limit 1050000]
+      (mt/dataset test-data
+        (mt/with-temp [:model/Card card {:display       :table
+                                         :dataset_query {:database (mt/id)
+                                                         :type     :native
+                                                         :native   {:query "SELECT 1 as A FROM generate_series(1,1100000);"}}}]
+          (let [results (all-outputs! card :csv true)]
+            (is (= {:card-download           1050001
+                    :alert-attachment        1050001
+                    :dashcard-download       1050001
+                    :subscription-attachment 1050001}
+                     (update-vals results count))))))))
+  (testing "Downloads row limit default works."
+    (mt/dataset test-data
+      (mt/with-temp [:model/Card card {:display       :table
+                                       :dataset_query {:database (mt/id)
+                                                       :type     :native
+                                                       :native   {:query "SELECT 1 as A FROM generate_series(1,1100000);"}}}]
+        (let [results (all-outputs! card :csv true)]
+          (is (= {:card-download           1048576
+                  :alert-attachment        1048576
+                  :dashcard-download       1048576
+                  :subscription-attachment 1048576}
+                 (update-vals results count))))))))
+
 (deftest ^:parallel model-viz-settings-downloads-test
   (testing "A model's visualization settings are respected in downloads."
     (testing "for csv"
diff --git a/test/metabase/pulse_test.clj b/test/metabase/pulse_test.clj
index bb2d63dc3a4..724a065a359 100644
--- a/test/metabase/pulse_test.clj
+++ b/test/metabase/pulse_test.clj
@@ -12,11 +12,11 @@
    [metabase.models.permissions :as perms]
    [metabase.models.permissions-group :as perms-group]
    [metabase.models.pulse :as pulse]
+   [metabase.public-settings :as public-settings]
    [metabase.pulse]
    [metabase.pulse.render :as render]
    [metabase.pulse.render.body :as body]
    [metabase.pulse.test-util :as pulse.test-util]
-   [metabase.query-processor.middleware.constraints :as qp.constraints]
    [metabase.test :as mt]
    [metabase.test.util :as tu]
    [metabase.util :as u]
@@ -325,8 +325,7 @@
 
       :fixture
       (fn [_ thunk]
-        (with-redefs [qp.constraints/default-query-constraints (constantly {:max-results           10000
-                                                                            :max-results-bare-rows 30})]
+        (mt/with-temporary-setting-values [public-settings/download-row-limit 30]
           (thunk)))
       :pulse-card {:include_csv true}
       :assert
diff --git a/test/metabase/query_processor/middleware/limit_test.clj b/test/metabase/query_processor/middleware/limit_test.clj
index ea03e7f739a..e70f4a925da 100644
--- a/test/metabase/query_processor/middleware/limit_test.clj
+++ b/test/metabase/query_processor/middleware/limit_test.clj
@@ -60,3 +60,59 @@
                             ::limit/original-limit nil}}
              (#'limit/add-default-limit query))
           "Preprocessed query should have :limit added to it"))))
+
+(deftest ^:parallel download-row-max-results-test
+  (testing "Apply `absolute-max-results` when `download-row-limit` is not set."
+    (let [query {:type  :query
+                 :query {}
+                 :info  {:context :csv-download}}]
+      (is (= {:type  :query
+              :query {:limit                 qp.i/absolute-max-results
+                      ::limit/original-limit nil}
+              :info  {:context :csv-download}}
+              (limit/add-default-limit query))))))
+
+(deftest download-row-limit-test
+  (testing "Apply custom download row limits when"
+    (doseq [[limit expected context] [[1000 1000 :csv-download]
+                                      [1000 1000 :json-download]
+                                      [1000 1000 :xlsx-download]
+                                      [1100000 1100000 :csv-download]
+                                      [1100000 1100000 :json-download]
+                                      [1100000 qp.i/absolute-max-results :xlsx-download]
+                                      [nil qp.i/absolute-max-results :csv-download]
+                                      [nil qp.i/absolute-max-results :json-download]
+                                      [nil qp.i/absolute-max-results :xlsx-download]]]
+      (testing (format "%s the absolute limit for %s"
+                       (if (< expected qp.i/absolute-max-results)
+                         "below"
+                         "above")
+                       context)
+          (mt/with-temp-env-var-value! [mb-download-row-limit limit]
+            (is (= expected
+                   (get-in (limit/add-default-limit
+                            {:type  :query
+                             :query {}
+                             :info  {:context context}})
+                           [:query :limit])))))))
+  (testing "Apply appropriate maximum when download-row-limit is unset, but `(mbql.u/query->max-rows-limit query)` returns a value above absolute-max-results"
+    (doseq [[limit expected context] [[1000 1000 :csv-download]
+                                      [1000 1000 :json-download]
+                                      [1000 1000 :xlsx-download]
+                                      [1100000 1100000 :csv-download]
+                                      [1100000 1100000 :json-download]
+                                      [1100000 qp.i/absolute-max-results :xlsx-download]]]
+      (testing (format "%s the absolute limit for %s"
+                       (if (< expected qp.i/absolute-max-results)
+                         "below"
+                         "above")
+                       context)
+          (is (= expected
+                 (get-in (limit/add-default-limit
+                          {:type        :query
+                           :query       {}
+                           ;; setting a constraint here will result in `(mbql.u/query->max-rows-limit query)` returning that limit
+                           ;; so we can use this to check the behaviour of `limit/add-default-limit` when download-row-limit is unset
+                           :constraints (when limit {:max-results-bare-rows limit})
+                           :info        {:context context}})
+                         [:query :limit])))))))
-- 
GitLab