Skip to content
Snippets Groups Projects
Unverified Commit aa76efb2 authored by John Swanson's avatar John Swanson Committed by GitHub
Browse files

Query row limit doesn't limit in-product downloads (#37716)

I misunderstood the desired behavior when I implemented this before. We
only want the limit to apply when the `add-default-userland-constraints`
middleware is applied to the query, not when we're downloading the data
directly.

This is a bit of a rat's nest. If there's a better way to approach the
problem, I'd be happy to hear it. I added documentation for the odd approach
as much as possible.

The main difficulty is that the userland middleware does not have access
to db-local settings. So if we want db-local settings to apply only to
userland, we have to hack our way around it.

The solution here is to divide the userland middleware into two parts:

- the actually-userland bit, which just marks the query as needing
default limits, and

- a middleware inserted deeper in the middleware stack (just before we
actually apply limits) that actually calculates those limits and
attaches them to the query.
parent 9166c58e
No related branches found
No related tags found
No related merge requests found
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
[metabase.models :refer [Database]] [metabase.models :refer [Database]]
[metabase.query-processor :as qp] [metabase.query-processor :as qp]
[metabase.query-processor.interface :as qp.i] [metabase.query-processor.interface :as qp.i]
[metabase.query-processor.middleware.constraints :as qp.constraints]
[metabase.query-processor.timezone :as qp.timezone] [metabase.query-processor.timezone :as qp.timezone]
[metabase.test :as mt] [metabase.test :as mt]
[toucan2.tools.with-temp :as t2.with-temp])) [toucan2.tools.with-temp :as t2.with-temp]))
...@@ -401,9 +400,6 @@ ...@@ -401,9 +400,6 @@
"\n" "\n"
"SELECT COUNT(1) FROM @TEMP\n")} "SELECT COUNT(1) FROM @TEMP\n")}
mt/native-query mt/native-query
;; add default query constraints to ensure the default limit of 2000 is overridden by the qp/process-userland-query
;; `:rowcount-override` connection property we defined in the details above
(assoc :constraints (qp.constraints/default-query-constraints))
qp/process-query
mt/rows mt/rows
ffirst)))))))) ffirst))))))))
...@@ -149,6 +149,7 @@ ...@@ -149,6 +149,7 @@
#'auto-parse-filter-values/auto-parse-filter-values #'auto-parse-filter-values/auto-parse-filter-values
#'validate-temporal-bucketing/validate-temporal-bucketing #'validate-temporal-bucketing/validate-temporal-bucketing
#'optimize-temporal-filters/optimize-temporal-filters #'optimize-temporal-filters/optimize-temporal-filters
#'qp.constraints/maybe-add-default-userland-constraints
#'limit/add-default-limit #'limit/add-default-limit
#'qp.middleware.enterprise/apply-download-limit #'qp.middleware.enterprise/apply-download-limit
#'check-features/check-features]) #'check-features/check-features])
...@@ -382,7 +383,7 @@ ...@@ -382,7 +383,7 @@
(f (f query rff context)) -> (f query rff context)" (f (f query rff context)) -> (f query rff context)"
(concat (concat
default-middleware default-middleware
[#'qp.constraints/add-default-userland-constraints [#'qp.constraints/mark-needs-default-userland-constraints
#'process-userland-query/process-userland-query #'process-userland-query/process-userland-query
#'catch-exceptions/catch-exceptions])) #'catch-exceptions/catch-exceptions]))
......
...@@ -43,15 +43,6 @@ ...@@ -43,15 +43,6 @@
:database-local :allowed :database-local :allowed
:audit :getter) :audit :getter)
(defn query->max-rows
"Given a query, returns the max rows that should be returned *as defined by settings*. In other words,
return `(aggregated-query-row-limit)` or `(unaggregated-query-row-limit)` depending on whether the query is
aggregated or not."
[{{aggregations :aggregation} :query}]
(if-not aggregations
(unaggregated-query-row-limit)
(aggregated-query-row-limit)))
(defn default-query-constraints (defn default-query-constraints
"Default map of constraints that we apply on dataset queries executed by the api." "Default map of constraints that we apply on dataset queries executed by the api."
[] []
...@@ -68,16 +59,46 @@ ...@@ -68,16 +59,46 @@
(defn- merge-default-constraints [constraints] (defn- merge-default-constraints [constraints]
(merge (default-query-constraints) constraints)) (merge (default-query-constraints) constraints))
(defn- add-default-userland-constraints* (defn add-constraints
"Add default values of `:max-results` and `:max-results-bare-rows` to `:constraints` map `m`." "Add default values of `:max-results` and `:max-results-bare-rows` to `:constraints` map `m`."
[query]
(update query :constraints (comp ensure-valid-constraints merge-default-constraints)))
(defn- should-add-userland-constraints? [query]
(get-in query [:middleware ::add-userland-constraints?]))
(defn maybe-add-default-userland-constraints
"If the query is marked as requiring userland constraints, actually calculate the constraints and add them to the
query."
[query]
(cond-> query
(should-add-userland-constraints? query) add-constraints))
(defn- mark-needs-default-userland-constraints*
[{{:keys [add-default-userland-constraints?]} :middleware, :as query}] [{{:keys [add-default-userland-constraints?]} :middleware, :as query}]
;; this may seem silly - we're just adding `::add-userland-constraints?` if `add-default-userland-constraints?` is
;; true, why not just use `add-default-userland-constraints?`. Answer: we need to only apply the default constraints
;; when this middleware is in the stack.
(cond-> query (cond-> query
add-default-userland-constraints? (update :constraints (comp ensure-valid-constraints merge-default-constraints)))) add-default-userland-constraints? (update :middleware assoc ::add-userland-constraints? true)))
(defn mark-needs-default-userland-constraints
"Middleware that marks the query as requiring userland constraints. Note that we can't actually calculate the
constraints yet, because of this middleware's position in the middleware stack (we don't yet have access to db-local
settings). So this is a bit awkward, because *three* separate middlewares are involved in the calculation of default
userland constraints: here, we mark the query as needing userland constraints, then later the
`maybe-add-default-userland-constraints` middleware actually calculates the relevant constraints. Finally, the
constraints, placed into `max-results` and `max-results-bare-rows` are *actually* used
by [[metabase.query-processor/process-query-and-save-with-max-results-constraints!]], which ultimately powers most
QP API endpoints.
To sum up:
- `mark-needs-default-userland-constraints` should be part of the middleware stack for queries from the UI, when we
want to apply default userland constraints.
(defn add-default-userland-constraints - `maybe-add-default-userland-constraints` can always be part of the middleware stack - it will be a no-op if
"Middleware that optionally adds default `max-results` and `max-results-bare-rows` constraints to queries, meant for `mark-needs-default-userland-constraints` was not *also* part of the middleware stack."
use with [[metabase.query-processor/process-query-and-save-with-max-results-constraints!]], which ultimately powers
most QP API endpoints."
[qp] [qp]
(fn [query rff context] (fn [query rff context]
(qp (add-default-userland-constraints* query) rff context))) (qp (mark-needs-default-userland-constraints* query) rff context)))
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
(:require (:require
[metabase.mbql.util :as mbql.u] [metabase.mbql.util :as mbql.u]
[metabase.query-processor.interface :as qp.i] [metabase.query-processor.interface :as qp.i]
[metabase.query-processor.middleware.constraints :as qp.constraints]
[metabase.query-processor.util :as qp.util])) [metabase.query-processor.util :as qp.util]))
;;;; Pre-processing ;;;; Pre-processing
...@@ -25,17 +24,12 @@ ...@@ -25,17 +24,12 @@
(update :query assoc :limit max-rows, ::original-limit original-limit))) (update :query assoc :limit max-rows, ::original-limit original-limit)))
(defn determine-query-max-rows (defn determine-query-max-rows
"Given a `query`, return the max rows that should be returned, or `nil` if no limit should be applied. "Given a `query`, return the max rows that should be returned. This is either:
If a limit should be applied, this is the first non-nil value from (in decreasing priority order): 1. the output of [[metabase.mbql.util/query->max-rows-limit]] when called on the given query
2. [[metabase.query-processor.interface/absolute-max-results]] (a constant, non-nil backstop value)"
1. the value of the [[metabase.query-processor.middleware.constraints/query->max-rows]] setting, which allows
for database-local override
2. the output of [[metabase.mbql.util/query->max-rows-limit]] when called on the given query
3. [[metabase.query-processor.interface/absolute-max-results]] (a constant, non-nil backstop value)"
[query] [query]
(when-not (disable-max-results? query) (when-not (disable-max-results? query)
(or (qp.constraints/query->max-rows query) (or (mbql.u/query->max-rows-limit query)
(mbql.u/query->max-rows-limit query)
qp.i/absolute-max-results))) qp.i/absolute-max-results)))
(defn add-default-limit (defn add-default-limit
...@@ -45,7 +39,6 @@ ...@@ -45,7 +39,6 @@
(add-limit max-rows query) (add-limit max-rows query)
query)) query))
;;;; Post-processing ;;;; Post-processing
(defn- limit-xform [max-rows rf] (defn- limit-xform [max-rows rf]
......
...@@ -5,7 +5,9 @@ ...@@ -5,7 +5,9 @@
[metabase.test :as mt])) [metabase.test :as mt]))
(defn- add-default-userland-constraints [query] (defn- add-default-userland-constraints [query]
(:pre (mt/test-qp-middleware qp.constraints/add-default-userland-constraints query))) (qp.constraints/maybe-add-default-userland-constraints
(:pre
(mt/test-qp-middleware qp.constraints/mark-needs-default-userland-constraints query))))
(deftest ^:parallel no-op-without-middleware-options-test (deftest ^:parallel no-op-without-middleware-options-test
(testing "don't do anything to queries without [:middleware :add-default-userland-constraints?] set" (testing "don't do anything to queries without [:middleware :add-default-userland-constraints?] set"
...@@ -14,7 +16,8 @@ ...@@ -14,7 +16,8 @@
(deftest ^:parallel add-constraints-test (deftest ^:parallel add-constraints-test
(testing "if it is *truthy* add the constraints" (testing "if it is *truthy* add the constraints"
(is (= {:middleware {:add-default-userland-constraints? true}, (is (= {:middleware {:add-default-userland-constraints? true
::qp.constraints/add-userland-constraints? true}
:constraints {:max-results @#'qp.constraints/default-aggregated-query-row-limit :constraints {:max-results @#'qp.constraints/default-aggregated-query-row-limit
:max-results-bare-rows @#'qp.constraints/default-unaggregated-query-row-limit}} :max-results-bare-rows @#'qp.constraints/default-unaggregated-query-row-limit}}
(add-default-userland-constraints (add-default-userland-constraints
...@@ -28,7 +31,8 @@ ...@@ -28,7 +31,8 @@
(deftest ^:parallel dont-overwrite-existing-constraints-test (deftest ^:parallel dont-overwrite-existing-constraints-test
(testing "if it already has constraints, don't overwrite those!" (testing "if it already has constraints, don't overwrite those!"
(is (= {:middleware {:add-default-userland-constraints? true} (is (= {:middleware {:add-default-userland-constraints? true
::qp.constraints/add-userland-constraints? true}
:constraints {:max-results @#'qp.constraints/default-aggregated-query-row-limit :constraints {:max-results @#'qp.constraints/default-aggregated-query-row-limit
:max-results-bare-rows 1}} :max-results-bare-rows 1}}
(add-default-userland-constraints (add-default-userland-constraints
...@@ -37,15 +41,18 @@ ...@@ -37,15 +41,18 @@
(deftest ^:parallel max-results-bare-rows-should-be-less-than-max-results-test (deftest ^:parallel max-results-bare-rows-should-be-less-than-max-results-test
(testing "if you specify just `:max-results` it should make sure `:max-results-bare-rows` is <= `:max-results`" (testing "if you specify just `:max-results` it should make sure `:max-results-bare-rows` is <= `:max-results`"
(is (= {:middleware {:add-default-userland-constraints? true} (is (= {:middleware {:add-default-userland-constraints? true
::qp.constraints/add-userland-constraints? true}
:constraints {:max-results 5 :constraints {:max-results 5
:max-results-bare-rows 5}} :max-results-bare-rows 5}}
(add-default-userland-constraints (add-default-userland-constraints
{:constraints {:max-results 5} {:constraints {:max-results 5}
:middleware {:add-default-userland-constraints? true}})))) :middleware {:add-default-userland-constraints? true
::qp.constraints/add-userland-constraints? true}}))))
(testing "if you specify both it should still make sure `:max-results-bare-rows` is <= `:max-results`" (testing "if you specify both it should still make sure `:max-results-bare-rows` is <= `:max-results`"
(is (= {:middleware {:add-default-userland-constraints? true} (is (= {:middleware {:add-default-userland-constraints? true
::qp.constraints/add-userland-constraints? true}
:constraints {:max-results 5 :constraints {:max-results 5
:max-results-bare-rows 5}} :max-results-bare-rows 5}}
(add-default-userland-constraints (add-default-userland-constraints
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment