Skip to content
Snippets Groups Projects
Unverified Commit 97ed4f4d authored by Cal Herries's avatar Cal Herries Committed by GitHub
Browse files

Lift max results limit for cached models and questions (#24970)

* Add disable-max-results? to query middleware options when refreshing cached card results

* Add docstrings

* Add test for `add-default-limit`
parent fa6ee214
Branches
Tags
No related merge requests found
......@@ -1487,6 +1487,11 @@
(s/optional-key :disable-mbql->native?)
s/Bool
;; Disable applying a default limit on the query results. Handled in the `add-default-limit` middleware.
;; If true, this will override the `:max-results` and `:max-results-bare-rows` values in [[Constraints]].
(s/optional-key :disable-max-results?)
s/Bool
;; Userland queries are ones ran as a result of an API call, Pulse, or the like. Special handling is done in the
;; `process-userland-query` middleware for such queries -- results are returned in a slightly different format, and
;; QueryExecution entries are normally saved, unless you pass `:no-save` as the option.
......
......@@ -41,7 +41,7 @@
(String. ^bytes (codecs/bytes->b64 (qp.util/query-hash query))))
(def ^:dynamic *allow-persisted-substitution*
"Allow persisted substitution. When refreshing, set this to nil to ensure that all undelrying queries are used to
"Allow persisted substitution. When refreshing, set this to nil to ensure that all underlying queries are used to
rebuild the persisted table."
true)
......
......@@ -7,6 +7,16 @@
;;;; Pre-processing
(defn disable-max-results?
"Returns the value of the disable-max-results? option in this query."
[query]
(get-in query [:middleware :disable-max-results?] false))
(defn disable-max-results
"Sets the value of the disable-max-results? option in this query."
[query]
(assoc-in query [:middleware :disable-max-results?] true))
(defn- add-limit [max-rows {query-type :type, {original-limit :limit}, :query, :as query}]
(cond-> query
(and (= query-type :query)
......@@ -14,22 +24,25 @@
(update :query assoc :limit max-rows, ::original-limit original-limit)))
(defn determine-query-max-rows
"Given a `query`, return the max rows that should be returned. This is the first non-nil value from (in decreasing
priority order):
"Given a `query`, return the max rows that should be returned, or `nil` if no limit should be applied.
If a limit should be applied, this is the first non-nil value from (in decreasing priority order):
1. the value of the [[metabase.query-processor.middleware.constraints/max-results-bare-rows]] setting, which allows
for database-local override
2. the output of [[metabase.mbql.util/query->max-rows-limit]] when called on the given query
3. [[metabase.query-processor.interface/absolute-max-results]] (a constant, non-nil backstop value)"
[query]
(or (qp.constraints/max-results-bare-rows)
(mbql.u/query->max-rows-limit query)
qp.i/absolute-max-results))
(when-not (disable-max-results? query)
(or (qp.constraints/max-results-bare-rows)
(mbql.u/query->max-rows-limit query)
qp.i/absolute-max-results)))
(defn add-default-limit
"Pre-processing middleware. Add default `:limit` to MBQL queries without any aggregations."
[query]
(add-limit (determine-query-max-rows query) query))
(if-let [max-rows (determine-query-max-rows query)]
(add-limit max-rows query)
query))
;;;; Post-processing
......
......@@ -17,6 +17,7 @@
[metabase.models.persisted-info :as persisted-info :refer [PersistedInfo]]
[metabase.models.task-history :refer [TaskHistory]]
[metabase.public-settings :as public-settings]
[metabase.query-processor.middleware.limit :as limit]
[metabase.query-processor.timezone :as qp.timezone]
[metabase.task :as task]
[metabase.util :as u]
......@@ -53,7 +54,8 @@
(reify Refresher
(refresh! [_ database definition card]
(binding [persisted-info/*allow-persisted-substitution* false]
(ddl.i/refresh! (:engine database) database definition (:dataset_query card))))
(let [query (limit/disable-max-results (:dataset_query card))]
(ddl.i/refresh! (:engine database) database definition query))))
(unpersist! [_ database persisted-info]
(ddl.i/unpersist! (:engine database) database persisted-info))))
......
......@@ -19,6 +19,20 @@
(is (= test-max-results
(-> (limit {:type :native}) mt/rows count)))))
(deftest disable-max-results-test
(testing "Apply `absolute-max-results` limit in the default case"
(let [query {:type :query
:query {}}]
(is (= {:type :query
:query {:limit qp.i/absolute-max-results
::limit/original-limit nil}}
(limit/add-default-limit query)))))
(testing "Don't apply the `absolute-max-results` limit when `disable-max-results` is used."
(let [query (limit/disable-max-results {:type :query
:query {}})]
(is (= query
(limit/add-default-limit query))))))
(deftest max-results-constraint-test
(testing "Apply an arbitrary max-results on the query and ensure our results size is appropriately constrained"
(is (= 1234
......
......@@ -3,7 +3,11 @@
[clojurewerkz.quartzite.conversion :as qc]
[java-time :as t]
[medley.core :as m]
[metabase.driver.ddl.interface :as ddl.i]
[metabase.models :refer [Card Database PersistedInfo TaskHistory]]
[metabase.models.persisted-info :as persisted-info]
[metabase.query-processor :as qp]
[metabase.query-processor.interface :as qp.i]
[metabase.query-processor.timezone :as qp.timezone]
[metabase.task.persist-refresh :as pr]
[metabase.test :as mt]
......@@ -75,8 +79,8 @@
[& dbs]
(let [ids (into #{} (map u/the-id dbs))]
(m/map-vals
#(select-keys % [:data :schedule :key])
(select-keys (pr/job-info-by-db-id) ids))))
#(select-keys % [:data :schedule :key])
(select-keys (pr/job-info-by-db-id) ids))))
(deftest reschedule-refresh-test
(mt/with-temp-scheduler
......@@ -189,6 +193,34 @@
:task "unpersist-tables"
{:order-by [[:id :desc]]}))))))))
(comment
(run-tests)
)
(deftest persisted-models-max-rows-test
(testing "Persisted models should have the full number of rows of the underlying query,
not limited by `absolute-max-results` (#24793)"
(with-redefs [qp.i/absolute-max-results 3]
(mt/dataset daily-bird-counts
(mt/test-driver :postgres
(mt/with-temporary-setting-values [:persisted-models-enabled true]
(db/update! Database (mt/id) :options {:persist-models-enabled true})
(mt/with-temp* [Card [model {:dataset true
:database_id (mt/id)
:query_type :query
:dataset_query {:database (mt/id)
:type :query
:query {:source-table (mt/id :bird-count)}}}]]
(let [;; Get the number of rows before the model is persisted
query-on-top {:database (mt/id)
:type :query
:query {:aggregation [[:count]]
:source-table (str "card__" (:id model))}}
[[num-rows-query]] (mt/rows (qp/process-query query-on-top))
;; Create PersistedInfo for model
[persisted-info-id] (persisted-info/ready-unpersisted-models! (mt/id))]
;; Persist the model
(ddl.i/check-can-persist (db/select-one Database :id (mt/id)))
(#'pr/refresh-individual! persisted-info-id (var-get #'pr/dispatching-refresher))
;; Check the number of rows is the same after persisting
(let [query-on-top {:database (mt/id)
:type :query
:query {:aggregation [[:count]]
:source-table (str "card__" (:id model))}}]
(is (= [[num-rows-query]] (mt/rows (qp/process-query query-on-top)))))))))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment