Skip to content
Snippets Groups Projects
Unverified Commit 03434555 authored by Cam Saul's avatar Cam Saul Committed by GitHub
Browse files

Split QP middleware into different groups (#19935)

parent 5d5b33a1
No related branches found
No related tags found
No related merge requests found
Showing
with 795 additions and 730 deletions
......@@ -491,13 +491,14 @@
(defmulti mbql->native
"Transpile an MBQL query into the appropriate native query form. `query` will match the schema for an MBQL query in
`metabase.mbql.schema/Query`; this function should return a native query that conforms to that schema.
[[metabase.mbql.schema/Query]]; this function should return a native query that conforms to that schema.
If the underlying query language supports remarks or comments, the driver should use `query->remark` to generate an
appropriate message and include that in an appropriate place; alternatively a driver might directly include the
query's `:info` dictionary if the underlying language is JSON-based.
If the underlying query language supports remarks or comments, the driver should
use [[metabase.query-processor.util/query->remark]] to generate an appropriate message and include that in an
appropriate place; alternatively a driver might directly include the query's `:info` dictionary if the underlying
language is JSON-based.
The result of this function will be passed directly into calls to `execute-reducible-query`.
The result of this function will be passed directly into calls to [[execute-reducible-query]].
For example, a driver like Postgres would build a valid SQL expression and return a map such as:
......
......@@ -74,64 +74,124 @@
[column-level-perms-check :as ee.sandbox.columns]
[row-level-restrictions :as ee.sandbox.rows]]))
;; ▼▼▼ POST-PROCESSING ▼▼▼ happens from TOP-TO-BOTTOM
(def default-middleware
"The default set of middleware applied to queries ran via `process-query`."
[#'mbql-to-native/mbql->native ; EXECUTE
#'check-features/check-features ; PRE
#'limit/limit-result-rows-middleware ; POST
#'limit/add-default-limit-middleware ; PRE
#'cache/maybe-return-cached-results ; EXECUTE
#'optimize-temporal-filters/optimize-temporal-filters ; PRE
#'validate-temporal-bucketing/validate-temporal-bucketing ; PRE
#'auto-parse-filter-values/auto-parse-filter-values ; PRE
#'wrap-value-literals/wrap-value-literals ; PRE
#'annotate/add-column-info ; POST
#'perms/check-query-permissions ; EXECUTE
#'pre-alias-ags/pre-alias-aggregations ; PRE
#'cumulative-ags/sum-cumulative-aggregation-columns-middleware ; POST
#'cumulative-ags/rewrite-cumulative-aggregations-middleware ; PRE
(def ^:private pre-processing-middleware
"Pre-processing middleware. Has the form
(f query) -> query
in 43+."
[#'check-features/check-features
#'limit/add-default-limit-middleware
#'optimize-temporal-filters/optimize-temporal-filters
#'validate-temporal-bucketing/validate-temporal-bucketing
#'auto-parse-filter-values/auto-parse-filter-values
#'wrap-value-literals/wrap-value-literals
#'pre-alias-ags/pre-alias-aggregations
#'cumulative-ags/rewrite-cumulative-aggregations-middleware
;; yes, this is called a second time, because we need to handle any joins that got added
(resolve 'ee.sandbox.rows/merge-sandboxing-metadata-middleware) ; POST
(resolve 'ee.sandbox.rows/apply-sandboxing-middleware) ; PRE
#'viz-settings/update-viz-settings ; POST
#'fix-bad-refs/fix-bad-references-middleware ; PRE
#'resolve-joined-fields/resolve-joined-fields ; PRE
#'resolve-joins/resolve-joins ; PRE
#'add-implicit-joins/add-implicit-joins ; PRE
#'large-int-id/convert-id-to-string ; POST
#'format-rows/format-rows ; POST
#'add-default-temporal-unit/add-default-temporal-unit ; PRE
#'desugar/desugar ; PRE
#'binning/update-binning-strategy ; PRE
#'resolve-fields/resolve-fields ; PRE
#'add-dim/remap-results-middleware ; POST
#'add-dim/add-remapped-columns-middleware ; PRE
#'implicit-clauses/add-implicit-clauses ; PRE
(resolve 'ee.sandbox.rows/merge-sandboxing-metadata-middleware) ; POST
(resolve 'ee.sandbox.rows/apply-sandboxing-middleware) ; PRE
#'upgrade-field-literals/upgrade-field-literals ; PRE
#'add-source-metadata/add-source-metadata-for-source-queries ; PRE
(resolve 'ee.sandbox.columns/maybe-apply-column-level-perms-check) ; EXECUTE
#'reconcile-bucketing/reconcile-breakout-and-order-by-bucketing ; PRE
#'bucket-datetime/auto-bucket-datetimes ; PRE
#'resolve-source-table/resolve-source-tables ; PRE
#'parameters/substitute-parameters ; PRE
#'resolve-referenced/resolve-referenced-card-resources ; PRE
#'expand-macros/expand-macros ; PRE
#'add-timezone-info/add-timezone-info ; POST
#'splice-params-in-response/splice-params-in-response ; POST
#'resolve-database-and-driver/resolve-database-and-driver ; AROUND
#'fetch-source-query/resolve-card-id-source-tables ; PRE
#'store/initialize-store ; AROUND
#'validate/validate-query ; PRE
#'perms/remove-permissions-key-middleware ; PRE
#'normalize/normalize ; PRE
#'add-rows-truncated/add-rows-truncated ; POST
(resolve 'ee.audit/handle-internal-queries) ; AROUND
#'results-metadata/record-and-return-metadata!]) ; EXECUTE
(resolve 'ee.sandbox.rows/apply-sandboxing-middleware)
#'fix-bad-refs/fix-bad-references-middleware
#'resolve-joined-fields/resolve-joined-fields
#'resolve-joins/resolve-joins
#'add-implicit-joins/add-implicit-joins
#'add-default-temporal-unit/add-default-temporal-unit
#'desugar/desugar
#'binning/update-binning-strategy
#'resolve-fields/resolve-fields
#'add-dim/add-remapped-columns-middleware
#'implicit-clauses/add-implicit-clauses
(resolve 'ee.sandbox.rows/apply-sandboxing-middleware)
#'upgrade-field-literals/upgrade-field-literals
#'add-source-metadata/add-source-metadata-for-source-queries
#'reconcile-bucketing/reconcile-breakout-and-order-by-bucketing
#'bucket-datetime/auto-bucket-datetimes
#'resolve-source-table/resolve-source-tables
#'parameters/substitute-parameters
#'resolve-referenced/resolve-referenced-card-resources
#'expand-macros/expand-macros
#'validate/validate-query
#'perms/remove-permissions-key-middleware])
;; ▲▲▲ PRE-PROCESSING ▲▲▲ happens from BOTTOM-TO-TOP
(def ^:private compile-middleware
"Middleware for query compilation. Happens after pre-processing. Has the form
(f query) -> query
in 43+."
[#'mbql-to-native/mbql->native])
(def ^:private execution-middleware
"Middleware that happens after compilation, AROUND query execution itself. Has the form
(f qp) -> qp
Where `qp` has the form
(f query rff context)"
;; TODO -- limit SEEMS like it should be post-processing but it actually has to happen only if we don't return cached
;; results. Otherwise things break. There's probably some way to fix this. e.g. maybe it doesn't do anything if the
;; query has the `:cached?` key.
[#'limit/limit-result-rows-middleware
#'cache/maybe-return-cached-results
#'perms/check-query-permissions
(resolve 'ee.sandbox.columns/maybe-apply-column-level-perms-check)])
(def ^:private post-processing-middleware
"Post-processing middleware that transforms results. Has the form
(f preprocessed-query rff) -> rff
Where `rff` has the form
(f metadata) -> rf"
;; ▼▼▼ POST-PROCESSING ▼▼▼ happens from TOP-TO-BOTTOM
[#'annotate/add-column-info
#'cumulative-ags/sum-cumulative-aggregation-columns-middleware
#'viz-settings/update-viz-settings
#'large-int-id/convert-id-to-string
#'format-rows/format-rows
#'add-dim/remap-results-middleware
(resolve 'ee.sandbox.rows/merge-sandboxing-metadata-middleware)
#'add-timezone-info/add-timezone-info
#'splice-params-in-response/splice-params-in-response
#'add-rows-truncated/add-rows-truncated-middleware])
(def ^:private around-middleware
"Middleware that goes AROUND *all* the other middleware (even for pre-processing only or compilation only). Has the
form
(f qp) -> qp
Where `qp` has the form
(f query rff context)"
[#'resolve-database-and-driver/resolve-database-and-driver
#'fetch-source-query/resolve-card-id-source-tables
#'store/initialize-store
;; `normalize` has to be done at the very beginning or `resolve-card-id-source-tables` and the like might not work.
;; It doesn't really need to be 'around' middleware tho.
#'normalize/normalize
(resolve 'ee.audit/handle-internal-queries)
;; TODO -- I think this is actually supposed to be post-processing middleware? #idk¿?
#'results-metadata/record-and-return-metadata!])
;; query -> preprocessed = around + pre-process
;; query -> native = around + pre-process + compile
;; query -> results = around + pre-process + compile + execute + post-process = default-middleware
(def default-middleware
"The default set of middleware applied to queries ran via [[process-query]]."
(into
[]
(comp cat (keep identity))
[execution-middleware ; → → execute → → ↓
compile-middleware ; ↑ compile ↓
post-processing-middleware ; ↑ ↓ post-process
pre-processing-middleware ; ↑ pre-process ↓
around-middleware])) ; ↑ query ↓ results
;; In REPL-based dev rebuild the QP every time it is called; this way we don't need to reload this namespace when
;; middleware is changed. Outside of dev only build the QP once for performance/locality
(defn- base-qp [middleware]
......
(ns metabase.query-processor.middleware.add-rows-truncated
"Adds `:rows_truncated` to the query results if the results were truncated because of the query's constraints."
(:require [metabase.query-processor.interface :as i]
[metabase.query-processor.util :as qputil]))
[metabase.query-processor.middleware.forty-three :as m.43]
[metabase.query-processor.middleware.limit :as limit]))
(defn- results-limit [{{:keys [max-results max-results-bare-rows]} :constraints, :as query}]
(or (when (qputil/query-without-aggregations-or-limits? query)
(defn- results-limit
[{{:keys [max-results max-results-bare-rows]} :constraints
{aggregations :aggregation, :keys [limit page], ::limit/keys [original-limit]} :query
:as query}]
(or (when (and (or (not limit)
(= original-limit nil))
(not page)
(empty? aggregations))
max-results-bare-rows)
max-results
i/absolute-max-results))
......@@ -26,13 +33,12 @@
(vswap! row-count inc)
(rf result row)))))
(defn add-rows-truncated
(defn- add-rows-truncated [query rff]
(fn add-rows-truncated-rff* [metadata]
(add-rows-truncated-xform (results-limit query) (rff metadata))))
(def add-rows-truncated-middleware
"Add `:rows_truncated` to the result if the results were truncated because of the query's constraints. Only affects QP
results that are reduced to a map (e.g. the default reducing function; other reducing functions such as streaming to
a CSV are unaffected.)"
[qp]
(fn [query rff context]
(qp query
(fn [metadata]
(add-rows-truncated-xform (results-limit query) (rff metadata)))
context)))
(m.43/wrap-43-post-processing-middleware add-rows-truncated))
......@@ -14,8 +14,8 @@
(f qp) -> (f query rff context)"
[f]
(fn [qp]
(fn [query rff context]
(fn wrap-43-pre-processing-middleware-qp* [qp]
(fn wrap-43-pre-processing-middleware-fn* [query rff context]
(qp (f query) rff context))))
(defn wrap-43-post-processing-middleware
......@@ -31,6 +31,6 @@
(f qp) -> (f query rff context)"
[f]
(fn [qp]
(fn [query rff context]
(fn wrap-43-post-processing-middleware-qp* [qp]
(fn wrap-43-post-processing-middleware-fn* [query rff context]
(qp query (f query rff) context))))
......@@ -8,11 +8,11 @@
;;;; Pre-processing
(defn- add-limit [max-rows {query-type :type, :as query}]
(defn- add-limit [max-rows {query-type :type, {original-limit :limit}, :query, :as query}]
(cond-> query
(and (= query-type :query)
(qputil/query-without-aggregations-or-limits? query))
(assoc-in [:query :limit] max-rows)))
(update :query assoc :limit max-rows, ::original-limit original-limit)))
(defn determine-query-max-rows
"Given a `query`, return the max rows that should be returned. This is the first non-nil value from (in decreasing
......
......@@ -4,12 +4,12 @@
as a checksum in the API response."
(:require [clojure.tools.logging :as log]
[metabase.driver :as driver]
[metabase.query-processor.middleware.forty-three :as m.43]
[metabase.query-processor.reducible :as qp.reducible]
[metabase.sync.analyze.query-results :as analyze.results]
[metabase.util.i18n :refer [tru]]
[toucan.db :as db]))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | Middleware |
;;; +----------------------------------------------------------------------------------------------------------------+
......@@ -67,16 +67,15 @@
(rf (cond-> result
(map? result)
(update :data assoc
:results_metadata {:columns metadata}
:results_metadata {:columns metadata}
:insights insights)))))))
(defn record-and-return-metadata!
(defn- record-and-return-metadata!*
[{{:keys [skip-results-metadata?]} :middleware, :as query} rff]
(let [record! (partial record-metadata! query)]
(fn record-and-return-metadata!-rff* [metadata]
(insights-xform metadata record! (rff metadata)))))
(def record-and-return-metadata!
"Middleware that records metadata about the columns returned when running the query."
[qp]
(fn [{{:keys [skip-results-metadata?]} :middleware, :as query} rff context]
(if skip-results-metadata?
(qp query rff context)
(let [record! (partial record-metadata! query)
rff' (fn [metadata]
(insights-xform metadata record! (rff metadata)))]
(qp query rff' context)))))
(m.43/wrap-43-post-processing-middleware record-and-return-metadata!*))
......@@ -41,6 +41,8 @@
[middleware]
(reduce
(fn [qp middleware]
(when (var? middleware)
(assert (not (:private (meta middleware))) (format "%s is private" (pr-str middleware))))
(if (some? middleware)
(middleware qp)
qp))
......@@ -194,7 +196,7 @@
{:pre [(fn? primary-rf) (sequential? additional-rfs) (every? fn? additional-rfs) (fn? combine)]}
(let [additional-accs (volatile! (mapv (fn [rf] (rf))
additional-rfs))]
(fn
(fn combine-additional-reducing-fns-rf*
([] (primary-rf))
([acc]
......
......@@ -9,7 +9,7 @@
[metabase.util.schema :as su]
[schema.core :as s]))
;; TODO - I think most of the functions in this namespace that we don't remove could be moved to `metabase.mbql.util`
;; TODO - I think most of the functions in this namespace that we don't remove could be moved to [[metabase.mbql.util]]
(defn ^:deprecated mbql-query? ;; not really needed anymore since we don't need to normalize tokens
"Is the given query an MBQL query?
......@@ -22,7 +22,7 @@
[{{aggregations :aggregation, :keys [limit page]} :query}]
(and (not limit)
(not page)
(nil? aggregations)))
(empty? aggregations)))
(defn default-query->remark
"Generates the default query remark. Exists as a separate function so that overrides of the query->remark multimethod
......@@ -39,7 +39,7 @@
(defmulti query->remark
"Generate an appropriate remark `^String` to be prepended to a query to give DBAs additional information about the query
being executed. See documentation for `mbql->native` and #2386.
being executed. See documentation for [[metabase.driver/mbql->native]] and #2386.
for more information.
So this turns your average 10, 20, 30 character query into a 110, 120, 130 etc character query.
......@@ -48,8 +48,7 @@
'Hey, this is a 20 character query! What's it talking about, error at position 120?'
This gets fixed, but in a spooky-action-at-a-distance way, in
`frontend/src/metabase/query_builder/components/VisualizationError.jsx`
"
`frontend/src/metabase/query_builder/components/VisualizationError.jsx`"
{:arglists '(^String [driver query])}
driver/dispatch-on-initialized-driver
:hierarchy #'driver/hierarchy)
......
......@@ -6,7 +6,7 @@
(defn- add-rows-truncated [query rows]
(:result
(mt/test-qp-middleware add-rows-truncated/add-rows-truncated query rows)))
(mt/test-qp-middleware add-rows-truncated/add-rows-truncated-middleware query rows)))
(deftest add-rows-truncated-test
(testing "the default behavior is to treat the query as no aggregation and use :max-results-bare-rows"
......
......@@ -38,6 +38,7 @@
":row_count should match the limit added by middleware")
(is (= {:constraints {:max-results 46}
:type :query
:query {:limit 46}}
:query {:limit 46
::limit/original-limit nil}}
(#'limit/add-default-limit query))
"Preprocessed query should have :limit added to it"))))
This diff is collapsed.
......@@ -7,6 +7,10 @@
[metabase.util.schema :as su]
[schema.core :as s]))
(use-fixtures :each (fn [thunk]
(mt/with-log-level :fatal
(thunk))))
(defn- bad-query []
{:database (mt/id)
:type :query
......@@ -24,7 +28,8 @@
:type (s/eq :query)
:query {:source-table (s/eq (mt/id :venues))
:fields (s/eq [[:field (mt/id :venues :id) {:temporal-unit :month}]])
:limit (s/eq qp.i/absolute-max-results)}
:limit (s/eq qp.i/absolute-max-results)
s/Keyword s/Any}
(s/optional-key :driver) (s/eq :h2)
s/Keyword s/Any})
......
......@@ -5,8 +5,7 @@
[metabase.models.permissions :as perms]
[metabase.query-processor :as qp]
[metabase.test :as mt]
[metabase.util :as u]
[schema.core :as s]))
[metabase.util :as u]))
(deftest query->native-test
(testing "Can we convert an MBQL query to a native query?"
......@@ -46,35 +45,28 @@
;; use `query->native`
(defn- query->native-with-user-perms
[{database-id :database, {source-table-id :source-table} :query, :as query} {:keys [object-perms? native-perms?]}]
(try
(binding [api/*current-user-id* Integer/MAX_VALUE
api/*current-user-permissions-set* (delay (cond-> #{}
object-perms? (conj (perms/data-perms-path database-id "PUBLIC" source-table-id))
native-perms? (conj (perms/adhoc-native-query-path database-id))))]
(qp/query->native query))
(catch clojure.lang.ExceptionInfo e
(merge {:error (.getMessage e)}
(ex-data e)))))
(binding [api/*current-user-id* Integer/MAX_VALUE
api/*current-user-permissions-set* (delay (cond-> #{}
object-perms? (conj (perms/data-perms-path database-id "PUBLIC" source-table-id))
native-perms? (conj (perms/adhoc-native-query-path database-id))))]
(qp/query->native query)))
(deftest permissions-test
(testing "If user permissions are bound, we should do permissions checking when you call `query->native`"
(testing "If user permissions are bound, we should still NOT do permissions checking when you call `query->native`"
(testing "Should work if you have the right perms"
(is (= true
(boolean
(query->native-with-user-perms
(mt/mbql-query venues)
{:object-perms? true, :native-perms? true})))))
(testing "If you don't have MBQL permissions for the original query it should throw an error"
(is (schema= {:error (s/eq "You do not have permissions to run this query.")
s/Any s/Any}
(query->native-with-user-perms
(mt/mbql-query venues)
{:object-perms? false, :native-perms? true}))))))
(is (query->native-with-user-perms
(mt/mbql-query venues)
{:object-perms? true, :native-perms? true})))
(testing "Should still work even WITHOUT the right perms"
(is (query->native-with-user-perms
(mt/mbql-query venues)
{:object-perms? false, :native-perms? true})))))
(deftest error-test
(testing "If the query is bad in some way it should return a relevant error (?)"
(is (schema= {:error (s/eq (format "Database %d does not exist." Integer/MAX_VALUE))
s/Any s/Any}
(query->native-with-user-perms
{:database Integer/MAX_VALUE, :type :query, :query {:source-table Integer/MAX_VALUE}}
{:object-perms? true, :native-perms? true})))))
(is (thrown-with-msg?
clojure.lang.ExceptionInfo
#"Database \d+ does not exist"
(query->native-with-user-perms
{:database Integer/MAX_VALUE, :type :query, :query {:source-table Integer/MAX_VALUE}}
{:object-perms? true, :native-perms? true})))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment