Skip to content
Snippets Groups Projects
Unverified Commit 35a17758 authored by Cam Saul's avatar Cam Saul Committed by GitHub
Browse files

Metabase 43 QP middleware overhaul part 3: convert all pre-processing...

Metabase 43 QP middleware overhaul part 3: convert all pre-processing middleware to the new pattern (#19937)

* Metabase 43 QP middleware overhaul (part 1)

* Make linter happy

* Test fix

* Fix merge

* Remove perms key after normalization

* Remove unused namespace

* Metabase 43 QP middleware overhaul part 2

* Sort namespaces

* Convert all pre-processing middleware to the new pattern

* Sort namespace

* Minor code cleanup

* Address PR feedback

* Sort namespaces

* Split QP middleware into different groups

* Sort namespaces

* Fix the helper `question` function

* Wait for the card query to load before typing

Co-authored-by: default avatarNemanja <31325167+nemanjaglumac@users.noreply.github.com>
parent 3527d527
No related branches found
No related tags found
No related merge requests found
Showing
with 130 additions and 191 deletions
......@@ -319,18 +319,15 @@
(assoc ::original-metadata (expected-cols original-query))
(update-in [::qp.perms/perms :gtaps] (fn [perms] (into (set perms) (gtaps->perms-set (vals table-id->gtap)))))))))
(defn- apply-sandboxing
(defn apply-sandboxing
"Pre-processing middleware. Replaces source tables a User was querying against with source queries that (presumably)
restrict the rows returned, based on presence of segmented permission GTAPs."
[query]
(or (when-let [table-id->gtap (when *current-user-id*
(query->table-id->gtap query))]
(gtapped-query query table-id->gtap))
query))
(def apply-sandboxing-middleware
"Pre-processing middleware. Replaces source tables a User was querying against with source queries that (presumably)
restrict the rows returned, based on presence of segmented permission GTAPs."
(m.43/wrap-43-pre-processing-middleware apply-sandboxing))
;;;; Post-processing
......
......@@ -34,6 +34,7 @@
[metabase.query-processor.middleware.fetch-source-query :as fetch-source-query]
[metabase.query-processor.middleware.fix-bad-references :as fix-bad-refs]
[metabase.query-processor.middleware.format-rows :as format-rows]
[metabase.query-processor.middleware.forty-three :as m.43]
[metabase.query-processor.middleware.large-int-id :as large-int-id]
[metabase.query-processor.middleware.limit :as limit]
[metabase.query-processor.middleware.mbql-to-native :as mbql-to-native]
......@@ -77,40 +78,40 @@
(def ^:private pre-processing-middleware
"Pre-processing middleware. Has the form
(f query) -> query
in 43+."
[#'check-features/check-features
#'limit/add-default-limit-middleware
#'optimize-temporal-filters/optimize-temporal-filters
#'validate-temporal-bucketing/validate-temporal-bucketing
#'auto-parse-filter-values/auto-parse-filter-values
#'wrap-value-literals/wrap-value-literals
#'pre-alias-ags/pre-alias-aggregations
#'cumulative-ags/rewrite-cumulative-aggregations-middleware
;; yes, this is called a second time, because we need to handle any joins that got added
(resolve 'ee.sandbox.rows/apply-sandboxing-middleware)
#'fix-bad-refs/fix-bad-references-middleware
#'resolve-joined-fields/resolve-joined-fields
#'resolve-joins/resolve-joins
#'add-implicit-joins/add-implicit-joins
#'add-default-temporal-unit/add-default-temporal-unit
#'desugar/desugar
#'binning/update-binning-strategy
#'resolve-fields/resolve-fields
#'add-dim/add-remapped-columns-middleware
#'implicit-clauses/add-implicit-clauses
(resolve 'ee.sandbox.rows/apply-sandboxing-middleware)
#'upgrade-field-literals/upgrade-field-literals
#'add-source-metadata/add-source-metadata-for-source-queries
#'reconcile-bucketing/reconcile-breakout-and-order-by-bucketing
#'bucket-datetime/auto-bucket-datetimes
#'resolve-source-table/resolve-source-tables
#'parameters/substitute-parameters
#'resolve-referenced/resolve-referenced-card-resources
#'expand-macros/expand-macros
#'validate/validate-query
#'perms/remove-permissions-key-middleware])
(f query) -> query"
(mapv
m.43/wrap-43-pre-processing-middleware
[#'check-features/check-features
#'limit/add-default-limit
#'optimize-temporal-filters/optimize-temporal-filters
#'validate-temporal-bucketing/validate-temporal-bucketing
#'auto-parse-filter-values/auto-parse-filter-values
#'wrap-value-literals/wrap-value-literals
#'pre-alias-ags/pre-alias-aggregations
#'cumulative-ags/rewrite-cumulative-aggregations
;; yes, this is called a second time, because we need to handle any joins that got added
(resolve 'ee.sandbox.rows/apply-sandboxing)
#'fix-bad-refs/fix-bad-references
#'resolve-joined-fields/resolve-joined-fields
#'resolve-joins/resolve-joins
#'add-implicit-joins/add-implicit-joins
#'add-default-temporal-unit/add-default-temporal-unit
#'desugar/desugar
#'binning/update-binning-strategy
#'resolve-fields/resolve-fields
#'add-dim/add-remapped-columns
#'implicit-clauses/add-implicit-clauses
(resolve 'ee.sandbox.rows/apply-sandboxing)
#'upgrade-field-literals/upgrade-field-literals
#'add-source-metadata/add-source-metadata-for-source-queries
#'reconcile-bucketing/reconcile-breakout-and-order-by-bucketing
#'bucket-datetime/auto-bucket-datetimes
#'resolve-source-table/resolve-source-tables
#'parameters/substitute-parameters
#'resolve-referenced/resolve-referenced-card-resources
#'expand-macros/expand-macros
#'validate/validate-query
#'perms/remove-permissions-key]))
;; ▲▲▲ PRE-PROCESSING ▲▲▲ happens from BOTTOM-TO-TOP
(def ^:private compile-middleware
......
......@@ -2,7 +2,10 @@
(:require [metabase.mbql.util :as mbql.u]
[metabase.query-processor.store :as qp.store]))
(defn- add-default-temporal-unit* [query]
(defn add-default-temporal-unit
"Add `:temporal-unit` `:default` to any temporal `:field` clauses that don't already have a `:temporal-unit`. This
makes things more consistent because code downstream can rely on the key being present."
[query]
(mbql.u/replace-in query [:query]
[:field (_ :guard string?) (_ :guard (every-pred
:base-type
......@@ -14,10 +17,3 @@
(let [{base-type :base_type, effective-type :effective_type} (qp.store/field id)]
(cond-> &match
(isa? (or effective-type base-type) :type/Temporal) (mbql.u/with-temporal-unit :default)))))
(defn add-default-temporal-unit
"Add `:temporal-unit` `:default` to any temporal `:field` clauses that don't already have a `:temporal-unit`. This
makes things more consistent because code downstream can rely on the key being present."
[qp]
(fn [query rff context]
(qp (add-default-temporal-unit* query) rff context)))
......@@ -131,7 +131,11 @@
;; otherwise return query as-is
[source-query-remappings query])))
(defn- add-remapped-columns [{{:keys [disable-remaps?]} :middleware, query-type :type, :as query}]
(defn add-remapped-columns
"Pre-processing middleware. For columns that have remappings to other columns (FK remaps), rewrite the query to
include the extra column. Add `::external-remaps` information about which columns were remapped so [[remap-results]]
can do appropriate results transformations in post-processing."
[{{:keys [disable-remaps?]} :middleware, query-type :type, :as query}]
(if (or disable-remaps?
(= query-type :native))
query
......@@ -140,12 +144,6 @@
;; convert the remappings to plain maps so we don't have to look at record type nonsense everywhere
(seq remappings) (assoc ::external-remaps (mapv (partial into {}) remappings))))))
(def add-remapped-columns-middleware
"Pre-processing middleware. For columns that have remappings to other columns (FK remaps), rewrite the query to
include the extra column. Add `::external-remaps` information about which columns were remapped so [[remap-results]]
can do appropriate results transformations in post-processing."
(m.43/wrap-43-pre-processing-middleware add-remapped-columns))
;;;; Post-processing
......
......@@ -151,14 +151,10 @@
form))
form))
(defn- maybe-add-implicit-clauses [{query-type :type, :as query}]
(if (= query-type :native)
query
(update query :query add-implicit-mbql-clauses)))
(defn add-implicit-clauses
"Add an implicit `fields` clause to queries with no `:aggregation`, `breakout`, or explicit `:fields` clauses.
Add implicit `:order-by` clauses for fields specified in a `:breakout`."
[qp]
(fn [query rff context]
(qp (maybe-add-implicit-clauses query) rff context)))
[{query-type :type, :as query}]
(if (= query-type :native)
query
(update query :query add-implicit-mbql-clauses)))
......@@ -178,22 +178,18 @@
;;; | Middleware |
;;; +----------------------------------------------------------------------------------------------------------------+
(defn- add-implicit-joins* [query]
(if (mbql.u/match-one (:query query) [:field _ (_ :guard (every-pred :source-field (complement :join-alias)))])
(do
(when-not (driver/supports? driver/*driver* :foreign-keys)
(throw (ex-info (tru "{0} driver does not support foreign keys." driver/*driver*)
{:driver driver/*driver*
:type error-type/unsupported-feature})))
(update query :query resolve-implicit-joins))
query))
(defn add-implicit-joins
"Fetch and store any Tables other than the source Table referred to by `:field` clauses with `:source-field` in an
MBQL query, and add a `:join-tables` key inside the MBQL inner query containing information about the `JOIN`s (or
equivalent) that need to be performed for these tables.
This middleware also adds `:join-alias` info to all `:field` forms with `:source-field`s."
[qp]
(fn [query rff context]
(qp (add-implicit-joins* query) rff context)))
[query]
(if (mbql.u/match-one (:query query) [:field _ (_ :guard (every-pred :source-field (complement :join-alias)))])
(do
(when-not (driver/supports? driver/*driver* :foreign-keys)
(throw (ex-info (tru "{0} driver does not support foreign keys." driver/*driver*)
{:driver driver/*driver*
:type error-type/unsupported-feature})))
(update query :query resolve-implicit-joins))
query))
......@@ -105,13 +105,6 @@
(defn- add-source-metadata-at-all-levels [inner-query]
(walk/postwalk maybe-add-source-metadata inner-query))
(defn add-source-metadata-for-source-queries*
"Add `:source-metadata` for source queries inside `query`."
[{query-type :type, :as query}]
(if-not (= query-type :query)
query
(update query :query add-source-metadata-at-all-levels)))
(defn add-source-metadata-for-source-queries
"Middleware that attempts to recursively add `:source-metadata`, if not already present, to any maps with a
`:source-query`.
......@@ -120,6 +113,7 @@
query; this is added automatically for source queries added via the `card__id` source table form, but for *explicit*
source queries that do not specify this information, we can often infer it by looking at the shape of the source
query."
[qp]
(fn [query rff context]
(qp (add-source-metadata-for-source-queries* query) rff context)))
[{query-type :type, :as query}]
(if-not (= query-type :query)
query
(update query :query add-source-metadata-at-all-levels)))
......@@ -121,7 +121,15 @@
;; otherwise if there are no unbucketed breakouts/filters return the query as-is
inner-query))
(defn- auto-bucket-datetimes-all-levels [{query-type :type, :as query}]
(defn auto-bucket-datetimes
"Middleware that automatically adds `:temporal-unit` `:day` to breakout and filter `:field` clauses if the Field they
refer to has a type that derives from `:type/Temporal` (but not `:type/Time`). (This is done for historic reasons,
before datetime bucketing was added to MBQL; datetime Fields defaulted to breaking out by day. We might want to
revisit this behavior in the future.)
Applies to any unbucketed Field in a breakout, or fields in a filter clause being compared against `yyyy-MM-dd`
format datetime strings."
[{query-type :type, :as query}]
(if (not= query-type :query)
query
;; walk query, looking for inner-query forms that have a `:filter` key
......@@ -133,15 +141,3 @@
(auto-bucket-datetimes-this-level form)
form))
query)))
(defn auto-bucket-datetimes
"Middleware that automatically adds `:temporal-unit` `:day` to breakout and filter `:field` clauses if the Field they
refer to has a type that derives from `:type/Temporal` (but not `:type/Time`). (This is done for historic reasons,
before datetime bucketing was added to MBQL; datetime Fields defaulted to breaking out by day. We might want to
revisit this behavior in the future.)
Applies to any unbucketed Field in a breakout, or fields in a filter clause being compared against `yyyy-MM-dd`
format datetime strings."
[qp]
(fn [query rff context]
(qp (auto-bucket-datetimes-all-levels query) rff context)))
......@@ -29,15 +29,11 @@
{:type error-type/invalid-query}
e)))))
(defn- auto-parse-filter-values* [query]
(defn auto-parse-filter-values
"Automatically parse String filter clause values to the appropriate type."
[query]
(mbql.u/replace-in query [:query]
[:value (v :guard string?) (info :guard (fn [{base-type :base_type}]
(and base-type
(not (isa? base-type :type/Text)))))]
[:value (parse-value-for-base-type v (:base_type info)) info]))
(defn auto-parse-filter-values
"Automatically parse String filter clause values to the appropriate type."
[qp]
(fn [query rff context]
(qp (auto-parse-filter-values* query) rff context)))
......@@ -215,15 +215,11 @@
(catch Throwable e
(throw (ex-info (.getMessage e) {:clause &match} e)))))))
(defn- update-binning-strategy* [{query-type :type, :as query}]
(if (= query-type :native)
query
(update query :query update-binning-strategy-in-inner-query)))
(defn update-binning-strategy
"When a binned field is found, it might need to be updated if a relevant query criteria affects the min/max value of
the binned field. This middleware looks for that criteria, then updates the related min/max values and calculates
the bin-width based on the criteria values (or global min/max information)."
[qp]
(fn [query rff context]
(qp (update-binning-strategy* query) rff context)))
[{query-type :type, :as query}]
(if (= query-type :native)
query
(update query :query update-binning-strategy-in-inner-query)))
......@@ -28,15 +28,11 @@
(let [{:keys [strategy]} join]
(assert-driver-supports strategy)))))
(defn- check-features* [{query-type :type, :as query}]
(defn check-features
"Middleware that checks that drivers support the `:features` required to use certain clauses, like `:stddev`."
[{query-type :type, :as query}]
(if-not (= query-type :query)
query
(u/prog1 query
(doseq [required-feature (query->required-features query)]
(assert-driver-supports required-feature)))))
(defn check-features
"Middleware that checks that drivers support the `:features` required to use certain clauses, like `:stddev`."
[qp]
(fn [query rff context]
(qp (check-features* query) rff context)))
......@@ -26,7 +26,10 @@
[:cum-count field] [:count field]
[:cum-sum field] [:sum field]))
(defn- rewrite-cumulative-aggregations [{{breakouts :breakout, aggregations :aggregation} :query, :as query}]
(defn rewrite-cumulative-aggregations
"Pre-processing middleware. Rewrite `:cum-count` and `:cum-sum` aggregations as `:count` and `:sum` respectively. Add
information about the indecies of the replaced aggregations under the `::replaced-indecies` key."
[{{breakouts :breakout, aggregations :aggregation} :query, :as query}]
(if-not (mbql.u/match aggregations #{:cum-count :cum-sum})
query
(let [query' (replace-cumulative-ags query)
......@@ -38,11 +41,6 @@
(cond-> query'
(seq replaced-indecies) (assoc ::replaced-indecies replaced-indecies)))))
(def rewrite-cumulative-aggregations-middleware
"Pre-processing middleware. Rewrite `:cum-count` and `:cum-sum` aggregations as `:count` and `:sum` respectively. Add
information about the indecies of the replaced aggregations under the `::replaced-indecies` key."
(m.43/wrap-43-pre-processing-middleware rewrite-cumulative-aggregations))
;;;; Post-processing
......
......@@ -5,17 +5,12 @@
[metabase.mbql.util :as mbql.u]
[schema.core :as s]))
(s/defn ^:private desugar* :- mbql.s/Query
[query]
(m/update-existing query :query (fn [query]
(mbql.u/replace query
(filter-clause :guard mbql.preds/Filter?)
(mbql.u/desugar-filter-clause filter-clause)))))
(defn desugar
(s/defn desugar :- mbql.s/Query
"Middleware that uses MBQL lib functions to replace high-level 'syntactic sugar' clauses like `time-interval` and
`inside` with lower-level clauses like `between`. This is done to minimize the number of MBQL clauses individual
drivers need to support. Clauses replaced by this middleware are marked `^:sugar` in the MBQL schema."
[qp]
(fn [query rff context]
(qp (desugar* query) rff context)))
[query]
(m/update-existing query :query (fn [query]
(mbql.u/replace query
(filter-clause :guard mbql.preds/Filter?)
(mbql.u/desugar-filter-clause filter-clause)))))
......@@ -170,15 +170,10 @@
expand-metrics
expand-segments))
(defn- expand-macros*
(defn expand-macros
"Middleware that looks for `:metric` and `:segment` macros in an unexpanded MBQL query and substitute the macros for
their contents."
[{query-type :type, :as query}]
(if-not (= query-type :query)
query
(expand-metrics-and-segments query)))
(defn expand-macros
"Middleware that looks for `:metric` and `:segment` macros in an unexpanded MBQL query and substitute the macros for
their contents."
[qp]
(fn [query rff context]
(qp (expand-macros* query) rff context)))
......@@ -83,9 +83,3 @@
(fix-bad-references* form)
form))
query))
(defn fix-bad-references-middleware
"Middleware version of [[fix-bad-references]]."
[qp]
(fn [query rff context]
(qp (fix-bad-references query) rff context)))
......@@ -14,9 +14,12 @@
(f qp) -> (f query rff context)"
[f]
(fn wrap-43-pre-processing-middleware-qp* [qp]
(fn wrap-43-pre-processing-middleware-fn* [query rff context]
(qp (f query) rff context))))
(when f
(fn wrap-43-pre-processing-middleware-qp* [qp]
(fn wrap-43-pre-processing-middleware-fn* [query rff context]
(let [query' (f query)]
(assert (map? query') (format "%s did not return a valid query" (pr-str f)))
(qp query' rff context))))))
(defn wrap-43-post-processing-middleware
"Temporary helper to wrap a 43+ style post-processing middleware function so it can be used as a <=42 style around
......@@ -31,6 +34,7 @@
(f qp) -> (f query rff context)"
[f]
(fn wrap-43-post-processing-middleware-qp* [qp]
(fn wrap-43-post-processing-middleware-fn* [query rff context]
(qp query (f query rff) context))))
(when f
(fn wrap-43-post-processing-middleware-qp* [qp]
(fn wrap-43-post-processing-middleware-fn* [query rff context]
(qp query (f query rff) context)))))
......@@ -27,12 +27,10 @@
(mbql.u/query->max-rows-limit query)
i/absolute-max-results))
(defn- add-default-limit [query]
(add-limit (determine-query-max-rows query) query))
(def add-default-limit-middleware
(defn add-default-limit
"Pre-processing middleware. Add default `:limit` to MBQL queries without any aggregations."
(m.43/wrap-43-pre-processing-middleware add-default-limit))
[query]
(add-limit (determine-query-max-rows query) query))
;;;; Post-processing
......
......@@ -160,21 +160,6 @@
(log/error (trs "Error optimizing temporal filter clause") (pr-str &match)))))
&match)))
(defn- optimize-temporal-filters-all-levels [{query-type :type, :as query}]
(if (not= query-type :query)
query
;; walk query, looking for inner-query forms that have a `:filter` key
(walk/postwalk
(fn [form]
(if-not (and (map? form) (seq (:filter form)))
form
;; optimize the filters in this inner-query form.
(let [optimized (optimize-temporal-filters* form)]
;; if we did some optimizations, we should flatten/deduplicate the filter clauses afterwards.
(cond-> optimized
(not= optimized form) (update :filter mbql.u/combine-filter-clauses)))))
query)))
(defn optimize-temporal-filters
"Middlware that optimizes equality (`=` and `!=`) and comparison (`<`, `between`, etc.) filter clauses against
bucketed datetime fields. Rewrites those filter clauses as logically equivalent filter clauses that do not use
......@@ -199,6 +184,17 @@
This namespace expects to run *after* the `wrap-value-literals` middleware, meaning datetime literal strings like
`\"2019-09-24\"` should already have been converted to `:absolute-datetime` clauses."
[qp]
(fn [query rff context]
(qp (optimize-temporal-filters-all-levels query) rff context)))
[{query-type :type, :as query}]
(if (not= query-type :query)
query
;; walk query, looking for inner-query forms that have a `:filter` key
(walk/postwalk
(fn [form]
(if-not (and (map? form) (seq (:filter form)))
form
;; optimize the filters in this inner-query form.
(let [optimized (optimize-temporal-filters* form)]
;; if we did some optimizations, we should flatten/deduplicate the filter clauses afterwards.
(cond-> optimized
(not= optimized form) (update :filter mbql.u/combine-filter-clauses)))))
query)))
......@@ -106,7 +106,7 @@
A SQL query with a param like `{{param}}` will have that part of the query replaced with an appropriate snippet as
well as any prepared statement args needed. MBQL queries will have additional filter clauses added."
[qp]
(fn [query rff context]
(qp ((comp substitute-parameters* hoist-database-for-snippet-tags) query)
rff context)))
[query]
(-> query
hoist-database-for-snippet-tags
substitute-parameters*))
......@@ -9,7 +9,6 @@
[metabase.models.query.permissions :as query-perms]
[metabase.plugins.classloader :as classloader]
[metabase.query-processor.error-type :as error-type]
[metabase.query-processor.middleware.forty-three :as m.43]
[metabase.query-processor.middleware.resolve-referenced :as qp.resolve-referenced]
[metabase.util :as u]
[metabase.util.i18n :refer [tru]]
......@@ -108,14 +107,12 @@
(check-query-permissions* query)
(qp query rff context)))
(defn- remove-permissions-key [query]
(dissoc query ::perms))
(def remove-permissions-key-middleware
(defn remove-permissions-key
"Pre-processing middleware. Removes the `::perms` key from the query. This is where we store important permissions
information like perms coming from sandboxing (GTAPs). This is programatically added by middleware when appropriate,
but we definitely don't want users passing it in themselves. So remove it if it's present."
(m.43/wrap-43-pre-processing-middleware #'remove-permissions-key))
[query]
(dissoc query ::perms))
;;; +----------------------------------------------------------------------------------------------------------------+
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment