Skip to content
Snippets Groups Projects
Commit 5de17c88 authored by Cam Saul's avatar Cam Saul
Browse files

Remove duplicate Fields specified in both Fields and Breakouts

parent 3aa1cf35
No related branches found
No related tags found
No related merge requests found
(ns metabase.mbql.normalize
"Logic for taking any sort of weird MBQL query and normalizing it into a standardized, canonical form. You can think
of this like taking any 'valid' MBQL query and rewriting it as-if it was written in perfect up-to-date MBQL in the
latest version. There are two main things done here, done as three separate steps:
latest version. There are four main things done here, done as four separate steps:
#### NORMALIZING TOKENS
......@@ -13,6 +13,16 @@
Rewriting deprecated MBQL 95/98 syntax and other things that are still supported for backwards-compatibility in
canonical MBQL 2000 syntax. For example `{:breakout [:count 10]}` becomes `{:breakout [[:count [:field-id 10]]]}`.
#### WHOLE-QUERY TRANSFORMATIONS
Transformations and cleanup of the query structure as a whole to fix inconsistencies. Whereas the canonicalization
phase operates on a lower-level, transforming invidual clauses, this phase focuses on transformations that affect
multiple clauses, such as removing duplicate references to Fields if they are specified in both the `:breakout` and
`:fields` clauses.
This is not the only place that does such transformations; several pieces of QP middleware perform similar
individual transformations, such as `reconcile-breakout-and-order-by-bucketing`.
#### REMOVING EMPTY CLAUSES
Removing empty clauses like `{:aggregation nil}` or `{:breakout []}`.
......@@ -489,6 +499,26 @@
(:parameters outer-query) (update :parameters (partial mapv canonicalize-mbql-clauses))))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | WHOLE-QUERY TRANSFORMATIONS |
;;; +----------------------------------------------------------------------------------------------------------------+
(defn- remove-breakout-fields-from-fields
"Remove any Fields specified in both `:breakout` and `:fields` from `:fields`; it is implied that any breakout Field
will be returned, specifying it in both would imply it is to be returned twice, which tends to cause confusion for
the QP and drivers."
[{{:keys [breakout fields]} :query, :as query}]
(if-not (and (seq breakout) (seq fields))
query
(update-in query [:query :fields] (comp vec (partial remove (set breakout))))))
(defn- perform-whole-query-transformations
"Perform transformations that operate on the query as a whole, making sure the structure as a whole is logical and
consistent."
[query]
(-> query
remove-breakout-fields-from-fields))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | REMOVING EMPTY CLAUSES |
;;; +----------------------------------------------------------------------------------------------------------------+
......@@ -543,6 +573,7 @@
"Normalize the tokens in a Metabase query (i.e., make them all `lisp-case` keywords), rewrite deprecated clauses as
up-to-date MBQL 2000, and remove empty clauses."
(comp remove-empty-clauses
perform-whole-query-transformations
canonicalize
normalize-tokens))
......
......@@ -543,13 +543,15 @@
(def MBQLQuery
"Schema for a valid, normalized MBQL [inner] query."
(s/constrained
(->
{(s/optional-key :source-query) SourceQuery
(s/optional-key :source-table) SourceTable
(s/optional-key :aggregation) (su/non-empty [Aggregation])
(s/optional-key :breakout) (su/non-empty [Field])
(s/optional-key :expressions) {s/Keyword ExpressionDef} ; TODO - I think expressions keys should be strings
(s/optional-key :fields) (su/non-empty [Field]) ; TODO - should this be `distinct-non-empty`?
; TODO - expressions keys should be strings; fix this when we get a chance
(s/optional-key :expressions) {s/Keyword ExpressionDef}
;; TODO - should this be `distinct-non-empty`?
(s/optional-key :fields) (su/non-empty [Field])
(s/optional-key :filter) Filter
(s/optional-key :limit) su/IntGreaterThanZero
(s/optional-key :order-by) (distinct-non-empty [OrderBy])
......@@ -559,9 +561,16 @@
;; info to other pieces of middleware. Everyone else can ignore them.
(s/optional-key :join-tables) (s/constrained [JoinInfo] (partial apply distinct?) "distinct JoinInfo")
s/Keyword s/Any}
(fn [query]
(core/= 1 (core/count (select-keys query [:source-query :source-table]))))
"Query must specify either `:source-table` or `:source-query`, but not both."))
(s/constrained
(fn [query]
(core/= 1 (core/count (select-keys query [:source-query :source-table]))))
"Query must specify either `:source-table` or `:source-query`, but not both.")
(s/constrained
(fn [{:keys [breakout fields]}]
(empty? (set/intersection (set breakout) (set fields))))
"Fields specified in `:breakout` should not be specified in `:fields`; this is implied.")))
;;; ----------------------------------------------------- Params -----------------------------------------------------
......
......@@ -231,21 +231,26 @@
(defn- save-and-return-failed-query!
"Save QueryExecution state and construct a failed query response"
[query-execution error-message]
[query-execution, ^Throwable e]
;; record our query execution and format response
(-> query-execution
(dissoc :start_time_millis)
(merge {:error error-message
(merge {:error (.getMessage e)
:running_time (- (System/currentTimeMillis) (:start_time_millis query-execution))})
save-query-execution!
(dissoc :result_rows :hash :executor_id :native :card_id :dashboard_id :pulse_id)
;; this is just for the response for client
(assoc :status :failed
:error error-message
:error (.getMessage e)
:row_count 0
:data {:rows []
:cols []
:columns []})))
:columns []})
;; include stacktrace and preprocessed/native stages of the query if available in the response which should make
;; debugging queries a bit easier
(merge (some-> (ex-data e)
(select-keys [:stacktrace :preprocessed :native])
(m/dissoc-in [:preprocessed :info])))))
(defn- save-and-return-successful-query!
"Save QueryExecution state and construct a completed (successful) query response"
......@@ -271,10 +276,12 @@
"Make sure QUERY-RESULT `:status` is something other than `nil`or `:failed`, or throw an Exception."
[query-result]
(when-not (contains? query-result :status)
(throw (Exception. "invalid response from database driver. no :status provided")))
(throw (ex-info (str (tru "Invalid response from database driver. No :status provided."))
query-result)))
(when (= :failed (:status query-result))
(log/warn (u/pprint-to-str 'red query-result))
(throw (Exception. (str (get query-result :error "general error"))))))
(throw (ex-info (str (get query-result :error (tru "General error")))
query-result))))
(def ^:dynamic ^Boolean *allow-queries-with-no-executor-id*
"Should we allow running queries (via `dataset-query`) without specifying the `executed-by` User ID? By default
......@@ -316,7 +323,7 @@
(log/warn (u/format-color 'red "Query failure: %s\n%s"
(.getMessage e)
(u/pprint-to-str (u/filtered-stacktrace e))))
(save-and-return-failed-query! query-execution (.getMessage e))))))))
(save-and-return-failed-query! query-execution e)))))))
;; TODO - couldn't saving the query execution be done by MIDDLEWARE?
(s/defn process-query-and-save-execution!
......
......@@ -12,6 +12,8 @@
:class (class e)
:error (or (.getMessage e) (str e))
:stacktrace (u/filtered-stacktrace e)
;; TODO - removing this stuff is not really needed anymore since `:database` is just the ID and not the
;; entire map including `:details`
:query (dissoc query :database :driver)}
;; add the fully-preprocessed and native forms to the error message for MBQL queries, since they're extremely
;; useful for debugging purposes. Since generating them requires us to recursively run the query processor,
......
......@@ -131,7 +131,7 @@
((user->client :rasta) :post 200 "dataset" {:database (id)
:type "native"
:native {:query "foobar"}}))]
[(check-error-message (format-response result))
[(check-error-message (dissoc (format-response result) :stacktrace))
(check-error-message (format-response (most-recent-query-execution)))]))
......
......@@ -709,6 +709,22 @@
:query {:source-query {:source-table 1, :aggregation :rows}}}))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | WHOLE-QUERY TRANSFORMATIONS |
;;; +----------------------------------------------------------------------------------------------------------------+
;; If you specify a field in a breakout and in the Fields clause, we should go ahead and remove it from the Fields
;; clause, because it is (obviously) implied that you should get that Field back.
(expect
{:type :query
:query {:breakout [[:field-id 1] [:field-id 2]]
:fields [[:field-id 3]]}}
(#'normalize/perform-whole-query-transformations
{:type :query
:query {:breakout [[:field-id 1] [:field-id 2]]
:fields [[:field-id 2] [:field-id 3]]}}))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | REMOVE EMPTY CLAUSES |
;;; +----------------------------------------------------------------------------------------------------------------+
......
......@@ -276,3 +276,15 @@
(-> (nested-venues-query card)
qp/process-query
rows))))
;; if we include a Field in both breakout and fields, does the query still work? (Normalization should be taking care
;; of this) (#8760)
(expect-with-non-timeseries-dbs
:completed
(-> (qp/process-query
{:database (data/id)
:type :query
:query {:source-table (data/id :venues)
:breakout [[:field-id (data/id :venues :price)]]
:fields [["field_id" (data/id :venues :price)]]}})
:status))
(ns metabase.query-processor-test.failure-test
"Tests for how the query processor as a whole handles failures."
(:require [expectations :refer [expect]]
[metabase.query-processor :as qp]
[metabase.test.data :as data]
[metabase.query-processor.interface :as qp.i])
(:import metabase.driver.h2.H2Driver))
(defn- bad-query []
{:database (data/id)
:type :query
:query {:source-table (data/id :venues)
:fields [["datetime_field" (data/id :venues :id) "MONTH"]]}})
(defn- bad-query:preprocessed []
{:database (data/id)
:type :query
:query {:source-table (data/id :venues)
:fields [[:datetime-field [:field-id (data/id :venues :id)] :month]]
:limit qp.i/absolute-max-results}
:driver (H2Driver.)
:settings {}})
(def ^:private bad-query:native
{:query (str "SELECT parsedatetime(formatdatetime(\"PUBLIC\".\"VENUES\".\"ID\", 'yyyyMM'), 'yyyyMM') AS \"ID\" "
"FROM \"PUBLIC\".\"VENUES\" "
"LIMIT 1048576")
:params nil})
(def ^:private ^{:arglists '([stacktrace])} valid-stacktrace? (every-pred seq (partial every? (every-pred string? seq))))
;; running a bad query via `process-query` should return stacktrace, query, preprocessed query, and native query
(expect
{:status :failed
:class java.util.concurrent.ExecutionException
:error true
:stacktrace true
;; `:database` is removed by the catch-exceptions middleware for historical reasons
:query (dissoc (bad-query) :database)
:preprocessed (bad-query:preprocessed)
:native bad-query:native}
(-> (qp/process-query (bad-query))
(update :error (every-pred string? seq))
(update :stacktrace valid-stacktrace?)))
;; running via `process-query-and-save-execution!` should return similar info and a bunch of other nonsense too
(expect
{:started_at true
:json_query (bad-query)
:native bad-query:native
:status :failed
:stacktrace true
:context :question
:error true
:row_count 0
:running_time true
:preprocessed (bad-query:preprocessed)
:data {:rows [], :cols [], :columns []}}
(-> (qp/process-query-and-save-execution! (bad-query) {:context :question})
(update :error (every-pred string? seq))
(update :started_at (partial instance? java.util.Date))
(update :stacktrace valid-stacktrace?)
(update :running_time (complement neg?))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment