From 8977de87ee399236d056564033b2094cc64cd2f0 Mon Sep 17 00:00:00 2001 From: Cam Saul <cammsaul@gmail.com> Date: Tue, 20 Nov 2018 14:16:19 -0800 Subject: [PATCH] Fix Mongo & Druid multiple ags of same type :v: [ci drivers] --- src/metabase/driver/druid/query_processor.clj | 23 ++++---- src/metabase/driver/mongo/query_processor.clj | 52 ++++++++++++------- .../query_processor_test/aggregation_test.clj | 10 +++- test/metabase/test/data.clj | 8 +-- .../timeseries_query_processor_test.clj | 10 +++- 5 files changed, 67 insertions(+), 36 deletions(-) diff --git a/src/metabase/driver/druid/query_processor.clj b/src/metabase/driver/druid/query_processor.clj index c045fc2e8be..81e2c5aac53 100644 --- a/src/metabase/driver/druid/query_processor.clj +++ b/src/metabase/driver/druid/query_processor.clj @@ -370,20 +370,21 @@ (update :query #(merge-with concat % {:postAggregations [post-agg]}))))) (defn- handle-aggregations [query-type {aggregations :aggregation} query-context] - (loop [[ag & more] aggregations, query query-context] - (cond - (and (mbql.u/is-clause? :named ag) - (mbql.u/is-clause? #{:+ :- :/ :*} (second ag))) - (handle-expression-aggregation query-type ag query) + (let [aggregations (mbql.u/pre-alias-and-uniquify-aggregations annotate/aggregation-name aggregations)] + (loop [[ag & more] aggregations, query query-context] + (cond + (and (mbql.u/is-clause? :named ag) + (mbql.u/is-clause? #{:+ :- :/ :*} (second ag))) + (handle-expression-aggregation query-type ag query) - (mbql.u/is-clause? #{:+ :- :/ :*} ag) - (handle-expression-aggregation query-type ag query) + (mbql.u/is-clause? #{:+ :- :/ :*} ag) + (handle-expression-aggregation query-type ag query) - (not ag) - query + (not ag) + query - :else - (recur more (handle-aggregation query-type ag query))))) + :else + (recur more (handle-aggregation query-type ag query)))))) ;;; ------------------------------------------------ handle-breakout ------------------------------------------------- diff --git a/src/metabase/driver/mongo/query_processor.clj b/src/metabase/driver/mongo/query_processor.clj index 4563725ae37..e4b40ef5f64 100644 --- a/src/metabase/driver/mongo/query_processor.clj +++ b/src/metabase/driver/mongo/query_processor.clj @@ -107,17 +107,20 @@ ;; Escaped: ;; {"$group" {"source___username" {"$first" {"$source.username"}, "_id" "$source.username"}}, ...} -(defmulti ^:private ^{:doc (str "Format this `Field` or `Value` for use as the right hand value of an expression, e.g. " - "by adding `$` to a `Field`'s name")} - ->rvalue +(defmulti ^:private ->rvalue + "Format this `Field` or value for use as the right hand value of an expression, e.g. by adding `$` to a `Field`'s + name" + {:arglists '([x])} mbql.u/dispatch-by-clause-name-or-class) -(defmulti ^:private ^{:doc "Return an escaped name that can be used as the name of a given Field."} - ^String ->lvalue +(defmulti ^:private ->lvalue + "Return an escaped name that can be used as the name of a given Field." + {:arglists '([field])} mbql.u/dispatch-by-clause-name-or-class) -(defmulti ^:private ^{:doc "Return the rvalue that should be used in the *initial* projection for this `Field`."} - ->initial-rvalue +(defmulti ^:private ->initial-rvalue + "Return the rvalue that should be used in the *initial* projection for this `Field`." + {:arglists '([field])} mbql.u/dispatch-by-clause-name-or-class) @@ -351,6 +354,7 @@ (case aggregation-type :count {$sum 1}) (case aggregation-type + :named (recur field) :avg {$avg (->rvalue field)} :count {$sum {$cond {:if (->rvalue field) :then 1 @@ -360,6 +364,11 @@ :min {$min (->rvalue field)} :max {$max (->rvalue field)}))) +(defn- unwrap-named-ag [[ag-type arg :as ag]] + (if (= ag-type :named) + arg + ag)) + (s/defn ^:private breakouts-and-ags->projected-fields :- [(s/pair su/NonBlankString "projected-field-name" s/Any "source")] "Determine field projections for MBQL breakouts and aggregations. Returns a sequence of pairs like @@ -369,7 +378,7 @@ (for [field breakout-fields] [(->lvalue field) (format "$_id.%s" (->lvalue field))]) (for [ag aggregations] - [(annotate/aggregation-name ag) (if (mbql.u/is-clause? :distinct ag) + [(annotate/aggregation-name ag) (if (mbql.u/is-clause? :distinct (unwrap-named-ag ag)) {$size "$count"} ; HACK true)]))) @@ -384,8 +393,9 @@ {$project (merge {"_id" "$_id" "___group" (into {} (for [field breakout-fields] {(->lvalue field) (->rvalue field)}))} - (into {} (for [[_ ag-field] aggregations - :when ag-field] + (into {} (for [ag aggregations + :let [[_ ag-field] (unwrap-named-ag ag)] + :when ag-field] {(->lvalue ag-field) (->rvalue ag-field)})))}) ;; Now project onto the __group and the aggregation rvalue {$group (merge @@ -467,16 +477,18 @@ (s/defn ^:private generate-aggregation-pipeline :- {:projections Projections, :query Pipeline} "Generate the aggregation pipeline. Returns a sequence of maps representing each stage." [inner-query :- mbql.s/MBQLQuery] - (reduce (fn [pipeline-ctx f] - (f inner-query pipeline-ctx)) - {:projections [], :query []} - [add-initial-projection - handle-filter - handle-breakout+aggregation - handle-order-by - handle-fields - handle-limit - handle-page])) + (let [inner-query (update inner-query :aggregation (partial mbql.u/pre-alias-and-uniquify-aggregations + annotate/aggregation-name))] + (reduce (fn [pipeline-ctx f] + (f inner-query pipeline-ctx)) + {:projections [], :query []} + [add-initial-projection + handle-filter + handle-breakout+aggregation + handle-order-by + handle-fields + handle-limit + handle-page]))) (s/defn ^:private create-unescaping-rename-map :- {s/Keyword s/Keyword} [original-keys :- Projections] diff --git a/test/metabase/query_processor_test/aggregation_test.clj b/test/metabase/query_processor_test/aggregation_test.clj index 258ca94ff0c..bb130cf90dd 100644 --- a/test/metabase/query_processor_test/aggregation_test.clj +++ b/test/metabase/query_processor_test/aggregation_test.clj @@ -2,7 +2,7 @@ "Tests for MBQL aggregations." (:require [expectations :refer [expect]] [metabase - [query-processor-test :refer :all] + [query-processor-test :as qp.test :refer :all] [util :as u]] [metabase.models.field :refer [Field]] [metabase.test @@ -339,3 +339,11 @@ {:aggregation [[:sum [:field-id (u/get-id copy-of-venues-price)]]]})] (or (-> results :data :cols first) results)))) + +;; Do we properly handle queries that have more than one of the same aggregation? (#5393) +(expect + [[5050 203]] + (qp.test/format-rows-by [int int] + (qp.test/rows + (data/run-mbql-query venues + {:aggregation [[:sum $id] [:sum $price]]})))) diff --git a/test/metabase/test/data.clj b/test/metabase/test/data.clj index b0aa3f4d49e..1afeb188759 100644 --- a/test/metabase/test/data.clj +++ b/test/metabase/test/data.clj @@ -94,7 +94,9 @@ (defn- $->id "Internal impl fn of `$ids` and `mbql-query` macros. Walk `body` and replace `$field` (and related) tokens with calls - to `id`, optionally wrapping them in `:field-id` or `:fk->` clauses." + to `id`. + + Optionally wraps IDs in `:field-id` or `:fk->` clauses as appropriate; this defaults to true." [table-name body & {:keys [wrap-field-ids?], :or {wrap-field-ids? true}}] (walk/postwalk (fn [form] @@ -144,8 +146,8 @@ :query query}) (defmacro mbql-query - "Build a query, expands symbols like `$field` into calls to `id`. See the dox for `$->id` for more information on how - `$`-prefixed expansion behaves. + "Build a query, expands symbols like `$field` into calls to `id` and wraps them in `:field-id`. See the dox for + `$->id` for more information on how `$`-prefixed expansion behaves. (mbql-query venues {:filter [:= $id 1]}) diff --git a/test/metabase/timeseries_query_processor_test.clj b/test/metabase/timeseries_query_processor_test.clj index bca7ee71d65..99fe9ed0690 100644 --- a/test/metabase/timeseries_query_processor_test.clj +++ b/test/metabase/timeseries_query_processor_test.clj @@ -1,6 +1,6 @@ (ns metabase.timeseries-query-processor-test "Query processor tests for DBs that are event-based, like Druid. - There architecture is different enough that we can't test them along with our 'normal' DBs in `query-procesor-test`." + There architecture is different enough that we can't test them along with our 'normal' DBs in `query-procesor-test`." (:require [metabase [query-processor-test :refer [first-row format-rows-by rows]] [util :as u]] @@ -889,3 +889,11 @@ (rows (data/run-mbql-query checkins {:aggregation [[:max $venue_latitude]] :breakout [$venue_price]}))) + +;; Do we properly handle queries that have more than one of the same aggregation? (#4166) +(expect-with-timeseries-dbs + [[35643 1992]] + (format-rows-by [int int] + (rows + (data/run-mbql-query checkins + {:aggregation [[:sum $venue_latitude] [:sum $venue_price]]})))) -- GitLab