Skip to content
Snippets Groups Projects
Unverified Commit ba42baaf authored by lbrdnk's avatar lbrdnk Committed by GitHub
Browse files

Adjust distinct aggregation expansion on Mongo (#44442)

This commit adds `$size` operator wrapping to `$addToSet`. That enables use of `:distinct`
clause in expressions on Mongo.
parent 452382bc
No related merge requests found
......@@ -9,7 +9,7 @@
[java-time.api :as t]
[metabase.driver :as driver]
[metabase.driver.common :as driver.common]
[metabase.driver.mongo.operators :refer [$add $addToSet $and $avg $concat $cond
[metabase.driver.mongo.operators :refer [$add $addFields $addToSet $and $avg $concat $cond
$dayOfMonth $dayOfWeek $dayOfYear $divide $eq $expr
$group $gt $gte $hour $limit $literal $lookup $lt $lte $match $max $min
$minute $mod $month $multiply $ne $not $or $project $regexMatch $second
......@@ -46,9 +46,6 @@
;; this is just a very limited schema to make sure we're generating valid queries. We should expand it more in the
;; future
(def ^:private $addFields
:$addFields)
(def ^:private $ProjectStage [:map-of [:= $project] [:map-of ::lib.schema.common/non-blank-string :any]])
(def ^:private $SortStage [:map-of [:= $sort] [:map-of ::lib.schema.common/non-blank-string [:enum -1 1]]])
(def ^:private $MatchStage [:map-of [:= $match] [:map-of
......@@ -1014,9 +1011,7 @@
[(field-alias field-or-expr) (format "$_id.%s" (field-alias field-or-expr))])
(for [ag aggregations
:let [ag-name (annotate/aggregation-name (:query *query*) ag)]]
[ag-name (if (mbql.u/is-clause? :distinct (unwrap-named-ag ag))
{$size (str \$ ag-name)}
true)])))
[ag-name true])))
(defmulti ^:private expand-aggregation
"Expand aggregations like `:share` and `:var` that can't be done as top-level aggregations in the `$group` stage
......@@ -1127,6 +1122,29 @@
[(str \$ aggr-name) {aggr-group aggr-name}]
extracted-aggr))
(defn- adjust-distinct-aggregations
"This function transforms `aggr-expr'` as in [[expand-aggregations]] so identifiers representing array that is
a set of _distinct_ values are wrapped in `{$size...}.
`aggr-expr` is expected to be a clause that is a result of [[extract-aggregations]]. For details see its docstring.
Distinct values are computed using the `$addToSet` in a `$group` stage. `$size` transforms them to actual count."
[[aggr-expr mappings]]
(let [distinct-keys (filter (fn [[clause]] (= :distinct clause)) (keys mappings))
distinct-vals (into #{}
(comp (map #(get mappings %))
;; \$ is added to identifiers so eg. `q~count1` becomes `$q~count1`. Those values
;; are used match against `aggr-expr` where identifiers have the prefix.
(map #(str \$ %)))
distinct-keys)]
[(walk/postwalk (fn [x]
(if (and (string? x)
(distinct-vals x))
{$size x}
x))
aggr-expr)
mappings]))
(defn- expand-aggregations
"Expands the aggregations in `aggr-expr` into groupings and post processing
expressions. The return value is a map with the following keys:
......@@ -1136,9 +1154,10 @@
usually does) refer to the fields introduced by the preceding maps."
[aggr-expr]
(let [aggr-name (annotate/aggregation-name (:query *query*) aggr-expr)
[aggr-expr' aggregations-seen] (simplify-extracted-aggregations
aggr-name
(extract-aggregations aggr-expr aggr-name))
[aggr-expr' aggregations-seen] (->> (extract-aggregations aggr-expr aggr-name)
(simplify-extracted-aggregations aggr-name)
adjust-distinct-aggregations)
raggr-expr (->rvalue aggr-expr')
expandeds (map (fn [[aggr name]]
(expand-aggregation [:aggregation-options aggr {:name name}]))
......
......@@ -273,8 +273,9 @@
(is (= {:projections ["count" "count_2"]
:query
[{"$group" {"_id" nil, "count" {"$addToSet" "$name"}, "count_2" {"$addToSet" "$price"}}}
{"$addFields" {"count" {"$size" "$count"} "count_2" {"$size" "$count_2"}}}
{"$sort" {"_id" 1}}
{"$project" {"_id" false, "count" {"$size" "$count"}, "count_2" {"$size" "$count_2"}}}
{"$project" {"_id" false, "count" true, "count_2" true}}
{"$limit" 5}],
:collection "venues"
:mbql? true}
......@@ -284,6 +285,32 @@
[:distinct $price]]
:limit 5})))))))
(deftest ^:parallel multiple-aggregations-with-distinct-count-expression-test
(mt/test-driver
:mongo
(testing "Should generate correct queries for `:distinct` in expressions (#35425)"
(is (= {:projections ["expression" "expression_2"],
:query
[{"$group"
{"_id" nil,
"expression~count" {"$addToSet" "$name"},
"expression~count1" {"$addToSet" "$price"},
"expression_2~count" {"$addToSet" "$name"},
"expression_2~count1" {"$addToSet" "$price"}}}
{"$addFields"
{"expression" {"$add" [{"$size" "$expression~count"} {"$size" "$expression~count1"}]},
"expression_2" {"$subtract" [{"$size" "$expression_2~count"} {"$size" "$expression_2~count1"}]}}}
{"$sort" {"_id" 1}}
{"$project" {"_id" false, "expression" true, "expression_2" true}}
{"$limit" 5}],
:collection "venues",
:mbql? true}
(qp.compile/compile
(mt/mbql-query venues
{:aggregation [[:+ [:distinct $name] [:distinct $price]]
[:- [:distinct $name] [:distinct $price]]]
:limit 5})))))))
(defn- extract-projections [projections q]
(select-keys (get-in q [:query 0 "$project"]) projections))
......
......@@ -6,7 +6,8 @@
[metabase.lib.test-util :as lib.tu]
[metabase.query-processor.store :as qp.store]
[metabase.query-processor.test-util :as qp.test-util]
[metabase.test :as mt]))
[metabase.test :as mt]
[metabase.util :as u]))
(deftest ^:parallel no-aggregation-test
(mt/test-drivers (mt/normal-drivers)
......@@ -201,6 +202,37 @@
{:aggregation [[:distinct $name]
[:distinct $price]]})))))))
(deftest ^:synchronized complex-distinct-aggregation-test
(mt/test-drivers
;; TODO: This test was added in PR #44442 fixing issue #35425. Enable this test for other drivers _while_ fixing
;; the issue #14523.
#_(mt/normal-drivers) #{:mongo}
(testing "Aggregation as `Count / Distinct([SOME_FIELD])` returns expected results (#35425)"
(every?
(fn [[_id c d c-div-d more-complex]]
(testing "Simple division"
(is (= (u/round-to-decimals 2 (double (/ c d)))
(u/round-to-decimals 2 (double c-div-d)))))
(testing "More complex expression"
(is (= (u/round-to-decimals 2 (double (/
(- c (* d 10))
(+ d (- c (- d 7))))))
(u/round-to-decimals 2 (double more-complex))))))
(mt/rows
(mt/run-mbql-query
venues
{:aggregation [[:aggregation-options [:count] {:name "A"}]
[:aggregation-options [:distinct $price] {:name "B"}]
[:aggregation-options [:/ [:count] [:distinct $price]] {:name "C"}]
[:aggregation-options [:/
[:- [:count] [:* [:distinct $price] 10]]
[:+
[:distinct $price]
[:- [:count] [:- [:distinct $price] 7]]]] {:name "D"}]]
:breakout [$category_id]
:order-by [[:asc $id]]
:limit 5}))))))
(deftest ^:parallel aggregate-boolean-without-type-test
(testing "Legacy breakout on boolean field should work correctly (#34286)"
(mt/dataset places-cam-likes
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment