diff --git a/modules/drivers/mongo/src/metabase/driver/mongo/query_processor.clj b/modules/drivers/mongo/src/metabase/driver/mongo/query_processor.clj index 8bc4e217c42edd4cdb25c5df0f2ec5a1cf22b65f..11d64ef5e02c469675b6f41bb33a86424687710e 100644 --- a/modules/drivers/mongo/src/metabase/driver/mongo/query_processor.clj +++ b/modules/drivers/mongo/src/metabase/driver/mongo/query_processor.clj @@ -9,7 +9,7 @@ [java-time.api :as t] [metabase.driver :as driver] [metabase.driver.common :as driver.common] - [metabase.driver.mongo.operators :refer [$add $addToSet $and $avg $concat $cond + [metabase.driver.mongo.operators :refer [$add $addFields $addToSet $and $avg $concat $cond $dayOfMonth $dayOfWeek $dayOfYear $divide $eq $expr $group $gt $gte $hour $limit $literal $lookup $lt $lte $match $max $min $minute $mod $month $multiply $ne $not $or $project $regexMatch $second @@ -46,9 +46,6 @@ ;; this is just a very limited schema to make sure we're generating valid queries. We should expand it more in the ;; future -(def ^:private $addFields - :$addFields) - (def ^:private $ProjectStage [:map-of [:= $project] [:map-of ::lib.schema.common/non-blank-string :any]]) (def ^:private $SortStage [:map-of [:= $sort] [:map-of ::lib.schema.common/non-blank-string [:enum -1 1]]]) (def ^:private $MatchStage [:map-of [:= $match] [:map-of @@ -1014,9 +1011,7 @@ [(field-alias field-or-expr) (format "$_id.%s" (field-alias field-or-expr))]) (for [ag aggregations :let [ag-name (annotate/aggregation-name (:query *query*) ag)]] - [ag-name (if (mbql.u/is-clause? :distinct (unwrap-named-ag ag)) - {$size (str \$ ag-name)} - true)]))) + [ag-name true]))) (defmulti ^:private expand-aggregation "Expand aggregations like `:share` and `:var` that can't be done as top-level aggregations in the `$group` stage @@ -1127,6 +1122,29 @@ [(str \$ aggr-name) {aggr-group aggr-name}] extracted-aggr)) +(defn- adjust-distinct-aggregations + "This function transforms `aggr-expr'` as in [[expand-aggregations]] so identifiers representing array that is + a set of _distinct_ values are wrapped in `{$size...}. + + `aggr-expr` is expected to be a clause that is a result of [[extract-aggregations]]. For details see its docstring. + + Distinct values are computed using the `$addToSet` in a `$group` stage. `$size` transforms them to actual count." + [[aggr-expr mappings]] + (let [distinct-keys (filter (fn [[clause]] (= :distinct clause)) (keys mappings)) + distinct-vals (into #{} + (comp (map #(get mappings %)) + ;; \$ is added to identifiers so eg. `q~count1` becomes `$q~count1`. Those values + ;; are used match against `aggr-expr` where identifiers have the prefix. + (map #(str \$ %))) + distinct-keys)] + [(walk/postwalk (fn [x] + (if (and (string? x) + (distinct-vals x)) + {$size x} + x)) + aggr-expr) + mappings])) + (defn- expand-aggregations "Expands the aggregations in `aggr-expr` into groupings and post processing expressions. The return value is a map with the following keys: @@ -1136,9 +1154,10 @@ usually does) refer to the fields introduced by the preceding maps." [aggr-expr] (let [aggr-name (annotate/aggregation-name (:query *query*) aggr-expr) - [aggr-expr' aggregations-seen] (simplify-extracted-aggregations - aggr-name - (extract-aggregations aggr-expr aggr-name)) + [aggr-expr' aggregations-seen] (->> (extract-aggregations aggr-expr aggr-name) + (simplify-extracted-aggregations aggr-name) + adjust-distinct-aggregations) + raggr-expr (->rvalue aggr-expr') expandeds (map (fn [[aggr name]] (expand-aggregation [:aggregation-options aggr {:name name}])) diff --git a/modules/drivers/mongo/test/metabase/driver/mongo/query_processor_test.clj b/modules/drivers/mongo/test/metabase/driver/mongo/query_processor_test.clj index 7e89dbd8e2a02ac6da508da70145e1787a304c4d..361be7af07bed97375300635aa696dcbdd50929a 100644 --- a/modules/drivers/mongo/test/metabase/driver/mongo/query_processor_test.clj +++ b/modules/drivers/mongo/test/metabase/driver/mongo/query_processor_test.clj @@ -273,8 +273,9 @@ (is (= {:projections ["count" "count_2"] :query [{"$group" {"_id" nil, "count" {"$addToSet" "$name"}, "count_2" {"$addToSet" "$price"}}} + {"$addFields" {"count" {"$size" "$count"} "count_2" {"$size" "$count_2"}}} {"$sort" {"_id" 1}} - {"$project" {"_id" false, "count" {"$size" "$count"}, "count_2" {"$size" "$count_2"}}} + {"$project" {"_id" false, "count" true, "count_2" true}} {"$limit" 5}], :collection "venues" :mbql? true} @@ -284,6 +285,32 @@ [:distinct $price]] :limit 5}))))))) +(deftest ^:parallel multiple-aggregations-with-distinct-count-expression-test + (mt/test-driver + :mongo + (testing "Should generate correct queries for `:distinct` in expressions (#35425)" + (is (= {:projections ["expression" "expression_2"], + :query + [{"$group" + {"_id" nil, + "expression~count" {"$addToSet" "$name"}, + "expression~count1" {"$addToSet" "$price"}, + "expression_2~count" {"$addToSet" "$name"}, + "expression_2~count1" {"$addToSet" "$price"}}} + {"$addFields" + {"expression" {"$add" [{"$size" "$expression~count"} {"$size" "$expression~count1"}]}, + "expression_2" {"$subtract" [{"$size" "$expression_2~count"} {"$size" "$expression_2~count1"}]}}} + {"$sort" {"_id" 1}} + {"$project" {"_id" false, "expression" true, "expression_2" true}} + {"$limit" 5}], + :collection "venues", + :mbql? true} + (qp.compile/compile + (mt/mbql-query venues + {:aggregation [[:+ [:distinct $name] [:distinct $price]] + [:- [:distinct $name] [:distinct $price]]] + :limit 5}))))))) + (defn- extract-projections [projections q] (select-keys (get-in q [:query 0 "$project"]) projections)) diff --git a/test/metabase/query_processor_test/aggregation_test.clj b/test/metabase/query_processor_test/aggregation_test.clj index 069f6263ad342fcca07ceefc8e44960b3def63e4..843373bd75ea95c74d7633c001ec3c99e65c65d8 100644 --- a/test/metabase/query_processor_test/aggregation_test.clj +++ b/test/metabase/query_processor_test/aggregation_test.clj @@ -6,7 +6,8 @@ [metabase.lib.test-util :as lib.tu] [metabase.query-processor.store :as qp.store] [metabase.query-processor.test-util :as qp.test-util] - [metabase.test :as mt])) + [metabase.test :as mt] + [metabase.util :as u])) (deftest ^:parallel no-aggregation-test (mt/test-drivers (mt/normal-drivers) @@ -201,6 +202,37 @@ {:aggregation [[:distinct $name] [:distinct $price]]}))))))) +(deftest ^:synchronized complex-distinct-aggregation-test + (mt/test-drivers + ;; TODO: This test was added in PR #44442 fixing issue #35425. Enable this test for other drivers _while_ fixing + ;; the issue #14523. + #_(mt/normal-drivers) #{:mongo} + (testing "Aggregation as `Count / Distinct([SOME_FIELD])` returns expected results (#35425)" + (every? + (fn [[_id c d c-div-d more-complex]] + (testing "Simple division" + (is (= (u/round-to-decimals 2 (double (/ c d))) + (u/round-to-decimals 2 (double c-div-d))))) + (testing "More complex expression" + (is (= (u/round-to-decimals 2 (double (/ + (- c (* d 10)) + (+ d (- c (- d 7)))))) + (u/round-to-decimals 2 (double more-complex)))))) + (mt/rows + (mt/run-mbql-query + venues + {:aggregation [[:aggregation-options [:count] {:name "A"}] + [:aggregation-options [:distinct $price] {:name "B"}] + [:aggregation-options [:/ [:count] [:distinct $price]] {:name "C"}] + [:aggregation-options [:/ + [:- [:count] [:* [:distinct $price] 10]] + [:+ + [:distinct $price] + [:- [:count] [:- [:distinct $price] 7]]]] {:name "D"}]] + :breakout [$category_id] + :order-by [[:asc $id]] + :limit 5})))))) + (deftest ^:parallel aggregate-boolean-without-type-test (testing "Legacy breakout on boolean field should work correctly (#34286)" (mt/dataset places-cam-likes