From 0165251858b9502dcee457e2d6fdb8ff5af69145 Mon Sep 17 00:00:00 2001
From: Cam Saul <1455846+camsaul@users.noreply.github.com>
Date: Wed, 28 Aug 2019 15:49:48 -0700
Subject: [PATCH] PR 10295 with test fixes (#10752)

* Support distinct by two or more columns in druid

* Test fixes [ci druid]
---
 .../metabase/driver/druid/query_processor.clj |  14 ++-
 .../driver/druid/query_processor_test.clj     | 114 +++++++++---------
 .../druid/test/metabase/driver/druid_test.clj | 106 ++++++++--------
 3 files changed, 122 insertions(+), 112 deletions(-)

diff --git a/modules/drivers/druid/src/metabase/driver/druid/query_processor.clj b/modules/drivers/druid/src/metabase/driver/druid/query_processor.clj
index 368a0de13fd..043e1dc4457 100644
--- a/modules/drivers/druid/src/metabase/driver/druid/query_processor.clj
+++ b/modules/drivers/druid/src/metabase/driver/druid/query_processor.clj
@@ -515,13 +515,23 @@
 
 (defn- ag:distinct
   [field output-name]
-  (if (isa? (:base-type field) :type/DruidHyperUnique)
+  (cond
+    (mbql.u/is-clause? #{:+} field)
+    {:type       :cardinality
+     :name       output-name
+     :fieldNames (mapv ->rvalue (rest field))
+     :byRow      true
+     :round      true}
+    (isa? (:base-type field) :type/DruidHyperUnique)
     {:type      :hyperUnique
      :name      output-name
      :fieldName (->rvalue field)}
+    :else
     {:type       :cardinality
      :name       output-name
-     :fieldNames [(->rvalue field)]}))
+     :fieldNames [(->rvalue field)]
+     :byRow      true
+     :round      true}))
 
 (defn- ag:count
   ([output-name]
diff --git a/modules/drivers/druid/test/metabase/driver/druid/query_processor_test.clj b/modules/drivers/druid/test/metabase/driver/druid/query_processor_test.clj
index 4ec7fb87e8f..5cda190572e 100644
--- a/modules/drivers/druid/test/metabase/driver/druid/query_processor_test.clj
+++ b/modules/drivers/druid/test/metabase/driver/druid/query_processor_test.clj
@@ -37,9 +37,8 @@
                  :metric           {:type :alphaNumeric}
                  :aggregations
                  [{:type       :filtered
-                   :filter
-                   {:type  :not
-                    :field {:type :selector, :dimension "id", :value nil}}
+                   :filter     {:type  :not
+                                :field {:type :selector, :dimension "id", :value nil}}
                    :aggregator {:type :count, :name "__count_0"}}]}
    :query-type  ::druid.qp/topN
    :mbql?       true}
@@ -49,67 +48,72 @@
 
 (datasets/expect-with-driver :druid
   {:projections [:venue_category_name :user_name :__count_0]
-   :query       {:queryType        :groupBy
-                 :granularity      :all
-                 :dataSource       "checkins"
-                 :dimensions       ["venue_category_name", "user_name"]
-                 :context          {:timeout 60000, :queryId "<Query ID>"}
-                 :intervals        ["1900-01-01/2100-01-01"]
-                 :aggregations     [{:type       :cardinality
-                                     :name       "__count_0"
-                                     :fieldNames ["venue_name"]}]
-                 :limitSpec        {:type    :default
-                                    :columns [{:dimension "__count_0", :direction :descending}
-                                              {:dimension "venue_category_name", :direction :ascending}
-                                              {:dimension "user_name", :direction :ascending}]}}
-    :query-type  ::druid.qp/groupBy
-    :mbql?       true}
+   :query       {:queryType    :groupBy
+                 :granularity  :all
+                 :dataSource   "checkins"
+                 :dimensions   ["venue_category_name", "user_name"]
+                 :context      {:timeout 60000, :queryId "<Query ID>"}
+                 :intervals    ["1900-01-01/2100-01-01"]
+                 :aggregations [{:type       :cardinality
+                                 :name       "__count_0"
+                                 :fieldNames ["venue_name"]
+                                 :byRow      true
+                                 :round      true}]
+                 :limitSpec    {:type    :default
+                                :columns [{:dimension "__count_0", :direction :descending}
+                                          {:dimension "venue_category_name", :direction :ascending}
+                                          {:dimension "user_name", :direction :ascending}]}}
+   :query-type ::druid.qp/groupBy
+   :mbql?      true}
   (query->native
-    {:aggregation [[:aggregation-options [:distinct [:field-id (data/id :checkins :venue_name)]] {:name "__count_0"}]]
-     :breakout    [$venue_category_name $user_name]
-     :order-by    [[:desc [:aggregation 0]] [:asc [:field-id (data/id :checkins :venue_category_name)]]]}))
+   {:aggregation [[:aggregation-options [:distinct $checkins.venue_name] {:name "__count_0"}]]
+    :breakout    [$venue_category_name $user_name]
+    :order-by    [[:desc [:aggregation 0]] [:asc $checkins.venue_category_name]]}))
 
 (datasets/expect-with-driver :druid
   {:projections [:venue_category_name :user_name :__count_0]
-   :query       {:queryType        :groupBy
-                 :granularity      :all
-                 :dataSource       "checkins"
-                 :dimensions       ["venue_category_name", "user_name"]
-                 :context          {:timeout 60000, :queryId "<Query ID>"}
-                 :intervals        ["1900-01-01/2100-01-01"]
-                 :aggregations     [{:type       :cardinality
-                                     :name       "__count_0"
-                                     :fieldNames ["venue_name"]}]
-                 :limitSpec        {:type    :default
-                                    :columns [{:dimension "__count_0", :direction :descending}
-                                              {:dimension "venue_category_name", :direction :ascending}
-                                              {:dimension "user_name", :direction :ascending}]
-                                    :limit   5}}
+   :query       {:queryType    :groupBy
+                 :granularity  :all
+                 :dataSource   "checkins"
+                 :dimensions   ["venue_category_name", "user_name"]
+                 :context      {:timeout 60000, :queryId "<Query ID>"}
+                 :intervals    ["1900-01-01/2100-01-01"]
+                 :aggregations [{:type       :cardinality
+                                 :name       "__count_0"
+                                 :fieldNames ["venue_name"]
+                                 :byRow      true
+                                 :round      true}]
+                 :limitSpec    {:type    :default
+                                :columns [{:dimension "__count_0", :direction :descending}
+                                          {:dimension "venue_category_name", :direction :ascending}
+                                          {:dimension "user_name", :direction :ascending}]
+                                :limit   5}}
    :query-type  ::druid.qp/groupBy
    :mbql?       true}
   (query->native
-    {:aggregation [[:aggregation-options [:distinct [:field-id (data/id :checkins :venue_name)]] {:name "__count_0"}]]
-     :breakout    [$venue_category_name $user_name]
-     :order-by    [[:desc [:aggregation 0]] [:asc [:field-id (data/id :checkins :venue_category_name)]]]
-     :limit       5}))
+   {:aggregation [[:aggregation-options [:distinct $checkins.venue_name] {:name "__count_0"}]]
+    :breakout    [$venue_category_name $user_name]
+    :order-by    [[:desc [:aggregation 0]] [:asc $checkins.venue_category_name]]
+    :limit       5}))
 
 (datasets/expect-with-driver :druid
   {:projections [:venue_category_name :__count_0]
-    :query       {:queryType        :topN
-                  :threshold        1000
-                  :granularity      :all
-                  :dataSource       "checkins"
-                  :dimension        "venue_category_name"
-                  :context          {:timeout 60000, :queryId "<Query ID>"}
-                  :intervals        ["1900-01-01/2100-01-01"]
-                  :metric           "__count_0"
-                  :aggregations
-                  [{:type       :cardinality
-                    :name       "__count_0"
-                    :fieldNames ["venue_name"]}]}
-    :query-type  ::druid.qp/topN
-    :mbql?       true}
+   :query       {:queryType    :topN
+                 :threshold    1000
+                 :granularity  :all
+                 :dataSource   "checkins"
+                 :dimension    "venue_category_name"
+                 :context      {:timeout 60000, :queryId "<Query ID>"}
+                 :intervals    ["1900-01-01/2100-01-01"]
+                 :metric       "__count_0"
+                 :aggregations [{:type       :cardinality
+                                 :name       "__count_0"
+                                 :fieldNames ["venue_name"]
+                                 :byRow      true
+                                 :round      true}]}
+   :query-type  ::druid.qp/topN
+   :mbql?       true}
   (query->native
-    {:aggregation [[:aggregation-options [:distinct [:field-id (data/id :checkins :venue_name)]] {:name "__count_0"}]]
+   {:aggregation [[:aggregation-options [:distinct $checkins.venue_name] {:name "__count_0"}]]
     :breakout    [$venue_category_name]
-    :order-by    [[:desc [:aggregation 0]] [:asc [:field-id (data/id :checkins :venue_category_name)]]]}))
+    :order-by    [[:desc [:aggregation 0]] [:asc $checkins.venue_category_name]]}))
diff --git a/modules/drivers/druid/test/metabase/driver/druid_test.clj b/modules/drivers/druid/test/metabase/driver/druid_test.clj
index e94f1fb7f31..8cd232eef70 100644
--- a/modules/drivers/druid/test/metabase/driver/druid_test.clj
+++ b/modules/drivers/druid/test/metabase/driver/druid_test.clj
@@ -27,6 +27,14 @@
             [toucan.util.test :as tt]))
 
 ;;; table-rows-sample
+(defn table-rows-sample []
+  (->> (metadata-queries/table-rows-sample (Table (data/id :checkins))
+         [(Field (data/id :checkins :id))
+          (Field (data/id :checkins :venue_name))
+          (Field (data/id :checkins :timestamp))])
+       (sort-by first)
+       (take 5)))
+
 (datasets/expect-with-driver :druid
   ;; druid returns a timestamp along with the query, but that shouldn't really matter here :D
   [["1"    "The Misfit Restaurant + Bar" #inst "2014-04-07T07:00:00.000Z"]
@@ -34,12 +42,7 @@
    ["100"  "PizzaHacker"                 #inst "2014-07-26T07:00:00.000Z"]
    ["1000" "Tito's Tacos"                #inst "2014-06-03T07:00:00.000Z"]
    ["101"  "Golden Road Brewing"         #inst "2015-09-04T07:00:00.000Z"]]
-  (->> (metadata-queries/table-rows-sample (Table (data/id :checkins))
-         [(Field (data/id :checkins :id))
-          (Field (data/id :checkins :venue_name))
-          (Field (data/id :checkins :timestamp))])
-       (sort-by first)
-       (take 5)))
+  (table-rows-sample))
 
 (datasets/expect-with-driver :druid
   ;; druid returns a timestamp along with the query, but that shouldn't really matter here :D
@@ -49,12 +52,7 @@
    ["1000" "Tito's Tacos"                #inst "2014-06-03T00:00:00.000-07:00"]
    ["101"  "Golden Road Brewing"         #inst "2015-09-04T00:00:00.000-07:00"]]
   (tu/with-temporary-setting-values [report-timezone "America/Los_Angeles"]
-    (->> (metadata-queries/table-rows-sample (Table (data/id :checkins))
-           [(Field (data/id :checkins :id))
-            (Field (data/id :checkins :venue_name))
-            (Field (data/id :checkins :timestamp))])
-         (sort-by first)
-         (take 5))))
+    (table-rows-sample)))
 
 (datasets/expect-with-driver :druid
   ;; druid returns a timestamp along with the query, but that shouldn't really matter here :D
@@ -64,12 +62,7 @@
    ["1000" "Tito's Tacos"                #inst "2014-06-03T02:00:00.000-05:00"]
    ["101"  "Golden Road Brewing"         #inst "2015-09-04T02:00:00.000-05:00"]]
   (tu.tz/with-jvm-tz (time/time-zone-for-id "America/Chicago")
-    (->> (metadata-queries/table-rows-sample (Table (data/id :checkins))
-           [(Field (data/id :checkins :id))
-            (Field (data/id :checkins :venue_name))
-            (Field (data/id :checkins :timestamp))])
-         (sort-by first)
-         (take 5))))
+    (table-rows-sample)))
 
 (def ^:private ^String native-query-1
   (json/generate-string
@@ -135,14 +128,14 @@
 ;; make sure we can run a native :timeseries query. This was throwing an Exception -- see #3409
 (def ^:private ^String native-query-2
   (json/generate-string
-    {:intervals    ["1900-01-01/2100-01-01"]
-     :granularity  {:type     :period
-                    :period   :P1M
-                    :timeZone :UTC}
-     :queryType    :timeseries
-     :dataSource   :checkins
-     :aggregations [{:type :count
-                     :name :count}]}))
+   {:intervals    ["1900-01-01/2100-01-01"]
+    :granularity  {:type     :period
+                   :period   :P1M
+                   :timeZone :UTC}
+    :queryType    :timeseries
+    :dataSource   :checkins
+    :aggregations [{:type :count
+                    :name :count}]}))
 
 (datasets/expect-with-driver :druid
   :completed
@@ -340,6 +333,15 @@
       {:aggregation [[:aggregation-options [:- [:sum $venue_price] 41] {:name "Sum-41"}]]
        :breakout    [$venue_price]})))
 
+;; distinct count of two dimensions
+(datasets/expect-with-driver :druid
+   {:rows [[98]]
+    :columns ["count"]}
+   (qp.test/rows+column-names
+    (druid-query
+     {:aggregation [[:distinct [:+  $checkins.venue_category_name
+                                    $checkins.venue_name]]]})))
+
 ;; check that we can handle METRICS (ick) inside expression aggregation clauses
 (datasets/expect-with-driver :druid
   [["2" 1231.0]
@@ -349,12 +351,9 @@
     (tt/with-temp Metric [metric {:definition {:aggregation [:sum [:field-id (data/id :checkins :venue_price)]]
                                                :filter      [:> [:field-id (data/id :checkins :venue_price)] 1]}}]
       (qp.test/rows
-        (qp/process-query
-          {:database (data/id)
-           :type     :query
-           :query    {:source-table (data/id :checkins)
-                      :aggregation  [:+ [:metric (u/get-id metric)] 1]
-                      :breakout     [[:field-id (data/id :checkins :venue_price)]]}})))))
+        (data/run-mbql-query checkins
+          {:aggregation [:+ [:metric (u/get-id metric)] 1]
+           :breakout    [$venue_price]})))))
 
 (expect
   com.jcraft.jsch.JSchException
@@ -430,36 +429,33 @@
            "Kinaree Thai Bistro"
            "1"]]}
   (tqpt/with-flattened-dbdef
-    (let [results (qp/process-query
-                    {:database (data/id)
-                     :type     :query
-                     :query    {:source-table (data/id :checkins)
-                                :limit        1}})]
+    (let [results (data/run-mbql-query checkins
+                    {:limit 1})]
       (assert (= (:status results) :completed)
         (u/pprint-to-str 'red results))
       {:cols (->> results :data :cols (map :name))
        :rows (-> results :data :rows)})))
 
 (datasets/expect-with-driver :druid
-  [["Bar" "Felipinho Asklepios" 8.015665809687173]
-    ["Bar" "Spiros Teofil" 8.015665809687173]
-    ["Japanese" "Felipinho Asklepios" 7.011990219885757]
-    ["Japanese" "Frans Hevel" 7.011990219885757]
-    ["Mexican" "Shad Ferdynand" 7.011990219885757]]
+  [["Bar" "Felipinho Asklepios"      8]
+   ["Bar" "Spiros Teofil"            8]
+   ["Japanese" "Felipinho Asklepios" 7]
+   ["Japanese" "Frans Hevel"         7]
+   ["Mexican" "Shad Ferdynand"       7]]
   (druid-query-returning-rows
-    {:aggregation [[:aggregation-options [:distinct [:field-id (data/id :checkins :venue_name)]] {:name "__count_0"}]]
-      :breakout    [$venue_category_name $user_name]
-      :order-by    [[:desc [:aggregation 0]] [:asc [:field-id (data/id :checkins :venue_category_name)]]]
-      :limit       5}))
+    {:aggregation [[:aggregation-options [:distinct $checkins.venue_name] {:name "__count_0"}]]
+     :breakout    [$venue_category_name $user_name]
+     :order-by    [[:desc [:aggregation 0]] [:asc $checkins.venue_category_name]]
+     :limit       5}))
 
 (datasets/expect-with-driver :druid
-  [["American" "Rüstem Hebel" 1.0002442201269182]
-  ["Artisan" "Broen Olujimi" 1.0002442201269182]
-  ["Artisan" "Conchúr Tihomir" 1.0002442201269182]
-  ["Artisan" "Dwight Gresham" 1.0002442201269182]
-  ["Artisan" "Plato Yeshua" 1.0002442201269182]]
+  [["American" "Rüstem Hebel"    1]
+   ["Artisan"  "Broen Olujimi"   1]
+   ["Artisan"  "Conchúr Tihomir" 1]
+   ["Artisan"  "Dwight Gresham"  1]
+   ["Artisan"  "Plato Yeshua"    1]]
   (druid-query-returning-rows
-    {:aggregation [[:aggregation-options [:distinct [:field-id (data/id :checkins :venue_name)]] {:name "__count_0"}]]
-      :breakout    [$venue_category_name $user_name]
-      :order-by    [[:asc [:aggregation 0]] [:asc [:field-id (data/id :checkins :venue_category_name)]]]
-      :limit       5}))
\ No newline at end of file
+    {:aggregation [[:aggregation-options [:distinct $checkins.venue_name] {:name "__count_0"}]]
+     :breakout   [$venue_category_name $user_name]
+     :order-by   [[:asc [:aggregation 0]] [:asc $checkins.venue_category_name]]
+     :limit      5}))
-- 
GitLab