From 0165251858b9502dcee457e2d6fdb8ff5af69145 Mon Sep 17 00:00:00 2001 From: Cam Saul <1455846+camsaul@users.noreply.github.com> Date: Wed, 28 Aug 2019 15:49:48 -0700 Subject: [PATCH] PR 10295 with test fixes (#10752) * Support distinct by two or more columns in druid * Test fixes [ci druid] --- .../metabase/driver/druid/query_processor.clj | 14 ++- .../driver/druid/query_processor_test.clj | 114 +++++++++--------- .../druid/test/metabase/driver/druid_test.clj | 106 ++++++++-------- 3 files changed, 122 insertions(+), 112 deletions(-) diff --git a/modules/drivers/druid/src/metabase/driver/druid/query_processor.clj b/modules/drivers/druid/src/metabase/driver/druid/query_processor.clj index 368a0de13fd..043e1dc4457 100644 --- a/modules/drivers/druid/src/metabase/driver/druid/query_processor.clj +++ b/modules/drivers/druid/src/metabase/driver/druid/query_processor.clj @@ -515,13 +515,23 @@ (defn- ag:distinct [field output-name] - (if (isa? (:base-type field) :type/DruidHyperUnique) + (cond + (mbql.u/is-clause? #{:+} field) + {:type :cardinality + :name output-name + :fieldNames (mapv ->rvalue (rest field)) + :byRow true + :round true} + (isa? (:base-type field) :type/DruidHyperUnique) {:type :hyperUnique :name output-name :fieldName (->rvalue field)} + :else {:type :cardinality :name output-name - :fieldNames [(->rvalue field)]})) + :fieldNames [(->rvalue field)] + :byRow true + :round true})) (defn- ag:count ([output-name] diff --git a/modules/drivers/druid/test/metabase/driver/druid/query_processor_test.clj b/modules/drivers/druid/test/metabase/driver/druid/query_processor_test.clj index 4ec7fb87e8f..5cda190572e 100644 --- a/modules/drivers/druid/test/metabase/driver/druid/query_processor_test.clj +++ b/modules/drivers/druid/test/metabase/driver/druid/query_processor_test.clj @@ -37,9 +37,8 @@ :metric {:type :alphaNumeric} :aggregations [{:type :filtered - :filter - {:type :not - :field {:type :selector, :dimension "id", :value nil}} + :filter {:type :not + :field {:type :selector, :dimension "id", :value nil}} :aggregator {:type :count, :name "__count_0"}}]} :query-type ::druid.qp/topN :mbql? true} @@ -49,67 +48,72 @@ (datasets/expect-with-driver :druid {:projections [:venue_category_name :user_name :__count_0] - :query {:queryType :groupBy - :granularity :all - :dataSource "checkins" - :dimensions ["venue_category_name", "user_name"] - :context {:timeout 60000, :queryId "<Query ID>"} - :intervals ["1900-01-01/2100-01-01"] - :aggregations [{:type :cardinality - :name "__count_0" - :fieldNames ["venue_name"]}] - :limitSpec {:type :default - :columns [{:dimension "__count_0", :direction :descending} - {:dimension "venue_category_name", :direction :ascending} - {:dimension "user_name", :direction :ascending}]}} - :query-type ::druid.qp/groupBy - :mbql? true} + :query {:queryType :groupBy + :granularity :all + :dataSource "checkins" + :dimensions ["venue_category_name", "user_name"] + :context {:timeout 60000, :queryId "<Query ID>"} + :intervals ["1900-01-01/2100-01-01"] + :aggregations [{:type :cardinality + :name "__count_0" + :fieldNames ["venue_name"] + :byRow true + :round true}] + :limitSpec {:type :default + :columns [{:dimension "__count_0", :direction :descending} + {:dimension "venue_category_name", :direction :ascending} + {:dimension "user_name", :direction :ascending}]}} + :query-type ::druid.qp/groupBy + :mbql? true} (query->native - {:aggregation [[:aggregation-options [:distinct [:field-id (data/id :checkins :venue_name)]] {:name "__count_0"}]] - :breakout [$venue_category_name $user_name] - :order-by [[:desc [:aggregation 0]] [:asc [:field-id (data/id :checkins :venue_category_name)]]]})) + {:aggregation [[:aggregation-options [:distinct $checkins.venue_name] {:name "__count_0"}]] + :breakout [$venue_category_name $user_name] + :order-by [[:desc [:aggregation 0]] [:asc $checkins.venue_category_name]]})) (datasets/expect-with-driver :druid {:projections [:venue_category_name :user_name :__count_0] - :query {:queryType :groupBy - :granularity :all - :dataSource "checkins" - :dimensions ["venue_category_name", "user_name"] - :context {:timeout 60000, :queryId "<Query ID>"} - :intervals ["1900-01-01/2100-01-01"] - :aggregations [{:type :cardinality - :name "__count_0" - :fieldNames ["venue_name"]}] - :limitSpec {:type :default - :columns [{:dimension "__count_0", :direction :descending} - {:dimension "venue_category_name", :direction :ascending} - {:dimension "user_name", :direction :ascending}] - :limit 5}} + :query {:queryType :groupBy + :granularity :all + :dataSource "checkins" + :dimensions ["venue_category_name", "user_name"] + :context {:timeout 60000, :queryId "<Query ID>"} + :intervals ["1900-01-01/2100-01-01"] + :aggregations [{:type :cardinality + :name "__count_0" + :fieldNames ["venue_name"] + :byRow true + :round true}] + :limitSpec {:type :default + :columns [{:dimension "__count_0", :direction :descending} + {:dimension "venue_category_name", :direction :ascending} + {:dimension "user_name", :direction :ascending}] + :limit 5}} :query-type ::druid.qp/groupBy :mbql? true} (query->native - {:aggregation [[:aggregation-options [:distinct [:field-id (data/id :checkins :venue_name)]] {:name "__count_0"}]] - :breakout [$venue_category_name $user_name] - :order-by [[:desc [:aggregation 0]] [:asc [:field-id (data/id :checkins :venue_category_name)]]] - :limit 5})) + {:aggregation [[:aggregation-options [:distinct $checkins.venue_name] {:name "__count_0"}]] + :breakout [$venue_category_name $user_name] + :order-by [[:desc [:aggregation 0]] [:asc $checkins.venue_category_name]] + :limit 5})) (datasets/expect-with-driver :druid {:projections [:venue_category_name :__count_0] - :query {:queryType :topN - :threshold 1000 - :granularity :all - :dataSource "checkins" - :dimension "venue_category_name" - :context {:timeout 60000, :queryId "<Query ID>"} - :intervals ["1900-01-01/2100-01-01"] - :metric "__count_0" - :aggregations - [{:type :cardinality - :name "__count_0" - :fieldNames ["venue_name"]}]} - :query-type ::druid.qp/topN - :mbql? true} + :query {:queryType :topN + :threshold 1000 + :granularity :all + :dataSource "checkins" + :dimension "venue_category_name" + :context {:timeout 60000, :queryId "<Query ID>"} + :intervals ["1900-01-01/2100-01-01"] + :metric "__count_0" + :aggregations [{:type :cardinality + :name "__count_0" + :fieldNames ["venue_name"] + :byRow true + :round true}]} + :query-type ::druid.qp/topN + :mbql? true} (query->native - {:aggregation [[:aggregation-options [:distinct [:field-id (data/id :checkins :venue_name)]] {:name "__count_0"}]] + {:aggregation [[:aggregation-options [:distinct $checkins.venue_name] {:name "__count_0"}]] :breakout [$venue_category_name] - :order-by [[:desc [:aggregation 0]] [:asc [:field-id (data/id :checkins :venue_category_name)]]]})) + :order-by [[:desc [:aggregation 0]] [:asc $checkins.venue_category_name]]})) diff --git a/modules/drivers/druid/test/metabase/driver/druid_test.clj b/modules/drivers/druid/test/metabase/driver/druid_test.clj index e94f1fb7f31..8cd232eef70 100644 --- a/modules/drivers/druid/test/metabase/driver/druid_test.clj +++ b/modules/drivers/druid/test/metabase/driver/druid_test.clj @@ -27,6 +27,14 @@ [toucan.util.test :as tt])) ;;; table-rows-sample +(defn table-rows-sample [] + (->> (metadata-queries/table-rows-sample (Table (data/id :checkins)) + [(Field (data/id :checkins :id)) + (Field (data/id :checkins :venue_name)) + (Field (data/id :checkins :timestamp))]) + (sort-by first) + (take 5))) + (datasets/expect-with-driver :druid ;; druid returns a timestamp along with the query, but that shouldn't really matter here :D [["1" "The Misfit Restaurant + Bar" #inst "2014-04-07T07:00:00.000Z"] @@ -34,12 +42,7 @@ ["100" "PizzaHacker" #inst "2014-07-26T07:00:00.000Z"] ["1000" "Tito's Tacos" #inst "2014-06-03T07:00:00.000Z"] ["101" "Golden Road Brewing" #inst "2015-09-04T07:00:00.000Z"]] - (->> (metadata-queries/table-rows-sample (Table (data/id :checkins)) - [(Field (data/id :checkins :id)) - (Field (data/id :checkins :venue_name)) - (Field (data/id :checkins :timestamp))]) - (sort-by first) - (take 5))) + (table-rows-sample)) (datasets/expect-with-driver :druid ;; druid returns a timestamp along with the query, but that shouldn't really matter here :D @@ -49,12 +52,7 @@ ["1000" "Tito's Tacos" #inst "2014-06-03T00:00:00.000-07:00"] ["101" "Golden Road Brewing" #inst "2015-09-04T00:00:00.000-07:00"]] (tu/with-temporary-setting-values [report-timezone "America/Los_Angeles"] - (->> (metadata-queries/table-rows-sample (Table (data/id :checkins)) - [(Field (data/id :checkins :id)) - (Field (data/id :checkins :venue_name)) - (Field (data/id :checkins :timestamp))]) - (sort-by first) - (take 5)))) + (table-rows-sample))) (datasets/expect-with-driver :druid ;; druid returns a timestamp along with the query, but that shouldn't really matter here :D @@ -64,12 +62,7 @@ ["1000" "Tito's Tacos" #inst "2014-06-03T02:00:00.000-05:00"] ["101" "Golden Road Brewing" #inst "2015-09-04T02:00:00.000-05:00"]] (tu.tz/with-jvm-tz (time/time-zone-for-id "America/Chicago") - (->> (metadata-queries/table-rows-sample (Table (data/id :checkins)) - [(Field (data/id :checkins :id)) - (Field (data/id :checkins :venue_name)) - (Field (data/id :checkins :timestamp))]) - (sort-by first) - (take 5)))) + (table-rows-sample))) (def ^:private ^String native-query-1 (json/generate-string @@ -135,14 +128,14 @@ ;; make sure we can run a native :timeseries query. This was throwing an Exception -- see #3409 (def ^:private ^String native-query-2 (json/generate-string - {:intervals ["1900-01-01/2100-01-01"] - :granularity {:type :period - :period :P1M - :timeZone :UTC} - :queryType :timeseries - :dataSource :checkins - :aggregations [{:type :count - :name :count}]})) + {:intervals ["1900-01-01/2100-01-01"] + :granularity {:type :period + :period :P1M + :timeZone :UTC} + :queryType :timeseries + :dataSource :checkins + :aggregations [{:type :count + :name :count}]})) (datasets/expect-with-driver :druid :completed @@ -340,6 +333,15 @@ {:aggregation [[:aggregation-options [:- [:sum $venue_price] 41] {:name "Sum-41"}]] :breakout [$venue_price]}))) +;; distinct count of two dimensions +(datasets/expect-with-driver :druid + {:rows [[98]] + :columns ["count"]} + (qp.test/rows+column-names + (druid-query + {:aggregation [[:distinct [:+ $checkins.venue_category_name + $checkins.venue_name]]]}))) + ;; check that we can handle METRICS (ick) inside expression aggregation clauses (datasets/expect-with-driver :druid [["2" 1231.0] @@ -349,12 +351,9 @@ (tt/with-temp Metric [metric {:definition {:aggregation [:sum [:field-id (data/id :checkins :venue_price)]] :filter [:> [:field-id (data/id :checkins :venue_price)] 1]}}] (qp.test/rows - (qp/process-query - {:database (data/id) - :type :query - :query {:source-table (data/id :checkins) - :aggregation [:+ [:metric (u/get-id metric)] 1] - :breakout [[:field-id (data/id :checkins :venue_price)]]}}))))) + (data/run-mbql-query checkins + {:aggregation [:+ [:metric (u/get-id metric)] 1] + :breakout [$venue_price]}))))) (expect com.jcraft.jsch.JSchException @@ -430,36 +429,33 @@ "Kinaree Thai Bistro" "1"]]} (tqpt/with-flattened-dbdef - (let [results (qp/process-query - {:database (data/id) - :type :query - :query {:source-table (data/id :checkins) - :limit 1}})] + (let [results (data/run-mbql-query checkins + {:limit 1})] (assert (= (:status results) :completed) (u/pprint-to-str 'red results)) {:cols (->> results :data :cols (map :name)) :rows (-> results :data :rows)}))) (datasets/expect-with-driver :druid - [["Bar" "Felipinho Asklepios" 8.015665809687173] - ["Bar" "Spiros Teofil" 8.015665809687173] - ["Japanese" "Felipinho Asklepios" 7.011990219885757] - ["Japanese" "Frans Hevel" 7.011990219885757] - ["Mexican" "Shad Ferdynand" 7.011990219885757]] + [["Bar" "Felipinho Asklepios" 8] + ["Bar" "Spiros Teofil" 8] + ["Japanese" "Felipinho Asklepios" 7] + ["Japanese" "Frans Hevel" 7] + ["Mexican" "Shad Ferdynand" 7]] (druid-query-returning-rows - {:aggregation [[:aggregation-options [:distinct [:field-id (data/id :checkins :venue_name)]] {:name "__count_0"}]] - :breakout [$venue_category_name $user_name] - :order-by [[:desc [:aggregation 0]] [:asc [:field-id (data/id :checkins :venue_category_name)]]] - :limit 5})) + {:aggregation [[:aggregation-options [:distinct $checkins.venue_name] {:name "__count_0"}]] + :breakout [$venue_category_name $user_name] + :order-by [[:desc [:aggregation 0]] [:asc $checkins.venue_category_name]] + :limit 5})) (datasets/expect-with-driver :druid - [["American" "Rüstem Hebel" 1.0002442201269182] - ["Artisan" "Broen Olujimi" 1.0002442201269182] - ["Artisan" "Conchúr Tihomir" 1.0002442201269182] - ["Artisan" "Dwight Gresham" 1.0002442201269182] - ["Artisan" "Plato Yeshua" 1.0002442201269182]] + [["American" "Rüstem Hebel" 1] + ["Artisan" "Broen Olujimi" 1] + ["Artisan" "Conchúr Tihomir" 1] + ["Artisan" "Dwight Gresham" 1] + ["Artisan" "Plato Yeshua" 1]] (druid-query-returning-rows - {:aggregation [[:aggregation-options [:distinct [:field-id (data/id :checkins :venue_name)]] {:name "__count_0"}]] - :breakout [$venue_category_name $user_name] - :order-by [[:asc [:aggregation 0]] [:asc [:field-id (data/id :checkins :venue_category_name)]]] - :limit 5})) \ No newline at end of file + {:aggregation [[:aggregation-options [:distinct $checkins.venue_name] {:name "__count_0"}]] + :breakout [$venue_category_name $user_name] + :order-by [[:asc [:aggregation 0]] [:asc $checkins.venue_category_name]] + :limit 5})) -- GitLab