From 9d112756ff49273b348b36c4c06778636c30b752 Mon Sep 17 00:00:00 2001 From: Cam Saul <1455846+camsaul@users.noreply.github.com> Date: Fri, 5 Apr 2024 13:03:24 -0700 Subject: [PATCH] Use window functions for cumulative sum and cumulative count (#40752) * Use window functions for SQL drivers for cumulative count and sum * H2/Snowflake/Redshift implementation * Test fixes :wrench: * Use SELECT expression positions as the default window function implementation * Some test fixes. * Fix bugs in add-alias-info * MongoDB fixes * Another pass * BigQuery should use positions? * Disable tests for the failing drivers, we can fix in follow-on PRs. * Extra info * Add GH issue numbers * Update more GH issue numbers * Fix SQL Server!!!! wooo * I THOUGHT WE WERE DISABLING SPARK SQL!!!!!!!!!!1 * Cleanup to get things ready for review. * Snowflake + Vertica shouldn't use output column numbers for ORDER BY in OVER * Code cleanup * Test fix? :wrench: --- docs/developers-guide/driver-changelog.md | 55 ++++ .../custom-column/custom-column.cy.spec.js | 4 +- .../athena/src/metabase/driver/athena.clj | 9 +- .../metabase/driver/bigquery_cloud_sdk.clj | 11 +- .../driver/mongo/query_processor_test.clj | 11 +- .../src/metabase/driver/presto_jdbc.clj | 19 +- .../redshift/src/metabase/driver/redshift.clj | 11 +- .../src/metabase/driver/snowflake.clj | 11 +- .../sparksql/src/metabase/driver/sparksql.clj | 6 +- .../src/metabase/driver/sqlserver.clj | 23 +- .../vertica/src/metabase/driver/vertica.clj | 11 +- src/metabase/db.clj | 1 - src/metabase/driver.clj | 12 +- src/metabase/driver/h2.clj | 12 +- src/metabase/driver/mysql.clj | 29 +- src/metabase/driver/postgres.clj | 15 +- src/metabase/driver/sql.clj | 13 +- src/metabase/driver/sql/query_processor.clj | 97 ++++++- src/metabase/driver/sql_jdbc/execute.clj | 1 + src/metabase/legacy_mbql/util.cljc | 15 +- src/metabase/lib/aggregation.cljc | 4 +- .../middleware/cumulative_aggregations.clj | 13 +- .../driver/sql/query_processor_test.clj | 3 +- test/metabase/legacy_mbql/util_test.cljc | 48 ++-- .../middleware/add_implicit_clauses_test.clj | 31 ++- .../cumulative_aggregations_test.clj | 32 ++- .../util/add_alias_info_test.clj | 45 +++- .../query_processor_test/aggregation_test.clj | 131 --------- .../cumulative_aggregation_test.clj | 248 ++++++++++++++++++ .../expression_aggregations_test.clj | 30 --- test/metabase/test/data/h2.clj | 12 +- test/metabase/test/data/interface.clj | 10 +- 32 files changed, 669 insertions(+), 304 deletions(-) create mode 100644 test/metabase/query_processor_test/cumulative_aggregation_test.clj diff --git a/docs/developers-guide/driver-changelog.md b/docs/developers-guide/driver-changelog.md index 704bd7040da..be60a2daa82 100644 --- a/docs/developers-guide/driver-changelog.md +++ b/docs/developers-guide/driver-changelog.md @@ -13,6 +13,61 @@ title: Driver interface changelog efficient way possible. This is currently only required for drivers that support the `:uploads` feature, and has a default implementation for JDBC-based drivers. +- New feature `:window-functions` has been added. Drivers that implement this method are expected to implement the + cumulative sum (`:cum-sum`) and cumulative count (`:cum-count`) aggregation clauses in their native query language. + For non-SQL drivers (drivers not based on our `:sql` or `:sql-jdbc` drivers), this feature flag is set to `false` by + default; the old (broken) post-processing implementations of cumulative aggregations will continue to be used. (See + issues [#13634](https://github.com/metabase/metabase/issues/13634) and + [#15118](https://github.com/metabase/metabase/issues/15118) for more information on why the old implementation is + broken.) + + Non-SQL drivers should be updated to implement cumulative aggregations natively if possible. + + SQL drivers that support ordering by output column numbers in `OVER` `ORDER BY` expressions, e.g. + + ```sql + SELECT + sum(sum(my_column) OVER (ORDER BY 1 ROWS UNBOUNDED PRECEDING) AS cumulative_sum + date_trunc(my_timestamp, 'month') AS my_timestamp_month + FROM + my_table + GROUP BY + date_trunc(my_timestamp, 'month') + ORDER BY + date_trunc(my_timestamp, 'month') + ``` + + will work without any changes; this is the new default behavior for drivers based on the `:sql` or `:sql-jdbc` + drivers. + + For databases that do not support ordering by output column numbers (e.g. MySQL/MariaDB), you can mark your driver + as not supporting the `:sql/window-functions.order-by-output-column-numbers` feature flag, e.g. + + ```clj + (defmethod driver/database-supports? [:my-driver :sql/window-functions.order-by-output-column-numbers] + [_driver _feature _database] + false) + ``` + + In this case, the `:sql` driver will instead generate something like + + ```sql + SELECT + sum(sum(my_column) OVER (ORDER BY date_trunc(my_timestamp, 'month') ROWS UNBOUNDED PRECEDING) AS cumulative_sum + date_trunc(my_timestamp, 'month') AS my_timestamp_month + FROM + my_table + GROUP BY + date_trunc(my_timestamp, 'month') + ORDER BY + date_trunc(my_timestamp, 'month') + ``` + + If neither of these strategies work for you, you might need to do something more complicated -- see + [#40982](https://github.com/metabase/metabase/pull/40982) for an example of complex query transformations to get + fussy BigQuery working. (More on this soon.) If all else fails, you can always specify that your driver does not + support `:window-functions`, and it will fall back to using the old broken implementation. + ## Metabase 0.49.1 - Another driver feature has been added: `describe-fields`. If a driver opts-in to supporting this feature, The diff --git a/e2e/test/scenarios/custom-column/custom-column.cy.spec.js b/e2e/test/scenarios/custom-column/custom-column.cy.spec.js index 24499e1127d..e948b1fae91 100644 --- a/e2e/test/scenarios/custom-column/custom-column.cy.spec.js +++ b/e2e/test/scenarios/custom-column/custom-column.cy.spec.js @@ -341,7 +341,7 @@ describe("scenarios > question > custom column", () => { .should("have.length.of.at.least", 8); }); - it.skip("should create custom column after aggregation with 'cum-sum/count' (metabase#13634)", () => { + it("should create custom column after aggregation with 'cum-sum/count' (metabase#13634)", () => { cy.createQuestion( { name: "13634", @@ -365,7 +365,7 @@ describe("scenarios > question > custom column", () => { cy.log("Reported failing in v0.34.3, v0.35.4, v0.36.8.2, v0.37.0.2"); // eslint-disable-next-line no-unscoped-text-selectors -- deprecated usage cy.findByText("Foo Bar"); - cy.findAllByText("57911"); + cy.findAllByText("57,911"); }); it("should not be dropped if filter is changed after aggregation (metaabase#14193)", () => { diff --git a/modules/drivers/athena/src/metabase/driver/athena.clj b/modules/drivers/athena/src/metabase/driver/athena.clj index 56284630aa7..77aa61c01a1 100644 --- a/modules/drivers/athena/src/metabase/driver/athena.clj +++ b/modules/drivers/athena/src/metabase/driver/athena.clj @@ -32,10 +32,11 @@ ;;; | metabase.driver method impls | ;;; +----------------------------------------------------------------------------------------------------------------+ -(doseq [[feature supported?] {:foreign-keys true - :datetime-diff true - :nested-fields false #_true ; huh? Not sure why this was `true`. Disabled for now. - :test/jvm-timezone-setting false}] +(doseq [[feature supported?] {:datetime-diff true + :foreign-keys true + :nested-fields false + :sql/window-functions.order-by-output-column-numbers false + :test/jvm-timezone-setting false}] (defmethod driver/database-supports? [:athena feature] [_driver _feature _db] supported?)) ;;; +----------------------------------------------------------------------------------------------------------------+ diff --git a/modules/drivers/bigquery-cloud-sdk/src/metabase/driver/bigquery_cloud_sdk.clj b/modules/drivers/bigquery-cloud-sdk/src/metabase/driver/bigquery_cloud_sdk.clj index a64333af881..3e425ea62ec 100644 --- a/modules/drivers/bigquery-cloud-sdk/src/metabase/driver/bigquery_cloud_sdk.clj +++ b/modules/drivers/bigquery-cloud-sdk/src/metabase/driver/bigquery_cloud_sdk.clj @@ -462,10 +462,13 @@ :datetime-diff true :now true :convert-timezone true - ;; BigQuery uses timezone operators and arguments on calls like extract() and timezone_trunc() rather than literally - ;; using SET TIMEZONE, but we need to flag it as supporting set-timezone anyway so that reporting timezones are - ;; returned and used, and tests expect the converted values. - :set-timezone true}] + ;; BigQuery uses timezone operators and arguments on calls like extract() and + ;; timezone_trunc() rather than literally using SET TIMEZONE, but we need to flag it as + ;; supporting set-timezone anyway so that reporting timezones are returned and used, and + ;; tests expect the converted values. + :set-timezone true + ;; temporarily disabled -- will be fixed by #40982 + :window-functions false}] (defmethod driver/database-supports? [:bigquery-cloud-sdk feature] [_driver _feature _db] supported?)) ;; BigQuery is always in UTC diff --git a/modules/drivers/mongo/test/metabase/driver/mongo/query_processor_test.clj b/modules/drivers/mongo/test/metabase/driver/mongo/query_processor_test.clj index 69e161c3685..7e89dbd8e2a 100644 --- a/modules/drivers/mongo/test/metabase/driver/mongo/query_processor_test.clj +++ b/modules/drivers/mongo/test/metabase/driver/mongo/query_processor_test.clj @@ -106,8 +106,14 @@ (qp.compile/compile (mt/mbql-query attempts {:aggregation [[:count]] - :filter [:time-interval $datetime :last :month]})))) + :filter [:time-interval $datetime :last :month]})))))))))) +(deftest ^:parallel no-initial-projection-test-2 + (mt/test-driver :mongo + (testing "Don't need to create initial projections anymore (#4216)" + (testing "Don't create an initial projection for datetime-fields that use `:default` bucketing (#14838)" + (mt/with-clock #t "2021-02-15T17:33:00-08:00[US/Pacific]" + (mt/dataset attempted-murders (testing "should still work even with bucketing bucketing" (let [tz (qp.timezone/results-timezone-id :mongo mt/db) query (mt/with-metadata-provider (mt/id) @@ -147,8 +153,7 @@ {"$project" {"_id" false "datetime" "$_id.datetime" "datetime_2" "$_id.datetime_2" - "count" true}} - {"$sort" {"datetime" 1}}] + "count" true}}] :collection "attempts" :mbql? true} query)) diff --git a/modules/drivers/presto-jdbc/src/metabase/driver/presto_jdbc.clj b/modules/drivers/presto-jdbc/src/metabase/driver/presto_jdbc.clj index 4582c1d71d6..428b4b1fc57 100644 --- a/modules/drivers/presto-jdbc/src/metabase/driver/presto_jdbc.clj +++ b/modules/drivers/presto-jdbc/src/metabase/driver/presto_jdbc.clj @@ -43,15 +43,16 @@ (driver/register! :presto-jdbc, :parent #{:sql-jdbc ::sql-jdbc.legacy/use-legacy-classes-for-read-and-set}) -(doseq [[feature supported?] {:set-timezone true - :basic-aggregations true - :standard-deviation-aggregations true - :expressions true - :native-parameters true - :expression-aggregations true - :binning true - :foreign-keys true - :now true}] +(doseq [[feature supported?] {:basic-aggregations true + :binning true + :expression-aggregations true + :expressions true + :foreign-keys true + :native-parameters true + :now true + :set-timezone true + :sql/window-functions.order-by-output-column-numbers false + :standard-deviation-aggregations true}] (defmethod driver/database-supports? [:presto-jdbc feature] [_driver _feature _db] supported?)) ;;; Presto API helpers diff --git a/modules/drivers/redshift/src/metabase/driver/redshift.clj b/modules/drivers/redshift/src/metabase/driver/redshift.clj index 370140b4b2f..04f25b90dfc 100644 --- a/modules/drivers/redshift/src/metabase/driver/redshift.clj +++ b/modules/drivers/redshift/src/metabase/driver/redshift.clj @@ -33,11 +33,12 @@ (driver/register! :redshift, :parent #{:postgres ::sql-jdbc.legacy/use-legacy-classes-for-read-and-set}) -(doseq [[feature supported?] {:test/jvm-timezone-setting false - :nested-field-columns false - :describe-fields true - :describe-fks true - :connection-impersonation true}] +(doseq [[feature supported?] {:connection-impersonation true + :describe-fields true + :describe-fks true + :nested-field-columns false + :sql/window-functions.order-by-output-column-numbers false + :test/jvm-timezone-setting false}] (defmethod driver/database-supports? [:redshift feature] [_driver _feat _db] supported?)) ;;; +----------------------------------------------------------------------------------------------------------------+ diff --git a/modules/drivers/snowflake/src/metabase/driver/snowflake.clj b/modules/drivers/snowflake/src/metabase/driver/snowflake.clj index 2e906d68873..48b7a103867 100644 --- a/modules/drivers/snowflake/src/metabase/driver/snowflake.clj +++ b/modules/drivers/snowflake/src/metabase/driver/snowflake.clj @@ -49,11 +49,12 @@ (driver/register! :snowflake, :parent #{:sql-jdbc ::sql-jdbc.legacy/use-legacy-classes-for-read-and-set}) -(doseq [[feature supported?] {:datetime-diff true - :now true - :convert-timezone true - :connection-impersonation true - :connection-impersonation-requires-role true}] +(doseq [[feature supported?] {:connection-impersonation true + :connection-impersonation-requires-role true + :convert-timezone true + :datetime-diff true + :now true + :sql/window-functions.order-by-output-column-numbers false}] (defmethod driver/database-supports? [:snowflake feature] [_driver _feature _db] supported?)) (defmethod driver/humanize-connection-error-message :snowflake diff --git a/modules/drivers/sparksql/src/metabase/driver/sparksql.clj b/modules/drivers/sparksql/src/metabase/driver/sparksql.clj index ebe15841e9c..d28170db64e 100644 --- a/modules/drivers/sparksql/src/metabase/driver/sparksql.clj +++ b/modules/drivers/sparksql/src/metabase/driver/sparksql.clj @@ -209,8 +209,10 @@ :native-parameters true :nested-queries true :standard-deviation-aggregations true - :test/jvm-timezone-setting false}] - (defmethod driver/database-supports? [:sparkql feature] [_driver _feature _db] supported?)) + :test/jvm-timezone-setting false + ;; disabled for now, see issue #40991 to fix this. + :window-functions false}] + (defmethod driver/database-supports? [:sparksql feature] [_driver _feature _db] supported?)) ;; only define an implementation for `:foreign-keys` if none exists already. In test extensions we define an alternate ;; implementation, and we don't want to stomp over that if it was loaded already diff --git a/modules/drivers/sqlserver/src/metabase/driver/sqlserver.clj b/modules/drivers/sqlserver/src/metabase/driver/sqlserver.clj index ffd39807700..956d048dfc4 100644 --- a/modules/drivers/sqlserver/src/metabase/driver/sqlserver.clj +++ b/modules/drivers/sqlserver/src/metabase/driver/sqlserver.clj @@ -32,13 +32,14 @@ (driver/register! :sqlserver, :parent :sql-jdbc) -(doseq [[feature supported?] {:regex false - :case-sensitivity-string-filter-options false - :now true - :datetime-diff true - :convert-timezone true - :test/jvm-timezone-setting false - :index-info true}] +(doseq [[feature supported?] {:case-sensitivity-string-filter-options false + :convert-timezone true + :datetime-diff true + :index-info true + :now true + :regex false + :sql/window-functions.order-by-output-column-numbers false + :test/jvm-timezone-setting false}] (defmethod driver/database-supports? [:sqlserver feature] [_driver _feature _db] supported?)) (defmethod driver/database-supports? [:sqlserver :percentile-aggregations] @@ -448,7 +449,9 @@ ;; For the GROUP BY, we replace the unoptimized fields with the optimized ones, e.g. ;; ;; GROUP BY year(field), month(field) - (apply sql.helpers/group-by new-hsql (mapv (partial sql.qp/->honeysql driver) optimized))))) + (apply sql.helpers/group-by new-hsql (mapv (partial sql.qp/->honeysql driver) optimized)) + ;; remove duplicate group by clauses (from the optimize breakout clauses stuff) + (update new-hsql :group-by distinct)))) (defn- optimize-order-by-subclauses "Optimize `:order-by` `subclauses` using [[optimized-temporal-buckets]], if possible." @@ -468,7 +471,9 @@ ;; year(), month(), and day() can make use of indexes while DateFromParts() cannot. (let [query (update query :order-by optimize-order-by-subclauses) parent-method (get-method sql.qp/apply-top-level-clause [:sql-jdbc :order-by])] - (parent-method driver :order-by honeysql-form query))) + (-> (parent-method driver :order-by honeysql-form query) + ;; order bys have to be distinct in SQL Server!!!!!!!1 + (update :order-by distinct)))) ;; SQLServer doesn't support `TRUE`/`FALSE`; it uses `1`/`0`, respectively; convert these booleans to numbers. (defmethod sql.qp/->honeysql [:sqlserver Boolean] diff --git a/modules/drivers/vertica/src/metabase/driver/vertica.clj b/modules/drivers/vertica/src/metabase/driver/vertica.clj index b2e5cc444aa..32ea6cd41bf 100644 --- a/modules/drivers/vertica/src/metabase/driver/vertica.clj +++ b/modules/drivers/vertica/src/metabase/driver/vertica.clj @@ -27,11 +27,12 @@ ::sql-jdbc.legacy/use-legacy-classes-for-read-and-set ::sql.qp.empty-string-is-null/empty-string-is-null}) -(doseq [[feature supported?] {:percentile-aggregations false - :datetime-diff true - :now true - :convert-timezone true - :test/jvm-timezone-setting false}] +(doseq [[feature supported?] {:convert-timezone true + :datetime-diff true + :now true + :percentile-aggregations false + :sql/window-functions.order-by-output-column-numbers false + :test/jvm-timezone-setting false}] (defmethod driver/database-supports? [:vertica feature] [_driver _feature _db] supported?)) (defmethod driver/db-start-of-week :vertica diff --git a/src/metabase/db.clj b/src/metabase/db.clj index d436d0e5ca0..ee5f3644769 100644 --- a/src/metabase/db.clj +++ b/src/metabase/db.clj @@ -80,7 +80,6 @@ (fn [& args] (apply f* (unique-identifier) args)))) - (defn increment-app-db-unique-indentifier! "Increment the [[unique-identifier]] for the Metabase application DB. This effectively flushes all caches using it as a key (including things using [[mdb/memoize-for-application-db]]) such as the Settings cache. Should only be used diff --git a/src/metabase/driver.clj b/src/metabase/driver.clj index fcd3544db2c..aac5b659b2b 100644 --- a/src/metabase/driver.clj +++ b/src/metabase/driver.clj @@ -589,7 +589,10 @@ :describe-fields ;; Does the driver support fingerprint the fields. Default is true - :fingerprint}) + :fingerprint + + ;; Does this driver support window functions like cumulative count and cumulative sum? (default: false) + :window-functions}) (defmulti database-supports? "Does this driver and specific instance of a database support a certain `feature`? @@ -609,8 +612,11 @@ (database-supports? :mongo :set-timezone mongo-db) ; -> true" {:arglists '([driver feature database]), :added "0.41.0"} (fn [driver feature _database] - (when-not (features feature) - (throw (Exception. (tru "Invalid driver feature: {0}" feature)))) + ;; only make sure unqualified keywords are explicitly defined in [[features]]. + (when (simple-keyword? feature) + (when-not (features feature) + (throw (ex-info (tru "Invalid driver feature: {0}" feature) + {:feature feature})))) [(dispatch-on-initialized-driver driver) feature]) :hierarchy #'hierarchy) diff --git a/src/metabase/driver/h2.clj b/src/metabase/driver/h2.clj index ce2bd4d4c0e..dbc9476d0cf 100644 --- a/src/metabase/driver/h2.clj +++ b/src/metabase/driver/h2.clj @@ -66,16 +66,16 @@ ;;; | metabase.driver impls | ;;; +----------------------------------------------------------------------------------------------------------------+ -(doseq [[feature supported?] {:full-join false - :regex true - :percentile-aggregations false - :actions true +(doseq [[feature supported?] {:actions true :actions/custom true :datetime-diff true + :full-join false + :index-info true :now true + :percentile-aggregations false + :regex true :test/jvm-timezone-setting false - :uploads true - :index-info true}] + :uploads true}] (defmethod driver/database-supports? [:h2 feature] [_driver _feature _database] supported?)) diff --git a/src/metabase/driver/mysql.clj b/src/metabase/driver/mysql.clj index 65c9654ebae..949c444e9ff 100644 --- a/src/metabase/driver/mysql.clj +++ b/src/metabase/driver/mysql.clj @@ -53,20 +53,21 @@ (defmethod driver/display-name :mysql [_] "MySQL") -(doseq [[feature supported?] {:persist-models true - :convert-timezone true - :datetime-diff true - :now true - :regex false - :percentile-aggregations false - :full-join false - :uploads true - :schemas false - ;; MySQL LIKE clauses are case-sensitive or not based on whether the collation of the server and the columns - ;; themselves. Since this isn't something we can really change in the query itself don't present the option to the - ;; users in the UI - :case-sensitivity-string-filter-options false - :index-info true}] +(doseq [[feature supported?] {;; MySQL LIKE clauses are case-sensitive or not based on whether the collation of the + ;; server and the columns themselves. Since this isn't something we can really change in + ;; the query itself don't present the option to the users in the UI + :case-sensitivity-string-filter-options false + :convert-timezone true + :datetime-diff true + :full-join false + :index-info true + :now true + :percentile-aggregations false + :persist-models true + :regex false + :schemas false + :sql/window-functions.order-by-output-column-numbers false + :uploads true}] (defmethod driver/database-supports? [:mysql feature] [_driver _feature _db] supported?)) ;; This is a bit of a lie since the JSON type was introduced for MySQL since 5.7.8. diff --git a/src/metabase/driver/postgres.clj b/src/metabase/driver/postgres.clj index 7cf962ce42c..ccee5ad8e41 100644 --- a/src/metabase/driver/postgres.clj +++ b/src/metabase/driver/postgres.clj @@ -61,13 +61,14 @@ (defmethod driver/display-name :postgres [_] "PostgreSQL") ;; Features that are supported by Postgres and all of its child drivers like Redshift -(doseq [[feature supported?] {:convert-timezone true - :datetime-diff true - :now true - :persist-models true - :table-privileges true - :schemas true - :connection-impersonation true}] +(doseq [[feature supported?] {:connection-impersonation true + :convert-timezone true + :datetime-diff true + :now true + :persist-models true + :schemas true + :sql/window-functions.order-by-output-column-numbers false + :table-privileges true}] (defmethod driver/database-supports? [:postgres feature] [_driver _feature _db] supported?)) (defmethod driver/database-supports? [:postgres :nested-field-columns] diff --git a/src/metabase/driver/sql.clj b/src/metabase/driver/sql.clj index 28808e680e8..b297cfb3ea3 100644 --- a/src/metabase/driver/sql.clj +++ b/src/metabase/driver/sql.clj @@ -18,16 +18,17 @@ (driver/register! :sql, :abstract? true) -(doseq [feature [:standard-deviation-aggregations - :foreign-keys - :expressions +(doseq [feature [:advanced-math-expressions + :binning :expression-aggregations + :expressions + :foreign-keys :native-parameters :nested-queries - :binning - :advanced-math-expressions :percentile-aggregations - :regex]] + :regex + :standard-deviation-aggregations + :window-functions]] (defmethod driver/database-supports? [:sql feature] [_driver _feature _db] true)) (doseq [join-feature [:left-join diff --git a/src/metabase/driver/sql/query_processor.clj b/src/metabase/driver/sql/query_processor.clj index 4e7caf061ed..c16a607c7be 100644 --- a/src/metabase/driver/sql/query_processor.clj +++ b/src/metabase/driver/sql/query_processor.clj @@ -420,7 +420,7 @@ of `honeysql-form`. Most drivers can use the default implementations for all of these methods, but some may need to override one or more (e.g. SQL Server needs to override this method for the `:limit` clause, since T-SQL uses `TOP` instead of `LIMIT`)." - {:added "0.32.0", :arglists '([driver top-level-clause honeysql-form query]), :style/indent 2} + {:added "0.32.0", :arglists '([driver top-level-clause honeysql-form query]), :style/indent [:form]} (fn [driver top-level-clause _ _] [(driver/dispatch-on-initialized-driver driver) top-level-clause]) :hierarchy #'driver/hierarchy) @@ -687,6 +687,101 @@ (->honeysql driver mbql-expr) (->honeysql driver power)]) +;;; NOCOMMIT -- we need to document the new `:window-functions` feature. +(defmethod driver/database-supports? [:sql :sql/window-functions.order-by-output-column-numbers] + [_driver _feature _database] + true) + +(defn- format-rows-unbounded-preceding [_clause _args] + ["ROWS UNBOUNDED PRECEDING"]) + +(sql/register-clause! + ::rows-unbounded-preceding + #'format-rows-unbounded-preceding + nil) + +(defn- over-rows-with-order-by-copying-expressions + "e.g. + + OVER (ORDER BY date_trunc('month', my_column))" + [driver expr] + (let [{:keys [group-by]} (apply-top-level-clause + driver + :breakout + {} + *inner-query*)] + [:over + [expr + {:order-by (mapv (fn [expr] + [expr :asc]) + group-by) + ::rows-unbounded-preceding []}]])) + +;;; For databases that do something intelligent with ORDER BY 1, 2: we can take advantage of that functionality +;;; implement the window function versions of cumulative sum and cumulative sum more simply. + +(defn- over-rows-with-order-by-output-column-numbers + "e.g. + + OVER (ORDER BY 1 ROWS UNBOUNDED PRECEDING)" + [_driver expr] + [:over + [expr + {:order-by (mapv (fn [i] + [[:inline (inc i)] :asc]) + (range (count (:breakout *inner-query*)))) + ::rows-unbounded-preceding []}]]) + +(defn- cumulative-aggregation-window-function [driver expr] + (let [over-rows-with-order-by (if (driver/database-supports? driver + :sql/window-functions.order-by-output-column-numbers + (lib.metadata/database (qp.store/metadata-provider))) + over-rows-with-order-by-output-column-numbers + over-rows-with-order-by-copying-expressions)] + (over-rows-with-order-by driver expr))) + +;;; cum-count() +;;; +;;; should compile to SQL like +;;; +;;; sum(count()) OVER (ORDER BY ...) +;;; +;;; where the ORDER BY matches what's in the query (i.e., the breakouts), or +;;; +;;; sum(count()) OVER (ORDER BY 1 ROWS UNBOUNDED PRECEDING) +;;; +;;; if the database supports ordering by SELECT expression position +(defmethod ->honeysql [:sql :cum-count] + [driver [_cum-count expr-or-nil]] + ;; a cumulative count with no breakouts doesn't really mean anything, just compile it as a normal count. + (if (empty? (:breakout *inner-query*)) + (->honeysql driver [:count expr-or-nil]) + (cumulative-aggregation-window-function + driver + [:sum (if expr-or-nil + [:count (->honeysql driver expr-or-nil)] + [:count :*])]))) + +;;; cum-sum(total) +;;; +;;; should compile to SQL like +;;; +;;; sum(sum(total)) OVER (ORDER BY ...) +;;; +;;; where the ORDER BY matches what's in the query (i.e., the breakouts), or +;;; +;;; sum(sum(total)) OVER (ORDER BY 1 ROWS UNBOUNDED PRECEDING) +;;; +;;; if the database supports ordering by SELECT expression position +(defmethod ->honeysql [:sql :cum-sum] + [driver [_cum-sum expr]] + ;; a cumulative sum with no breakouts doesn't really mean anything, just compile it as a normal sum. + (if (empty? (:breakout *inner-query*)) + (->honeysql driver [:sum expr]) + (cumulative-aggregation-window-function + driver + [:sum [:sum (->honeysql driver expr)]]))) + (defn- interval? [expr] (mbql.u/is-clause? :interval expr)) diff --git a/src/metabase/driver/sql_jdbc/execute.clj b/src/metabase/driver/sql_jdbc/execute.clj index 96f2b4d606f..5fe6bacc9df 100644 --- a/src/metabase/driver/sql_jdbc/execute.clj +++ b/src/metabase/driver/sql_jdbc/execute.clj @@ -135,6 +135,7 @@ driver/dispatch-on-initialized-driver :hierarchy #'driver/hierarchy) +;;; TODO -- we should just make this a FEATURE!!!!!1 (defmulti ^Statement statement-supported? "Indicates whether the given driver supports creating a java.sql.Statement, via the Connection. By default, this is true for all :sql-jdbc drivers. If the underlying driver does not support Statement creation, override this as diff --git a/src/metabase/legacy_mbql/util.cljc b/src/metabase/legacy_mbql/util.cljc index 20418f7c3ed..588a4b750c3 100644 --- a/src/metabase/legacy_mbql/util.cljc +++ b/src/metabase/legacy_mbql/util.cljc @@ -399,11 +399,13 @@ (mu/defn add-order-by-clause :- mbql.s/MBQLQuery "Add a new `:order-by` clause to an MBQL `inner-query`. If the new order-by clause references a Field that is already being used in another order-by clause, this function does nothing." - [inner-query :- mbql.s/MBQLQuery - [_ [_ id-or-name :as _field], :as order-by-clause] :- mbql.s/OrderBy] - (let [existing-fields (set (for [[_ [_ id-or-name]] (:order-by inner-query)] - id-or-name))] - (if (existing-fields id-or-name) + [inner-query :- mbql.s/MBQLQuery + [_dir orderable, :as order-by-clause] :- mbql.s/OrderBy] + (let [existing-orderables (into #{} + (map (fn [[_dir orderable]] + orderable)) + (:order-by inner-query))] + (if (existing-orderables orderable) ;; Field already referenced, nothing to do inner-query ;; otherwise add new clause at the end @@ -577,6 +579,7 @@ (assert (not= candidate original) (str "unique-alias-fn must return a different string than its input. Input: " (pr-str candidate))) + (swap! id+original->unique assoc [id name-key] candidate) (recur id candidate)))))))))) (mu/defn uniquify-names :- [:and @@ -632,7 +635,6 @@ Most often, `aggregation->name-fn` will be something like `annotate/aggregation-name`, but for purposes of keeping the `metabase.legacy-mbql` module seperate from the `metabase.query-processor` code we'll let you pass that in yourself." - {:style/indent 1} [aggregation->name-fn :- fn? aggregations :- [:sequential mbql.s/Aggregation]] (lib.util.match/replace aggregations @@ -648,7 +650,6 @@ (mu/defn pre-alias-and-uniquify-aggregations :- UniquelyNamedAggregations "Wrap every aggregation clause in a `:named` clause with a unique name. Combines `pre-alias-aggregations` with `uniquify-named-aggregations`." - {:style/indent 1} [aggregation->name-fn :- fn? aggregations :- [:sequential mbql.s/Aggregation]] (-> (pre-alias-aggregations aggregation->name-fn aggregations) diff --git a/src/metabase/lib/aggregation.cljc b/src/metabase/lib/aggregation.cljc index 60cadff4f03..9573f4eeccc 100644 --- a/src/metabase/lib/aggregation.cljc +++ b/src/metabase/lib/aggregation.cljc @@ -95,8 +95,8 @@ (defmethod lib.metadata.calculation/column-name-method ::count-aggregation [_query _stage-number [tag :as _clause]] (case tag - :count "count" - :cum-count "cum_count" + :count "count" + :cum-count "count" :count-where "count_where")) (defmethod lib.metadata.calculation/metadata-method ::quantity-aggregation diff --git a/src/metabase/query_processor/middleware/cumulative_aggregations.clj b/src/metabase/query_processor/middleware/cumulative_aggregations.clj index be04bc8b57f..ce977d22325 100644 --- a/src/metabase/query_processor/middleware/cumulative_aggregations.clj +++ b/src/metabase/query_processor/middleware/cumulative_aggregations.clj @@ -1,8 +1,11 @@ (ns metabase.query-processor.middleware.cumulative-aggregations "Middlware for handling cumulative count and cumulative sum aggregations." (:require + [metabase.driver :as driver] [metabase.legacy-mbql.schema :as mbql.s] + [metabase.lib.metadata :as lib.metadata] [metabase.lib.util.match :as lib.util.match] + [metabase.query-processor.store :as qp.store] [metabase.util.malli :as mu])) ;;;; Pre-processing @@ -29,8 +32,16 @@ "Pre-processing middleware. Rewrite `:cum-count` and `:cum-sum` aggregations as `:count` and `:sum` respectively. Add information about the indecies of the replaced aggregations under the `::replaced-indices` key." [{{breakouts :breakout, aggregations :aggregation} :query, :as query}] - (if-not (lib.util.match/match aggregations #{:cum-count :cum-sum}) + (cond + ;; no need to rewrite `:cum-sum` and `:cum-count` functions, this driver supports native window function versions + (driver/database-supports? driver/*driver* :window-functions (lib.metadata/database (qp.store/metadata-provider))) + query + + ;; nothing to rewrite + (not (lib.util.match/match aggregations #{:cum-count :cum-sum})) query + + :else (let [query' (replace-cumulative-ags query) ;; figure out which indexes are being changed in the results. Since breakouts always get included in ;; results first we need to offset the indexes to change by the number of breakouts diff --git a/test/metabase/driver/sql/query_processor_test.clj b/test/metabase/driver/sql/query_processor_test.clj index 085c70a7ba3..9fbe28d4000 100644 --- a/test/metabase/driver/sql/query_processor_test.clj +++ b/test/metabase/driver/sql/query_processor_test.clj @@ -275,7 +275,8 @@ query)}))) (deftest ^:parallel correct-for-null-behaviour - (testing "NULLs should be treated intuitively in filters (SQL has somewhat unintuitive semantics where NULLs get propagated out of expressions)." + (testing (str "NULLs should be treated intuitively in filters (SQL has somewhat unintuitive semantics where NULLs " + "get propagated out of expressions).") (is (= [[nil] ["bar"]] (query-on-dataset-with-nils {:filter [:not [:starts-with [:field "A" {:base-type :type/Text}] "f"]]}))) (is (= [[nil] ["bar"]] diff --git a/test/metabase/legacy_mbql/util_test.cljc b/test/metabase/legacy_mbql/util_test.cljc index 1b2925eefa4..a0bb6bc8178 100644 --- a/test/metabase/legacy_mbql/util_test.cljc +++ b/test/metabase/legacy_mbql/util_test.cljc @@ -121,28 +121,32 @@ [:asc [:field 20 nil]]]} (mbql.u/add-order-by-clause {:source-table 1 :order-by [[:asc [:field 10 nil]]]} - [:asc [:field 20 nil]])))) + [:asc [:field 20 nil]]))))) +(t/deftest ^:parallel add-order-by-clause-test-2 (t/testing "duplicate clauses should get ignored" (t/is (= {:source-table 1 :order-by [[:asc [:field 10 nil]]]} (mbql.u/add-order-by-clause {:source-table 1 :order-by [[:asc [:field 10 nil]]]} - [:asc [:field 10 nil]])))) + [:asc [:field 10 nil]]))))) +(t/deftest ^:parallel add-order-by-clause-test-3 (t/testing "as should clauses that reference the same Field" (t/is (= {:source-table 1 :order-by [[:asc [:field 10 nil]]]} (mbql.u/add-order-by-clause {:source-table 1 :order-by [[:asc [:field 10 nil]]]} - [:desc [:field 10 nil]]))) + [:desc [:field 10 nil]]))))) - (t/testing "fields with different amounts of wrapping (plain field vs datetime-field)" - (t/is (= {:source-table 1 - :order-by [[:asc [:field 10 nil]]]} - (mbql.u/add-order-by-clause {:source-table 1 - :order-by [[:asc [:field 10 nil]]]} - [:asc [:field 10 {:temporal-unit :day}]])))))) +(t/deftest ^:parallel add-order-by-clause-test-4 + (t/testing "fields with different temporal-units should still get added (#40995)" + (t/is (= {:source-table 1 + :order-by [[:asc [:field 10 nil]] + [:asc [:field 10 {:temporal-unit :day}]]]} + (mbql.u/add-order-by-clause {:source-table 1 + :order-by [[:asc [:field 10 nil]]]} + [:asc [:field 10 {:temporal-unit :day}]]))))) (t/deftest ^:parallel combine-filter-clauses-test (t/is (= [:and [:= [:field 1 nil] 100] [:= [:field 2 nil] 200]] @@ -652,29 +656,37 @@ (t/deftest ^:parallel unique-name-generator-test (t/testing "Can we get a simple unique name generator" (t/is (= ["count" "sum" "count_2" "count_2_2"] - (map (mbql.u/unique-name-generator) ["count" "sum" "count" "count_2"])))) + (map (mbql.u/unique-name-generator) ["count" "sum" "count" "count_2"]))))) + +(t/deftest ^:parallel unique-name-generator-test-2 (t/testing "Can we get an idempotent unique name generator" (t/is (= ["count" "sum" "count" "count_2"] - (map (mbql.u/unique-name-generator) [:x :y :x :z] ["count" "sum" "count" "count_2"])))) + (map (mbql.u/unique-name-generator) [:x :y :x :z] ["count" "sum" "count" "count_2"]))))) + +(t/deftest ^:parallel unique-name-generator-test-3 (t/testing "Can the same object have multiple aliases" (t/is (= ["count" "sum" "count" "count_2"] - (map (mbql.u/unique-name-generator) [:x :y :x :x] ["count" "sum" "count" "count_2"])))) + (map (mbql.u/unique-name-generator) [:x :y :x :x] ["count" "sum" "count" "count_2"]))))) - (t/testing "idempotence (2-arity calls to generated function)" +(t/deftest ^:parallel unique-name-generator-idempotence-test + (t/testing "idempotence (2-arity calls to generated function) (#40994)" (let [unique-name (mbql.u/unique-name-generator)] - (t/is (= ["A" "B" "A" "A_2"] + (t/is (= ["A" "B" "A" "A_2" "A_2"] [(unique-name :x "A") (unique-name :x "B") (unique-name :x "A") - (unique-name :y "A")])))) + (unique-name :y "A") + (unique-name :y "A")]))))) - #_{:clj-kondo/ignore [:discouraged-var]} +(t/deftest ^:parallel unique-name-generator-options-test (t/testing "options" (t/testing :name-key-fn - (let [f (mbql.u/unique-name-generator :name-key-fn str/lower-case)] + (let [f (mbql.u/unique-name-generator :name-key-fn #_{:clj-kondo/ignore [:discouraged-var]} str/lower-case)] (t/is (= ["x" "X_2" "X_3"] - (map f ["x" "X" "X"]))))) + (map f ["x" "X" "X"]))))))) +(t/deftest ^:parallel unique-name-generator-options-test-2 + (t/testing "options" (t/testing :unique-alias-fn (let [f (mbql.u/unique-name-generator :unique-alias-fn (fn [x y] (str y "~~" x)))] (t/is (= ["x" "2~~x"] diff --git a/test/metabase/query_processor/middleware/add_implicit_clauses_test.clj b/test/metabase/query_processor/middleware/add_implicit_clauses_test.clj index 0ec1cc5e5a8..1eabfe5535c 100644 --- a/test/metabase/query_processor/middleware/add_implicit_clauses_test.clj +++ b/test/metabase/query_processor/middleware/add_implicit_clauses_test.clj @@ -7,10 +7,8 @@ [metabase.lib.test-util :as lib.tu] [metabase.lib.test-util.macros :as lib.tu.macros] [metabase.lib.types.isa :as lib.types.isa] - [metabase.query-processor.middleware.add-implicit-clauses - :as qp.add-implicit-clauses] - [metabase.query-processor.middleware.add-source-metadata - :as add-source-metadata] + [metabase.query-processor.middleware.add-implicit-clauses :as qp.add-implicit-clauses] + [metabase.query-processor.middleware.add-source-metadata :as add-source-metadata] [metabase.query-processor.preprocess :as qp.preprocess] [metabase.query-processor.store :as qp.store] [metabase.query-processor.test-util :as qp.test-util] @@ -44,7 +42,10 @@ :order-by [[:asc [:field 1 nil]]]} (#'qp.add-implicit-clauses/add-implicit-breakout-order-by {:source-table 1 - :breakout [[:field 1 nil]]}))) + :breakout [[:field 1 nil]]}))))) + +(deftest ^:parallel add-order-bys-for-breakouts-test-2 + (testing "we should add order-bys for breakout clauses" (testing "Add Field to existing order-by" (is (= {:source-table 1 :breakout [[:field 2 nil]] @@ -53,8 +54,10 @@ (#'qp.add-implicit-clauses/add-implicit-breakout-order-by {:source-table 1 :breakout [[:field 2 nil]] - :order-by [[:asc [:field 1 nil]]]})))) + :order-by [[:asc [:field 1 nil]]]})))))) +(deftest ^:parallel add-order-bys-for-breakouts-test-3 + (testing "we should add order-bys for breakout clauses" (testing "...but not if the Field is already in an order-by" (is (= {:source-table 1 :breakout [[:field 1 nil]] @@ -62,22 +65,30 @@ (#'qp.add-implicit-clauses/add-implicit-breakout-order-by {:source-table 1 :breakout [[:field 1 nil]] - :order-by [[:asc [:field 1 nil]]]}))) + :order-by [[:asc [:field 1 nil]]]})))))) + +(deftest ^:parallel add-order-bys-for-breakouts-test-4 + (testing "we should add order-bys for breakout clauses" + (testing "...but not if the Field is already in an order-by" (is (= {:source-table 1 :breakout [[:field 1 nil]] :order-by [[:desc [:field 1 nil]]]} (#'qp.add-implicit-clauses/add-implicit-breakout-order-by {:source-table 1 :breakout [[:field 1 nil]] - :order-by [[:desc [:field 1 nil]]]}))) + :order-by [[:desc [:field 1 nil]]]})))))) + +(deftest ^:parallel add-order-bys-for-breakouts-test-5 + (testing "we should add order-bys for breakout clauses" + (testing "...but not if the Field is already in an order-by" (testing "With a datetime-field" (is (= {:source-table 1 :breakout [[:field 1 {:temporal-unit :day}]] - :order-by [[:asc [:field 1 nil]]]} + :order-by [[:asc [:field 1 {:temporal-unit :day}]]]} (#'qp.add-implicit-clauses/add-implicit-breakout-order-by {:source-table 1 :breakout [[:field 1 {:temporal-unit :day}]] - :order-by [[:asc [:field 1 nil]]]}))))))) + :order-by [[:asc [:field 1 {:temporal-unit :day}]]]}))))))) (defn- add-implicit-fields [inner-query] (if (qp.store/initialized?) diff --git a/test/metabase/query_processor/middleware/cumulative_aggregations_test.clj b/test/metabase/query_processor/middleware/cumulative_aggregations_test.clj index af860dbdcd5..7f81f5e86aa 100644 --- a/test/metabase/query_processor/middleware/cumulative_aggregations_test.clj +++ b/test/metabase/query_processor/middleware/cumulative_aggregations_test.clj @@ -1,9 +1,12 @@ (ns metabase.query-processor.middleware.cumulative-aggregations-test (:require [clojure.test :refer :all] + [metabase.driver :as driver] + [metabase.lib.test-metadata :as meta] [metabase.query-processor.middleware.cumulative-aggregations :as - qp.cumulative-aggregations])) + qp.cumulative-aggregations] + [metabase.query-processor.store :as qp.store])) (deftest ^:parallel add-values-from-last-row-test (are [expected indecies] (= expected @@ -80,16 +83,23 @@ :type :query :query {:source-table 1, :aggregation [[:* [:cum-count] 1]]}}))))) +(driver/register! ::no-window-function-driver) + +(defmethod driver/database-supports? [::no-window-function-driver :window-functions] + [_driver _feature _database] + false) (defn- handle-cumulative-aggregations [query] - (let [query (#'qp.cumulative-aggregations/rewrite-cumulative-aggregations query) - rff (qp.cumulative-aggregations/sum-cumulative-aggregation-columns query (constantly conj)) - rf (rff nil)] - (transduce identity rf [[1 1] - [2 2] - [3 3] - [4 4] - [5 5]]))) + (driver/with-driver ::no-window-function-driver + (qp.store/with-metadata-provider meta/metadata-provider + (let [query (#'qp.cumulative-aggregations/rewrite-cumulative-aggregations query) + rff (qp.cumulative-aggregations/sum-cumulative-aggregation-columns query (constantly conj)) + rf (rff nil)] + (transduce identity rf [[1 1] + [2 2] + [3 3] + [4 4] + [5 5]]))))) (deftest ^:parallel e2e-test (testing "make sure we take breakout fields into account" @@ -99,7 +109,9 @@ :type :query :query {:source-table 1 :breakout [[:field 1 nil]] - :aggregation [[:cum-sum [:field 1 nil]]]}})))) + :aggregation [[:cum-sum [:field 1 nil]]]}}))))) + +(deftest ^:parallel e2e-test-2 (testing "make sure we sum up cumulative aggregations inside expressions correctly" (testing "we shouldn't be doing anything special with the expressions, let the database figure that out. We will just SUM" (is (= [[1 1] [2 3] [3 6] [4 10] [5 15]] diff --git a/test/metabase/query_processor/util/add_alias_info_test.clj b/test/metabase/query_processor/util/add_alias_info_test.clj index d68c4fe4f7d..c8e8d411a5d 100644 --- a/test/metabase/query_processor/util/add_alias_info_test.clj +++ b/test/metabase/query_processor/util/add_alias_info_test.clj @@ -10,8 +10,7 @@ [metabase.lib.test-util :as lib.tu] [metabase.lib.test-util.macros :as lib.tu.macros] [metabase.lib.test-util.metadata-providers.mock :as providers.mock] - [metabase.query-processor.middleware.fix-bad-references - :as fix-bad-refs] + [metabase.query-processor.middleware.fix-bad-references :as fix-bad-refs] [metabase.query-processor.preprocess :as qp.preprocess] [metabase.query-processor.store :as qp.store] [metabase.query-processor.util.add-alias-info :as add] @@ -838,3 +837,45 @@ (add-alias-info {:type :query :database (meta/id) :query {:source-table "card__3"}})))))) + +(deftest ^:parallel handle-multiple-orders-bys-on-same-field-correctly-test + (testing "#40993" + (let [query (lib.tu.macros/mbql-query orders + {:aggregation [[:count]] + :breakout [[:field %created-at {:temporal-unit :month}] + [:field %created-at {:temporal-unit :day}]]})] + (qp.store/with-metadata-provider meta/metadata-provider + (driver/with-driver :h2 + (is (=? {:query {:source-table (meta/id :orders) + :breakout [[:field + (meta/id :orders :created-at) + {:temporal-unit :month + ::add/source-alias "CREATED_AT" + ::add/desired-alias "CREATED_AT" + ::add/position 0}] + [:field + (meta/id :orders :created-at) + {:temporal-unit :day + ::add/source-alias "CREATED_AT" + ::add/desired-alias "CREATED_AT_2" + ::add/position 1}]] + :aggregation [[:aggregation-options + [:count] + {::add/source-alias "count" + ::add/position 2 + ::add/desired-alias "count"}]] + :order-by [[:asc + [:field + (meta/id :orders :created-at) + {:temporal-unit :month + ::add/source-alias "CREATED_AT" + ::add/desired-alias "CREATED_AT" + ::add/position 0}]] + [:asc + [:field + (meta/id :orders :created-at) + {:temporal-unit :day + ::add/source-alias "CREATED_AT" + ::add/desired-alias "CREATED_AT_2" + ::add/position 1}]]]}} + (add/add-alias-info (qp.preprocess/preprocess query))))))))) diff --git a/test/metabase/query_processor_test/aggregation_test.clj b/test/metabase/query_processor_test/aggregation_test.clj index 662366a19e0..7ecd8252adf 100644 --- a/test/metabase/query_processor_test/aggregation_test.clj +++ b/test/metabase/query_processor_test/aggregation_test.clj @@ -116,11 +116,6 @@ {:aggregation [[:max $latitude]] :breakout [$price]})))))) - -;;; +----------------------------------------------------------------------------------------------------------------+ -;;; | MULTIPLE AGGREGATIONS | -;;; +----------------------------------------------------------------------------------------------------------------+ - (deftest ^:parallel multiple-aggregations-test (mt/test-drivers (mt/normal-drivers) (testing "two aggregations" @@ -146,132 +141,6 @@ (mt/run-mbql-query venues {:aggregation [[:count] [:count]]}))))))) - -;;; ------------------------------------------------- CUMULATIVE SUM ------------------------------------------------- - -(deftest ^:parallel cumulative-sum-test - (mt/test-drivers (mt/normal-drivers) - (testing "cum_sum w/o breakout should be treated the same as sum" - (let [result (mt/run-mbql-query users - {:aggregation [[:cum-sum $id]]})] - (is (=? [{:display_name "Cumulative sum of ID", - :source :aggregation}] - (-> result :data :cols))) - (is (= [[120]] - (mt/formatted-rows [int] - result))))))) - -(deftest ^:parallel cumulative-sum-test-2 - (mt/test-drivers (mt/normal-drivers) - (testing " Simple cumulative sum where breakout field is same as cum_sum field" - (is (= [[ 1 1] - [ 2 3] - [ 3 6] - [ 4 10] - [ 5 15] - [ 6 21] - [ 7 28] - [ 8 36] - [ 9 45] - [10 55] - [11 66] - [12 78] - [13 91] - [14 105] - [15 120]] - (mt/formatted-rows [int int] - (mt/run-mbql-query users - {:aggregation [[:cum-sum $id]] - :breakout [$id]}))))))) - -(deftest ^:parallel cumulative-sum-test-3 - (mt/test-drivers (mt/normal-drivers) - (testing " Cumulative sum w/ a different breakout field" - (is (= [["Broen Olujimi" 14] - ["Conchúr Tihomir" 21] - ["Dwight Gresham" 34] - ["Felipinho Asklepios" 36] - ["Frans Hevel" 46] - ["Kaneonuskatew Eiran" 49] - ["Kfir Caj" 61] - ["Nils Gotam" 70] - ["Plato Yeshua" 71] - ["Quentin Sören" 76] - ["Rüstem Hebel" 91] - ["Shad Ferdynand" 97] - ["Simcha Yan" 101] - ["Spiros Teofil" 112] - ["Szymon Theutrich" 120]] - (mt/formatted-rows [str int] - (mt/run-mbql-query users - {:aggregation [[:cum-sum $id]] - :breakout [$name]}))))))) - -(deftest ^:parallel cumulative-sum-test-4 - (mt/test-drivers (mt/normal-drivers) - (testing " Cumulative sum w/ a different breakout field that requires grouping" - (is (= [[1 1211] - [2 4066] - [3 4681] - [4 5050]] - (mt/formatted-rows [int int] - (mt/run-mbql-query venues - {:aggregation [[:cum-sum $id]] - :breakout [$price]}))))))) - -(deftest ^:parallel cumulative-count-test - (mt/test-drivers (mt/normal-drivers) - (testing "cumulative count aggregations" - (testing "w/o breakout should be treated the same as count" - (is (= {:rows [[15]] - :cols [(qp.test-util/aggregate-col :cum-count :users :id)]} - (qp.test-util/rows-and-cols - (mt/format-rows-by [int] - (mt/run-mbql-query users - {:aggregation [[:cum-count $id]]}))))))))) - -(deftest ^:parallel cumulative-count-with-breakout-test - (mt/test-drivers (mt/normal-drivers) - (testing "w/ breakout on field with distinct values" - (is (=? {:rows [["Broen Olujimi" 1] - ["Conchúr Tihomir" 2] - ["Dwight Gresham" 3] - ["Felipinho Asklepios" 4] - ["Frans Hevel" 5] - ["Kaneonuskatew Eiran" 6] - ["Kfir Caj" 7] - ["Nils Gotam" 8] - ["Plato Yeshua" 9] - ["Quentin Sören" 10] - ["Rüstem Hebel" 11] - ["Shad Ferdynand" 12] - ["Simcha Yan" 13] - ["Spiros Teofil" 14] - ["Szymon Theutrich" 15]] - :cols [(qp.test-util/breakout-col :users :name) - (qp.test-util/aggregate-col :cum-count :users :id)]} - (qp.test-util/rows-and-cols - (mt/format-rows-by [str int] - (mt/run-mbql-query users - {:aggregation [[:cum-count $id]] - :breakout [$name]})))))))) - - -(deftest ^:parallel cumulative-count-with-breakout-test-2 - (mt/test-drivers (mt/normal-drivers) - (testing "w/ breakout on field that requires grouping" - (is (=? {:cols [(qp.test-util/breakout-col :venues :price) - (qp.test-util/aggregate-col :cum-count :venues :id)] - :rows [[1 22] - [2 81] - [3 94] - [4 100]]} - (qp.test-util/rows-and-cols - (mt/format-rows-by [int int] - (mt/run-mbql-query venues - {:aggregation [[:cum-count $id]] - :breakout [$price]})))))))) - (deftest field-settings-for-aggregate-fields-test (testing "Does `:settings` show up for aggregate Fields?" (tu/with-temp-vals-in-db Field (data/id :venues :price) {:settings {:is_priceless false}} diff --git a/test/metabase/query_processor_test/cumulative_aggregation_test.clj b/test/metabase/query_processor_test/cumulative_aggregation_test.clj new file mode 100644 index 00000000000..0d283ecfa28 --- /dev/null +++ b/test/metabase/query_processor_test/cumulative_aggregation_test.clj @@ -0,0 +1,248 @@ +(ns metabase.query-processor-test.cumulative-aggregation-test + (:require + [clojure.test :refer :all] + [java-time.api :as t] + [metabase.lib.core :as lib] + [metabase.lib.metadata :as lib.metadata] + [metabase.lib.metadata.jvm :as lib.metadata.jvm] + [metabase.query-processor :as qp] + [metabase.test :as mt])) + +(defn- ->local-date [t] + (t/local-date + (cond-> t + (instance? java.time.Instant t) + (t/zoned-date-time (t/zone-id "UTC"))))) + +(deftest ^:parallel cumulative-sum-test + (mt/test-drivers (mt/normal-drivers) + (testing "cum_sum w/o breakout should be treated the same as sum" + (let [result (mt/run-mbql-query users + {:aggregation [[:cum-sum $id]]})] + (is (=? [{:display_name "Cumulative sum of ID", + :source :aggregation}] + (-> result :data :cols))) + (is (= [[120]] + (mt/formatted-rows [int] + result))))))) + +(deftest ^:parallel cumulative-sum-test-2 + (mt/test-drivers (mt/normal-drivers) + (testing "Simple cumulative sum where breakout field is same as cum_sum field" + (let [query (mt/mbql-query users + {:aggregation [[:cum-sum $id]] + :breakout [$id]})] + (mt/with-native-query-testing-context query + (is (= [[1 1] + [2 3] + [3 6] + [4 10] + [5 15] + [6 21] + [7 28] + [8 36] + [9 45] + [10 55] + [11 66] + [12 78] + [13 91] + [14 105] + [15 120]] + (mt/formatted-rows [int int] + (qp/process-query query))))))))) + +(deftest ^:parallel cumulative-sum-test-3 + (mt/test-drivers (mt/normal-drivers) + (testing "Cumulative sum w/ a different breakout field" + (let [query (mt/mbql-query users + {:aggregation [[:cum-sum $id]] + :breakout [$name]})] + (mt/with-native-query-testing-context query + (is (= [["Broen Olujimi" 14] + ["Conchúr Tihomir" 21] + ["Dwight Gresham" 34] + ["Felipinho Asklepios" 36] + ["Frans Hevel" 46] + ["Kaneonuskatew Eiran" 49] + ["Kfir Caj" 61] + ["Nils Gotam" 70] + ["Plato Yeshua" 71] + ["Quentin Sören" 76] + ["Rüstem Hebel" 91] + ["Shad Ferdynand" 97] + ["Simcha Yan" 101] + ["Spiros Teofil" 112] + ["Szymon Theutrich" 120]] + (mt/formatted-rows [str int] + (qp/process-query query))))))))) + +(deftest ^:parallel cumulative-sum-test-4 + (mt/test-drivers (mt/normal-drivers) + (testing "Cumulative sum w/ a different breakout field that requires grouping" + (let [query (mt/mbql-query venues + {:aggregation [[:cum-sum $id]] + :breakout [$price]})] + (mt/with-native-query-testing-context query + (is (= [[1 1211] + [2 4066] + [3 4681] + [4 5050]] + (mt/formatted-rows [int int] + (qp/process-query query))))))))) + +(deftest ^:parallel cumulative-sum-with-bucketed-breakout-test + (mt/test-drivers (mt/normal-drivers) + (testing "cumulative sum with a temporally bucketed breakout" + (let [metadata-provider (lib.metadata.jvm/application-database-metadata-provider (mt/id)) + orders (lib.metadata/table metadata-provider (mt/id :orders)) + orders-created-at (lib.metadata/field metadata-provider (mt/id :orders :created_at)) + orders-total (lib.metadata/field metadata-provider (mt/id :orders :total)) + query (-> (lib/query metadata-provider orders) + (lib/breakout (lib/with-temporal-bucket orders-created-at :month)) + (lib/aggregate (lib/cum-sum orders-total)) + (lib/limit 3) + (assoc-in [:middleware :format-rows?] false))] + (mt/with-native-query-testing-context query + (is (= [[#t "2016-04-01" 52.76] + [#t "2016-05-01" 1318.49] + [#t "2016-06-01" 3391.41]] + (mt/formatted-rows [->local-date 2.0] + (qp/process-query query))))))))) + +(deftest ^:parallel cumulative-count-test + (mt/test-drivers (mt/normal-drivers) + (testing "cumulative count aggregations" + (testing "w/o breakout should be treated the same as count" + (let [query (mt/mbql-query users + {:aggregation [[:cum-count $id]]})] + (mt/with-native-query-testing-context query + (is (= [[15]] + (mt/formatted-rows [int] + (qp/process-query query)))))))))) + +(deftest ^:parallel cumulative-count-with-breakout-test + (mt/test-drivers (mt/normal-drivers) + (testing "w/ breakout on field with distinct values" + (let [query (mt/mbql-query users + {:aggregation [[:cum-count $id]] + :breakout [$name]})] + (mt/with-native-query-testing-context query + (is (= [["Broen Olujimi" 1] + ["Conchúr Tihomir" 2] + ["Dwight Gresham" 3] + ["Felipinho Asklepios" 4] + ["Frans Hevel" 5] + ["Kaneonuskatew Eiran" 6] + ["Kfir Caj" 7] + ["Nils Gotam" 8] + ["Plato Yeshua" 9] + ["Quentin Sören" 10] + ["Rüstem Hebel" 11] + ["Shad Ferdynand" 12] + ["Simcha Yan" 13] + ["Spiros Teofil" 14] + ["Szymon Theutrich" 15]] + (mt/formatted-rows [str int] + (qp/process-query query))))))))) + +(deftest ^:parallel cumulative-count-with-breakout-test-2 + (mt/test-drivers (mt/normal-drivers) + (testing "w/ breakout on field that requires grouping" + (let [query (mt/mbql-query venues + {:aggregation [[:cum-count $id]] + :breakout [$price]})] + (mt/with-native-query-testing-context query + (is (= [[1 22] + [2 81] + [3 94] + [4 100]] + (mt/formatted-rows [int int] + (qp/process-query query))))))))) + +(deftest ^:parallel cumulative-count-with-multiple-breakouts-test + (mt/test-drivers (mt/normal-drivers) + (let [query (-> (mt/mbql-query orders + {:aggregation [[:cum-count]] + :breakout [!year.created_at !month.created_at] + :limit 3}) + (assoc-in [:middleware :format-rows?] false))] + (mt/with-native-query-testing-context query + (is (= [[#t "2016-01-01" #t "2016-04-01" 1] + [#t "2016-01-01" #t "2016-05-01" 20] + [#t "2016-01-01" #t "2016-06-01" 57]] + (mt/formatted-rows [->local-date ->local-date int] + (qp/process-query query)))))))) + +(deftest ^:parallel cumulative-count-without-field-test + (mt/test-drivers (mt/normal-drivers) + (testing "cumulative count without a field" + (let [query (mt/mbql-query venues + {:aggregation [[:cum-count]] + :breakout [$price]})] + (mt/with-native-query-testing-context query + (is (= [[1 22] + [2 81] + [3 94] + [4 100]] + (mt/formatted-rows [int int] + (qp/process-query query))))))))) + +(deftest ^:parallel cumulative-count-with-bucketed-breakout-test + (mt/test-drivers (mt/normal-drivers) + (testing "cumulative count with a temporally bucketed breakout" + (mt/test-drivers (mt/normal-drivers-with-feature :window-functions) + (let [metadata-provider (lib.metadata.jvm/application-database-metadata-provider (mt/id)) + orders (lib.metadata/table metadata-provider (mt/id :orders)) + orders-created-at (lib.metadata/field metadata-provider (mt/id :orders :created_at)) + query (-> (lib/query metadata-provider orders) + (lib/breakout (lib/with-temporal-bucket orders-created-at :month)) + (lib/aggregate (lib/cum-count)) + (lib/limit 3) + (assoc-in [:middleware :format-rows?] false))] + (mt/with-native-query-testing-context query + (is (= [[#t "2016-04-01" 1] + [#t "2016-05-01" 20] + [#t "2016-06-01" 57]] + (mt/formatted-rows [->local-date int] + (qp/process-query query)))))))))) + +(deftest ^:parallel cumulative-sum-with-multiple-breakouts-test + (mt/test-drivers (mt/normal-drivers) + (let [query (-> (mt/mbql-query orders + {:aggregation [[:cum-sum $total]] + :breakout [!year.created_at !month.created_at] + :limit 3}) + (assoc-in [:middleware :format-rows?] false))] + (mt/with-native-query-testing-context query + (is (= [[#t "2016-01-01" #t "2016-04-01" 52] + [#t "2016-01-01" #t "2016-05-01" 1318] + [#t "2016-01-01" #t "2016-06-01" 3391]] + (mt/formatted-rows [->local-date ->local-date int] + (qp/process-query query)))))))) + +(deftest ^:parallel cumulative-count-and-sum-in-expressions-test + (testing "Cumulative count should work inside expressions (#13634, #15118)" + (mt/test-drivers (mt/normal-drivers-with-feature :window-functions) + (let [metadata-provider (lib.metadata.jvm/application-database-metadata-provider (mt/id)) + orders (lib.metadata/table metadata-provider (mt/id :orders)) + orders-created-at (lib.metadata/field metadata-provider (mt/id :orders :created_at)) + orders-total (lib.metadata/field metadata-provider (mt/id :orders :total)) + query (-> (lib/query metadata-provider orders) + ;; 1. month + (lib/breakout (lib/with-temporal-bucket orders-created-at :month)) + ;; 2. cumulative count of orders + (lib/aggregate (lib/cum-count)) + ;; 3. cumulative sum of orders + (lib/aggregate (lib/cum-sum orders-total)) + ;; 4. cumulative average order price (cumulative sum of total / cumulative count) + (lib/aggregate (lib// (lib/cum-sum orders-total) + (lib/cum-count))) + (lib/limit 3) + (assoc-in [:middleware :format-rows?] false))] + (mt/with-native-query-testing-context query + ;; 1 2 3 4 + (is (= [[#t "2016-04-01" 1 52.76 52.76] + [#t "2016-05-01" 20 1318.49 65.92] + [#t "2016-06-01" 57 3391.41 59.50]] + (mt/formatted-rows [->local-date int 2.0 2.0] + (qp/process-query query))))))))) diff --git a/test/metabase/query_processor_test/expression_aggregations_test.clj b/test/metabase/query_processor_test/expression_aggregations_test.clj index 653c72b1072..eaaf19f0683 100644 --- a/test/metabase/query_processor_test/expression_aggregations_test.clj +++ b/test/metabase/query_processor_test/expression_aggregations_test.clj @@ -332,33 +332,3 @@ {:aggregation [[:aggregation-options [:sum $rating] {:name "MyCE"}]] :breakout [$category] :order-by [[:asc [:aggregation 0]]]})))))))) - -;;; this is a repo for #15118, which is not fixed yet. - -#_(deftest ^:parallel multiple-cumulative-sums-test - (mt/test-drivers (mt/normal-drivers-with-feature :expression-aggregations) - (testing "The results of divide or multiply two CumulativeSum should be correct (#15118)" - (mt/dataset test-data - (is (= [["2016-01-01T00:00:00Z" 3236 2458.0 5694.0 1] - ["2017-01-01T00:00:00Z" 17587 14995.0 32582.0 2] - ["2018-01-01T00:00:00Z" 40381 35366.5 75747.5 3] - ["2019-01-01T00:00:00Z" 65835 58002.7 123837.7 4] - ["2020-01-01T00:00:00Z" 69540 64923.0 134463.0 5]] - (mt/formatted-rows [identity int 2.0 2.0 int] - (mt/run-mbql-query orders - {:aggregation - [[:aggregation-options [:cum-sum $quantity] {:display-name "C1"}] - [:aggregation-options - [:cum-sum $product_id->products.rating] - {:display-name "C2"}] - [:aggregation-options - [:+ - [:cum-sum $quantity] - [:cum-sum $product_id->products.rating]] - {:display-name "C3"}] - [:aggregation-options - [:* - [:cum-sum $quantity] - [:cum-sum $product_id->products.rating]] - {:display-name "C4"}]] - :breakout [!year.created_at]})))))))) diff --git a/test/metabase/test/data/h2.clj b/test/metabase/test/data/h2.clj index ee1657a3a01..c50c17fdf67 100644 --- a/test/metabase/test/data/h2.clj +++ b/test/metabase/test/data/h2.clj @@ -100,13 +100,19 @@ (merge ((get-method tx/aggregate-column-info ::tx/test-extensions) driver ag-type) (when (= ag-type :count) - {:base_type :type/BigInteger}))) + {:base_type :type/BigInteger}) + (when (= ag-type :cum-count) + {:base_type :type/Decimal}))) ([driver ag-type field] (merge ((get-method tx/aggregate-column-info ::tx/test-extensions) driver ag-type field) - (when (#{:sum :cum-count} ag-type) - {:base_type :type/BigInteger})))) + (when (= ag-type :sum) + {:base_type :type/BigInteger}) + ;; because it's implemented as sum(count(field)) OVER (...). But shouldn't a sum of integers be an + ;; integer? :thinking_face: + (when (= ag-type :cum-count) + {:base_type :type/Decimal})))) (defmethod execute/execute-sql! :h2 [driver _ dbdef sql] diff --git a/test/metabase/test/data/interface.clj b/test/metabase/test/data/interface.clj index b0632d8b418..a236790c1d6 100644 --- a/test/metabase/test/data/interface.clj +++ b/test/metabase/test/data/interface.clj @@ -428,10 +428,14 @@ (defmethod aggregate-column-info ::test-extensions ([_ aggregation-type] (assert (#{:count :cum-count} aggregation-type)) - {:base_type :type/BigInteger + {:base_type (case aggregation-type + :count :type/BigInteger + :cum-count :type/Decimal) :semantic_type :type/Quantity :name "count" - :display_name "Count" + :display_name (case aggregation-type + :count "Count" + :cum-count "Cumulative count") :source :aggregation :field_ref [:aggregation 0]}) @@ -443,7 +447,7 @@ :query {:source-table table-id :aggregation [[aggregation-type [:field-id field-id]]]}})) (when (= aggregation-type :cum-count) - {:base_type :type/BigInteger + {:base_type :type/Decimal :semantic_type :type/Quantity})))) -- GitLab