diff --git a/docs/developers-guide/driver-changelog.md b/docs/developers-guide/driver-changelog.md index 704bd7040dad990359cb08c427fcbde9e782d825..be60a2daa82bff7f8e4b90e55ce39da60c1d5637 100644 --- a/docs/developers-guide/driver-changelog.md +++ b/docs/developers-guide/driver-changelog.md @@ -13,6 +13,61 @@ title: Driver interface changelog efficient way possible. This is currently only required for drivers that support the `:uploads` feature, and has a default implementation for JDBC-based drivers. +- New feature `:window-functions` has been added. Drivers that implement this method are expected to implement the + cumulative sum (`:cum-sum`) and cumulative count (`:cum-count`) aggregation clauses in their native query language. + For non-SQL drivers (drivers not based on our `:sql` or `:sql-jdbc` drivers), this feature flag is set to `false` by + default; the old (broken) post-processing implementations of cumulative aggregations will continue to be used. (See + issues [#13634](https://github.com/metabase/metabase/issues/13634) and + [#15118](https://github.com/metabase/metabase/issues/15118) for more information on why the old implementation is + broken.) + + Non-SQL drivers should be updated to implement cumulative aggregations natively if possible. + + SQL drivers that support ordering by output column numbers in `OVER` `ORDER BY` expressions, e.g. + + ```sql + SELECT + sum(sum(my_column) OVER (ORDER BY 1 ROWS UNBOUNDED PRECEDING) AS cumulative_sum + date_trunc(my_timestamp, 'month') AS my_timestamp_month + FROM + my_table + GROUP BY + date_trunc(my_timestamp, 'month') + ORDER BY + date_trunc(my_timestamp, 'month') + ``` + + will work without any changes; this is the new default behavior for drivers based on the `:sql` or `:sql-jdbc` + drivers. + + For databases that do not support ordering by output column numbers (e.g. MySQL/MariaDB), you can mark your driver + as not supporting the `:sql/window-functions.order-by-output-column-numbers` feature flag, e.g. + + ```clj + (defmethod driver/database-supports? [:my-driver :sql/window-functions.order-by-output-column-numbers] + [_driver _feature _database] + false) + ``` + + In this case, the `:sql` driver will instead generate something like + + ```sql + SELECT + sum(sum(my_column) OVER (ORDER BY date_trunc(my_timestamp, 'month') ROWS UNBOUNDED PRECEDING) AS cumulative_sum + date_trunc(my_timestamp, 'month') AS my_timestamp_month + FROM + my_table + GROUP BY + date_trunc(my_timestamp, 'month') + ORDER BY + date_trunc(my_timestamp, 'month') + ``` + + If neither of these strategies work for you, you might need to do something more complicated -- see + [#40982](https://github.com/metabase/metabase/pull/40982) for an example of complex query transformations to get + fussy BigQuery working. (More on this soon.) If all else fails, you can always specify that your driver does not + support `:window-functions`, and it will fall back to using the old broken implementation. + ## Metabase 0.49.1 - Another driver feature has been added: `describe-fields`. If a driver opts-in to supporting this feature, The diff --git a/e2e/test/scenarios/custom-column/custom-column.cy.spec.js b/e2e/test/scenarios/custom-column/custom-column.cy.spec.js index 24499e1127d3397cbb2a0036496323f854da54b9..e948b1fae9187d72644f2cba14fbc41ded1682e7 100644 --- a/e2e/test/scenarios/custom-column/custom-column.cy.spec.js +++ b/e2e/test/scenarios/custom-column/custom-column.cy.spec.js @@ -341,7 +341,7 @@ describe("scenarios > question > custom column", () => { .should("have.length.of.at.least", 8); }); - it.skip("should create custom column after aggregation with 'cum-sum/count' (metabase#13634)", () => { + it("should create custom column after aggregation with 'cum-sum/count' (metabase#13634)", () => { cy.createQuestion( { name: "13634", @@ -365,7 +365,7 @@ describe("scenarios > question > custom column", () => { cy.log("Reported failing in v0.34.3, v0.35.4, v0.36.8.2, v0.37.0.2"); // eslint-disable-next-line no-unscoped-text-selectors -- deprecated usage cy.findByText("Foo Bar"); - cy.findAllByText("57911"); + cy.findAllByText("57,911"); }); it("should not be dropped if filter is changed after aggregation (metaabase#14193)", () => { diff --git a/modules/drivers/athena/src/metabase/driver/athena.clj b/modules/drivers/athena/src/metabase/driver/athena.clj index 56284630aa780f2cb5351ab8e1280569eb2f3aa7..77aa61c01a1a13e52abe819e58e204a10a4b9f69 100644 --- a/modules/drivers/athena/src/metabase/driver/athena.clj +++ b/modules/drivers/athena/src/metabase/driver/athena.clj @@ -32,10 +32,11 @@ ;;; | metabase.driver method impls | ;;; +----------------------------------------------------------------------------------------------------------------+ -(doseq [[feature supported?] {:foreign-keys true - :datetime-diff true - :nested-fields false #_true ; huh? Not sure why this was `true`. Disabled for now. - :test/jvm-timezone-setting false}] +(doseq [[feature supported?] {:datetime-diff true + :foreign-keys true + :nested-fields false + :sql/window-functions.order-by-output-column-numbers false + :test/jvm-timezone-setting false}] (defmethod driver/database-supports? [:athena feature] [_driver _feature _db] supported?)) ;;; +----------------------------------------------------------------------------------------------------------------+ diff --git a/modules/drivers/bigquery-cloud-sdk/src/metabase/driver/bigquery_cloud_sdk.clj b/modules/drivers/bigquery-cloud-sdk/src/metabase/driver/bigquery_cloud_sdk.clj index a64333af88140346b021e98e26c69f81a9d70d4c..3e425ea62ec62ccb05a40221a9a8dda95bb4354c 100644 --- a/modules/drivers/bigquery-cloud-sdk/src/metabase/driver/bigquery_cloud_sdk.clj +++ b/modules/drivers/bigquery-cloud-sdk/src/metabase/driver/bigquery_cloud_sdk.clj @@ -462,10 +462,13 @@ :datetime-diff true :now true :convert-timezone true - ;; BigQuery uses timezone operators and arguments on calls like extract() and timezone_trunc() rather than literally - ;; using SET TIMEZONE, but we need to flag it as supporting set-timezone anyway so that reporting timezones are - ;; returned and used, and tests expect the converted values. - :set-timezone true}] + ;; BigQuery uses timezone operators and arguments on calls like extract() and + ;; timezone_trunc() rather than literally using SET TIMEZONE, but we need to flag it as + ;; supporting set-timezone anyway so that reporting timezones are returned and used, and + ;; tests expect the converted values. + :set-timezone true + ;; temporarily disabled -- will be fixed by #40982 + :window-functions false}] (defmethod driver/database-supports? [:bigquery-cloud-sdk feature] [_driver _feature _db] supported?)) ;; BigQuery is always in UTC diff --git a/modules/drivers/mongo/test/metabase/driver/mongo/query_processor_test.clj b/modules/drivers/mongo/test/metabase/driver/mongo/query_processor_test.clj index 69e161c36856af4e5a941da38d5539a5b5c75cad..7e89dbd8e2a02ac6da508da70145e1787a304c4d 100644 --- a/modules/drivers/mongo/test/metabase/driver/mongo/query_processor_test.clj +++ b/modules/drivers/mongo/test/metabase/driver/mongo/query_processor_test.clj @@ -106,8 +106,14 @@ (qp.compile/compile (mt/mbql-query attempts {:aggregation [[:count]] - :filter [:time-interval $datetime :last :month]})))) + :filter [:time-interval $datetime :last :month]})))))))))) +(deftest ^:parallel no-initial-projection-test-2 + (mt/test-driver :mongo + (testing "Don't need to create initial projections anymore (#4216)" + (testing "Don't create an initial projection for datetime-fields that use `:default` bucketing (#14838)" + (mt/with-clock #t "2021-02-15T17:33:00-08:00[US/Pacific]" + (mt/dataset attempted-murders (testing "should still work even with bucketing bucketing" (let [tz (qp.timezone/results-timezone-id :mongo mt/db) query (mt/with-metadata-provider (mt/id) @@ -147,8 +153,7 @@ {"$project" {"_id" false "datetime" "$_id.datetime" "datetime_2" "$_id.datetime_2" - "count" true}} - {"$sort" {"datetime" 1}}] + "count" true}}] :collection "attempts" :mbql? true} query)) diff --git a/modules/drivers/presto-jdbc/src/metabase/driver/presto_jdbc.clj b/modules/drivers/presto-jdbc/src/metabase/driver/presto_jdbc.clj index 4582c1d71d6186f752621a8f67fbb2a233ac363a..428b4b1fc57cab9548165c528c3635d991a8b760 100644 --- a/modules/drivers/presto-jdbc/src/metabase/driver/presto_jdbc.clj +++ b/modules/drivers/presto-jdbc/src/metabase/driver/presto_jdbc.clj @@ -43,15 +43,16 @@ (driver/register! :presto-jdbc, :parent #{:sql-jdbc ::sql-jdbc.legacy/use-legacy-classes-for-read-and-set}) -(doseq [[feature supported?] {:set-timezone true - :basic-aggregations true - :standard-deviation-aggregations true - :expressions true - :native-parameters true - :expression-aggregations true - :binning true - :foreign-keys true - :now true}] +(doseq [[feature supported?] {:basic-aggregations true + :binning true + :expression-aggregations true + :expressions true + :foreign-keys true + :native-parameters true + :now true + :set-timezone true + :sql/window-functions.order-by-output-column-numbers false + :standard-deviation-aggregations true}] (defmethod driver/database-supports? [:presto-jdbc feature] [_driver _feature _db] supported?)) ;;; Presto API helpers diff --git a/modules/drivers/redshift/src/metabase/driver/redshift.clj b/modules/drivers/redshift/src/metabase/driver/redshift.clj index 370140b4b2f5e2d36a60fe31bbc8ef4e491d49f5..04f25b90dfcc0e142ba4bf3fc69ddd36c4179b67 100644 --- a/modules/drivers/redshift/src/metabase/driver/redshift.clj +++ b/modules/drivers/redshift/src/metabase/driver/redshift.clj @@ -33,11 +33,12 @@ (driver/register! :redshift, :parent #{:postgres ::sql-jdbc.legacy/use-legacy-classes-for-read-and-set}) -(doseq [[feature supported?] {:test/jvm-timezone-setting false - :nested-field-columns false - :describe-fields true - :describe-fks true - :connection-impersonation true}] +(doseq [[feature supported?] {:connection-impersonation true + :describe-fields true + :describe-fks true + :nested-field-columns false + :sql/window-functions.order-by-output-column-numbers false + :test/jvm-timezone-setting false}] (defmethod driver/database-supports? [:redshift feature] [_driver _feat _db] supported?)) ;;; +----------------------------------------------------------------------------------------------------------------+ diff --git a/modules/drivers/snowflake/src/metabase/driver/snowflake.clj b/modules/drivers/snowflake/src/metabase/driver/snowflake.clj index 2e906d68873d4634b7865ba2e649cef3dff27225..48b7a103867554f01854ba4ab3a9cf7499a6771e 100644 --- a/modules/drivers/snowflake/src/metabase/driver/snowflake.clj +++ b/modules/drivers/snowflake/src/metabase/driver/snowflake.clj @@ -49,11 +49,12 @@ (driver/register! :snowflake, :parent #{:sql-jdbc ::sql-jdbc.legacy/use-legacy-classes-for-read-and-set}) -(doseq [[feature supported?] {:datetime-diff true - :now true - :convert-timezone true - :connection-impersonation true - :connection-impersonation-requires-role true}] +(doseq [[feature supported?] {:connection-impersonation true + :connection-impersonation-requires-role true + :convert-timezone true + :datetime-diff true + :now true + :sql/window-functions.order-by-output-column-numbers false}] (defmethod driver/database-supports? [:snowflake feature] [_driver _feature _db] supported?)) (defmethod driver/humanize-connection-error-message :snowflake diff --git a/modules/drivers/sparksql/src/metabase/driver/sparksql.clj b/modules/drivers/sparksql/src/metabase/driver/sparksql.clj index ebe15841e9c795ecc2bbffd82a8521dac62cb4ad..d28170db64e47647fc9e1f1e8266abd7df4d4d69 100644 --- a/modules/drivers/sparksql/src/metabase/driver/sparksql.clj +++ b/modules/drivers/sparksql/src/metabase/driver/sparksql.clj @@ -209,8 +209,10 @@ :native-parameters true :nested-queries true :standard-deviation-aggregations true - :test/jvm-timezone-setting false}] - (defmethod driver/database-supports? [:sparkql feature] [_driver _feature _db] supported?)) + :test/jvm-timezone-setting false + ;; disabled for now, see issue #40991 to fix this. + :window-functions false}] + (defmethod driver/database-supports? [:sparksql feature] [_driver _feature _db] supported?)) ;; only define an implementation for `:foreign-keys` if none exists already. In test extensions we define an alternate ;; implementation, and we don't want to stomp over that if it was loaded already diff --git a/modules/drivers/sqlserver/src/metabase/driver/sqlserver.clj b/modules/drivers/sqlserver/src/metabase/driver/sqlserver.clj index ffd39807700a2b4ce3bc40f8e4a8f75b1b6c7c51..956d048dfc4214d7c906e77dea9425209863e9c7 100644 --- a/modules/drivers/sqlserver/src/metabase/driver/sqlserver.clj +++ b/modules/drivers/sqlserver/src/metabase/driver/sqlserver.clj @@ -32,13 +32,14 @@ (driver/register! :sqlserver, :parent :sql-jdbc) -(doseq [[feature supported?] {:regex false - :case-sensitivity-string-filter-options false - :now true - :datetime-diff true - :convert-timezone true - :test/jvm-timezone-setting false - :index-info true}] +(doseq [[feature supported?] {:case-sensitivity-string-filter-options false + :convert-timezone true + :datetime-diff true + :index-info true + :now true + :regex false + :sql/window-functions.order-by-output-column-numbers false + :test/jvm-timezone-setting false}] (defmethod driver/database-supports? [:sqlserver feature] [_driver _feature _db] supported?)) (defmethod driver/database-supports? [:sqlserver :percentile-aggregations] @@ -448,7 +449,9 @@ ;; For the GROUP BY, we replace the unoptimized fields with the optimized ones, e.g. ;; ;; GROUP BY year(field), month(field) - (apply sql.helpers/group-by new-hsql (mapv (partial sql.qp/->honeysql driver) optimized))))) + (apply sql.helpers/group-by new-hsql (mapv (partial sql.qp/->honeysql driver) optimized)) + ;; remove duplicate group by clauses (from the optimize breakout clauses stuff) + (update new-hsql :group-by distinct)))) (defn- optimize-order-by-subclauses "Optimize `:order-by` `subclauses` using [[optimized-temporal-buckets]], if possible." @@ -468,7 +471,9 @@ ;; year(), month(), and day() can make use of indexes while DateFromParts() cannot. (let [query (update query :order-by optimize-order-by-subclauses) parent-method (get-method sql.qp/apply-top-level-clause [:sql-jdbc :order-by])] - (parent-method driver :order-by honeysql-form query))) + (-> (parent-method driver :order-by honeysql-form query) + ;; order bys have to be distinct in SQL Server!!!!!!!1 + (update :order-by distinct)))) ;; SQLServer doesn't support `TRUE`/`FALSE`; it uses `1`/`0`, respectively; convert these booleans to numbers. (defmethod sql.qp/->honeysql [:sqlserver Boolean] diff --git a/modules/drivers/vertica/src/metabase/driver/vertica.clj b/modules/drivers/vertica/src/metabase/driver/vertica.clj index b2e5cc444aad457688653b83a1fec69850a926b2..32ea6cd41bf34de66f07a11441de02931471f183 100644 --- a/modules/drivers/vertica/src/metabase/driver/vertica.clj +++ b/modules/drivers/vertica/src/metabase/driver/vertica.clj @@ -27,11 +27,12 @@ ::sql-jdbc.legacy/use-legacy-classes-for-read-and-set ::sql.qp.empty-string-is-null/empty-string-is-null}) -(doseq [[feature supported?] {:percentile-aggregations false - :datetime-diff true - :now true - :convert-timezone true - :test/jvm-timezone-setting false}] +(doseq [[feature supported?] {:convert-timezone true + :datetime-diff true + :now true + :percentile-aggregations false + :sql/window-functions.order-by-output-column-numbers false + :test/jvm-timezone-setting false}] (defmethod driver/database-supports? [:vertica feature] [_driver _feature _db] supported?)) (defmethod driver/db-start-of-week :vertica diff --git a/src/metabase/db.clj b/src/metabase/db.clj index d436d0e5ca027070a2b8945e1816f950a48f1692..ee5f364476995edb751c77414752fcb5dfb266f6 100644 --- a/src/metabase/db.clj +++ b/src/metabase/db.clj @@ -80,7 +80,6 @@ (fn [& args] (apply f* (unique-identifier) args)))) - (defn increment-app-db-unique-indentifier! "Increment the [[unique-identifier]] for the Metabase application DB. This effectively flushes all caches using it as a key (including things using [[mdb/memoize-for-application-db]]) such as the Settings cache. Should only be used diff --git a/src/metabase/driver.clj b/src/metabase/driver.clj index fcd3544db2c321d2a2adc922ee15870c6a5786bc..aac5b659b2bf726825195aefbe7766bba63268a6 100644 --- a/src/metabase/driver.clj +++ b/src/metabase/driver.clj @@ -589,7 +589,10 @@ :describe-fields ;; Does the driver support fingerprint the fields. Default is true - :fingerprint}) + :fingerprint + + ;; Does this driver support window functions like cumulative count and cumulative sum? (default: false) + :window-functions}) (defmulti database-supports? "Does this driver and specific instance of a database support a certain `feature`? @@ -609,8 +612,11 @@ (database-supports? :mongo :set-timezone mongo-db) ; -> true" {:arglists '([driver feature database]), :added "0.41.0"} (fn [driver feature _database] - (when-not (features feature) - (throw (Exception. (tru "Invalid driver feature: {0}" feature)))) + ;; only make sure unqualified keywords are explicitly defined in [[features]]. + (when (simple-keyword? feature) + (when-not (features feature) + (throw (ex-info (tru "Invalid driver feature: {0}" feature) + {:feature feature})))) [(dispatch-on-initialized-driver driver) feature]) :hierarchy #'hierarchy) diff --git a/src/metabase/driver/h2.clj b/src/metabase/driver/h2.clj index ce2bd4d4c0ee95153909b20befb5e7bb584f1760..dbc9476d0cfcaf7ec0a2014ec218ee2b6f6219d7 100644 --- a/src/metabase/driver/h2.clj +++ b/src/metabase/driver/h2.clj @@ -66,16 +66,16 @@ ;;; | metabase.driver impls | ;;; +----------------------------------------------------------------------------------------------------------------+ -(doseq [[feature supported?] {:full-join false - :regex true - :percentile-aggregations false - :actions true +(doseq [[feature supported?] {:actions true :actions/custom true :datetime-diff true + :full-join false + :index-info true :now true + :percentile-aggregations false + :regex true :test/jvm-timezone-setting false - :uploads true - :index-info true}] + :uploads true}] (defmethod driver/database-supports? [:h2 feature] [_driver _feature _database] supported?)) diff --git a/src/metabase/driver/mysql.clj b/src/metabase/driver/mysql.clj index 65c9654ebae82b6313009f500d1140638c844c49..949c444e9ff620b08abef8f862f9fb0e3de94070 100644 --- a/src/metabase/driver/mysql.clj +++ b/src/metabase/driver/mysql.clj @@ -53,20 +53,21 @@ (defmethod driver/display-name :mysql [_] "MySQL") -(doseq [[feature supported?] {:persist-models true - :convert-timezone true - :datetime-diff true - :now true - :regex false - :percentile-aggregations false - :full-join false - :uploads true - :schemas false - ;; MySQL LIKE clauses are case-sensitive or not based on whether the collation of the server and the columns - ;; themselves. Since this isn't something we can really change in the query itself don't present the option to the - ;; users in the UI - :case-sensitivity-string-filter-options false - :index-info true}] +(doseq [[feature supported?] {;; MySQL LIKE clauses are case-sensitive or not based on whether the collation of the + ;; server and the columns themselves. Since this isn't something we can really change in + ;; the query itself don't present the option to the users in the UI + :case-sensitivity-string-filter-options false + :convert-timezone true + :datetime-diff true + :full-join false + :index-info true + :now true + :percentile-aggregations false + :persist-models true + :regex false + :schemas false + :sql/window-functions.order-by-output-column-numbers false + :uploads true}] (defmethod driver/database-supports? [:mysql feature] [_driver _feature _db] supported?)) ;; This is a bit of a lie since the JSON type was introduced for MySQL since 5.7.8. diff --git a/src/metabase/driver/postgres.clj b/src/metabase/driver/postgres.clj index 7cf962ce42c211682d0bf707975ce31fa8e8c38b..ccee5ad8e41f5b8494e343b37432e6eca9aa6da5 100644 --- a/src/metabase/driver/postgres.clj +++ b/src/metabase/driver/postgres.clj @@ -61,13 +61,14 @@ (defmethod driver/display-name :postgres [_] "PostgreSQL") ;; Features that are supported by Postgres and all of its child drivers like Redshift -(doseq [[feature supported?] {:convert-timezone true - :datetime-diff true - :now true - :persist-models true - :table-privileges true - :schemas true - :connection-impersonation true}] +(doseq [[feature supported?] {:connection-impersonation true + :convert-timezone true + :datetime-diff true + :now true + :persist-models true + :schemas true + :sql/window-functions.order-by-output-column-numbers false + :table-privileges true}] (defmethod driver/database-supports? [:postgres feature] [_driver _feature _db] supported?)) (defmethod driver/database-supports? [:postgres :nested-field-columns] diff --git a/src/metabase/driver/sql.clj b/src/metabase/driver/sql.clj index 28808e680e8ba7a96730ed0cffae91321b2d5b40..b297cfb3ea3bc5f15816839815997d505a254d01 100644 --- a/src/metabase/driver/sql.clj +++ b/src/metabase/driver/sql.clj @@ -18,16 +18,17 @@ (driver/register! :sql, :abstract? true) -(doseq [feature [:standard-deviation-aggregations - :foreign-keys - :expressions +(doseq [feature [:advanced-math-expressions + :binning :expression-aggregations + :expressions + :foreign-keys :native-parameters :nested-queries - :binning - :advanced-math-expressions :percentile-aggregations - :regex]] + :regex + :standard-deviation-aggregations + :window-functions]] (defmethod driver/database-supports? [:sql feature] [_driver _feature _db] true)) (doseq [join-feature [:left-join diff --git a/src/metabase/driver/sql/query_processor.clj b/src/metabase/driver/sql/query_processor.clj index 4e7caf061ed6153c8c5a97d1c11f5ab4a98fef4b..c16a607c7be91e6b6f5735d70bc5e48956b5fd5a 100644 --- a/src/metabase/driver/sql/query_processor.clj +++ b/src/metabase/driver/sql/query_processor.clj @@ -420,7 +420,7 @@ of `honeysql-form`. Most drivers can use the default implementations for all of these methods, but some may need to override one or more (e.g. SQL Server needs to override this method for the `:limit` clause, since T-SQL uses `TOP` instead of `LIMIT`)." - {:added "0.32.0", :arglists '([driver top-level-clause honeysql-form query]), :style/indent 2} + {:added "0.32.0", :arglists '([driver top-level-clause honeysql-form query]), :style/indent [:form]} (fn [driver top-level-clause _ _] [(driver/dispatch-on-initialized-driver driver) top-level-clause]) :hierarchy #'driver/hierarchy) @@ -687,6 +687,101 @@ (->honeysql driver mbql-expr) (->honeysql driver power)]) +;;; NOCOMMIT -- we need to document the new `:window-functions` feature. +(defmethod driver/database-supports? [:sql :sql/window-functions.order-by-output-column-numbers] + [_driver _feature _database] + true) + +(defn- format-rows-unbounded-preceding [_clause _args] + ["ROWS UNBOUNDED PRECEDING"]) + +(sql/register-clause! + ::rows-unbounded-preceding + #'format-rows-unbounded-preceding + nil) + +(defn- over-rows-with-order-by-copying-expressions + "e.g. + + OVER (ORDER BY date_trunc('month', my_column))" + [driver expr] + (let [{:keys [group-by]} (apply-top-level-clause + driver + :breakout + {} + *inner-query*)] + [:over + [expr + {:order-by (mapv (fn [expr] + [expr :asc]) + group-by) + ::rows-unbounded-preceding []}]])) + +;;; For databases that do something intelligent with ORDER BY 1, 2: we can take advantage of that functionality +;;; implement the window function versions of cumulative sum and cumulative sum more simply. + +(defn- over-rows-with-order-by-output-column-numbers + "e.g. + + OVER (ORDER BY 1 ROWS UNBOUNDED PRECEDING)" + [_driver expr] + [:over + [expr + {:order-by (mapv (fn [i] + [[:inline (inc i)] :asc]) + (range (count (:breakout *inner-query*)))) + ::rows-unbounded-preceding []}]]) + +(defn- cumulative-aggregation-window-function [driver expr] + (let [over-rows-with-order-by (if (driver/database-supports? driver + :sql/window-functions.order-by-output-column-numbers + (lib.metadata/database (qp.store/metadata-provider))) + over-rows-with-order-by-output-column-numbers + over-rows-with-order-by-copying-expressions)] + (over-rows-with-order-by driver expr))) + +;;; cum-count() +;;; +;;; should compile to SQL like +;;; +;;; sum(count()) OVER (ORDER BY ...) +;;; +;;; where the ORDER BY matches what's in the query (i.e., the breakouts), or +;;; +;;; sum(count()) OVER (ORDER BY 1 ROWS UNBOUNDED PRECEDING) +;;; +;;; if the database supports ordering by SELECT expression position +(defmethod ->honeysql [:sql :cum-count] + [driver [_cum-count expr-or-nil]] + ;; a cumulative count with no breakouts doesn't really mean anything, just compile it as a normal count. + (if (empty? (:breakout *inner-query*)) + (->honeysql driver [:count expr-or-nil]) + (cumulative-aggregation-window-function + driver + [:sum (if expr-or-nil + [:count (->honeysql driver expr-or-nil)] + [:count :*])]))) + +;;; cum-sum(total) +;;; +;;; should compile to SQL like +;;; +;;; sum(sum(total)) OVER (ORDER BY ...) +;;; +;;; where the ORDER BY matches what's in the query (i.e., the breakouts), or +;;; +;;; sum(sum(total)) OVER (ORDER BY 1 ROWS UNBOUNDED PRECEDING) +;;; +;;; if the database supports ordering by SELECT expression position +(defmethod ->honeysql [:sql :cum-sum] + [driver [_cum-sum expr]] + ;; a cumulative sum with no breakouts doesn't really mean anything, just compile it as a normal sum. + (if (empty? (:breakout *inner-query*)) + (->honeysql driver [:sum expr]) + (cumulative-aggregation-window-function + driver + [:sum [:sum (->honeysql driver expr)]]))) + (defn- interval? [expr] (mbql.u/is-clause? :interval expr)) diff --git a/src/metabase/driver/sql_jdbc/execute.clj b/src/metabase/driver/sql_jdbc/execute.clj index 96f2b4d606f2f451bf913af552385a0b35b5a31b..5fe6bacc9df765adecaba7eb9ab17680ab76d177 100644 --- a/src/metabase/driver/sql_jdbc/execute.clj +++ b/src/metabase/driver/sql_jdbc/execute.clj @@ -135,6 +135,7 @@ driver/dispatch-on-initialized-driver :hierarchy #'driver/hierarchy) +;;; TODO -- we should just make this a FEATURE!!!!!1 (defmulti ^Statement statement-supported? "Indicates whether the given driver supports creating a java.sql.Statement, via the Connection. By default, this is true for all :sql-jdbc drivers. If the underlying driver does not support Statement creation, override this as diff --git a/src/metabase/legacy_mbql/util.cljc b/src/metabase/legacy_mbql/util.cljc index 20418f7c3eddf9ae94ae324d0ed1bd16e60b469a..588a4b750c3ab50fdf9ac1bed7aab88668df5799 100644 --- a/src/metabase/legacy_mbql/util.cljc +++ b/src/metabase/legacy_mbql/util.cljc @@ -399,11 +399,13 @@ (mu/defn add-order-by-clause :- mbql.s/MBQLQuery "Add a new `:order-by` clause to an MBQL `inner-query`. If the new order-by clause references a Field that is already being used in another order-by clause, this function does nothing." - [inner-query :- mbql.s/MBQLQuery - [_ [_ id-or-name :as _field], :as order-by-clause] :- mbql.s/OrderBy] - (let [existing-fields (set (for [[_ [_ id-or-name]] (:order-by inner-query)] - id-or-name))] - (if (existing-fields id-or-name) + [inner-query :- mbql.s/MBQLQuery + [_dir orderable, :as order-by-clause] :- mbql.s/OrderBy] + (let [existing-orderables (into #{} + (map (fn [[_dir orderable]] + orderable)) + (:order-by inner-query))] + (if (existing-orderables orderable) ;; Field already referenced, nothing to do inner-query ;; otherwise add new clause at the end @@ -577,6 +579,7 @@ (assert (not= candidate original) (str "unique-alias-fn must return a different string than its input. Input: " (pr-str candidate))) + (swap! id+original->unique assoc [id name-key] candidate) (recur id candidate)))))))))) (mu/defn uniquify-names :- [:and @@ -632,7 +635,6 @@ Most often, `aggregation->name-fn` will be something like `annotate/aggregation-name`, but for purposes of keeping the `metabase.legacy-mbql` module seperate from the `metabase.query-processor` code we'll let you pass that in yourself." - {:style/indent 1} [aggregation->name-fn :- fn? aggregations :- [:sequential mbql.s/Aggregation]] (lib.util.match/replace aggregations @@ -648,7 +650,6 @@ (mu/defn pre-alias-and-uniquify-aggregations :- UniquelyNamedAggregations "Wrap every aggregation clause in a `:named` clause with a unique name. Combines `pre-alias-aggregations` with `uniquify-named-aggregations`." - {:style/indent 1} [aggregation->name-fn :- fn? aggregations :- [:sequential mbql.s/Aggregation]] (-> (pre-alias-aggregations aggregation->name-fn aggregations) diff --git a/src/metabase/lib/aggregation.cljc b/src/metabase/lib/aggregation.cljc index 60cadff4f0345ea1c1b0b6c7b298475886cb784e..9573f4eeccc1dcec7bb168fc4e3709ca84184c54 100644 --- a/src/metabase/lib/aggregation.cljc +++ b/src/metabase/lib/aggregation.cljc @@ -95,8 +95,8 @@ (defmethod lib.metadata.calculation/column-name-method ::count-aggregation [_query _stage-number [tag :as _clause]] (case tag - :count "count" - :cum-count "cum_count" + :count "count" + :cum-count "count" :count-where "count_where")) (defmethod lib.metadata.calculation/metadata-method ::quantity-aggregation diff --git a/src/metabase/query_processor/middleware/cumulative_aggregations.clj b/src/metabase/query_processor/middleware/cumulative_aggregations.clj index be04bc8b57fc9d3b7ee5ccfbd33890c405523113..ce977d22325de1149d8e05953f90b80be4e1b262 100644 --- a/src/metabase/query_processor/middleware/cumulative_aggregations.clj +++ b/src/metabase/query_processor/middleware/cumulative_aggregations.clj @@ -1,8 +1,11 @@ (ns metabase.query-processor.middleware.cumulative-aggregations "Middlware for handling cumulative count and cumulative sum aggregations." (:require + [metabase.driver :as driver] [metabase.legacy-mbql.schema :as mbql.s] + [metabase.lib.metadata :as lib.metadata] [metabase.lib.util.match :as lib.util.match] + [metabase.query-processor.store :as qp.store] [metabase.util.malli :as mu])) ;;;; Pre-processing @@ -29,8 +32,16 @@ "Pre-processing middleware. Rewrite `:cum-count` and `:cum-sum` aggregations as `:count` and `:sum` respectively. Add information about the indecies of the replaced aggregations under the `::replaced-indices` key." [{{breakouts :breakout, aggregations :aggregation} :query, :as query}] - (if-not (lib.util.match/match aggregations #{:cum-count :cum-sum}) + (cond + ;; no need to rewrite `:cum-sum` and `:cum-count` functions, this driver supports native window function versions + (driver/database-supports? driver/*driver* :window-functions (lib.metadata/database (qp.store/metadata-provider))) + query + + ;; nothing to rewrite + (not (lib.util.match/match aggregations #{:cum-count :cum-sum})) query + + :else (let [query' (replace-cumulative-ags query) ;; figure out which indexes are being changed in the results. Since breakouts always get included in ;; results first we need to offset the indexes to change by the number of breakouts diff --git a/test/metabase/driver/sql/query_processor_test.clj b/test/metabase/driver/sql/query_processor_test.clj index 085c70a7ba3059de4246f6efa3f9524b169d6cf3..9fbe28d40008ad794540e7cf187a4d47a1007058 100644 --- a/test/metabase/driver/sql/query_processor_test.clj +++ b/test/metabase/driver/sql/query_processor_test.clj @@ -275,7 +275,8 @@ query)}))) (deftest ^:parallel correct-for-null-behaviour - (testing "NULLs should be treated intuitively in filters (SQL has somewhat unintuitive semantics where NULLs get propagated out of expressions)." + (testing (str "NULLs should be treated intuitively in filters (SQL has somewhat unintuitive semantics where NULLs " + "get propagated out of expressions).") (is (= [[nil] ["bar"]] (query-on-dataset-with-nils {:filter [:not [:starts-with [:field "A" {:base-type :type/Text}] "f"]]}))) (is (= [[nil] ["bar"]] diff --git a/test/metabase/legacy_mbql/util_test.cljc b/test/metabase/legacy_mbql/util_test.cljc index 1b2925eefa4e289b39e00b261a975f4096e5af94..a0bb6bc8178ec794fd52e3eee00ff8b03c79038e 100644 --- a/test/metabase/legacy_mbql/util_test.cljc +++ b/test/metabase/legacy_mbql/util_test.cljc @@ -121,28 +121,32 @@ [:asc [:field 20 nil]]]} (mbql.u/add-order-by-clause {:source-table 1 :order-by [[:asc [:field 10 nil]]]} - [:asc [:field 20 nil]])))) + [:asc [:field 20 nil]]))))) +(t/deftest ^:parallel add-order-by-clause-test-2 (t/testing "duplicate clauses should get ignored" (t/is (= {:source-table 1 :order-by [[:asc [:field 10 nil]]]} (mbql.u/add-order-by-clause {:source-table 1 :order-by [[:asc [:field 10 nil]]]} - [:asc [:field 10 nil]])))) + [:asc [:field 10 nil]]))))) +(t/deftest ^:parallel add-order-by-clause-test-3 (t/testing "as should clauses that reference the same Field" (t/is (= {:source-table 1 :order-by [[:asc [:field 10 nil]]]} (mbql.u/add-order-by-clause {:source-table 1 :order-by [[:asc [:field 10 nil]]]} - [:desc [:field 10 nil]]))) + [:desc [:field 10 nil]]))))) - (t/testing "fields with different amounts of wrapping (plain field vs datetime-field)" - (t/is (= {:source-table 1 - :order-by [[:asc [:field 10 nil]]]} - (mbql.u/add-order-by-clause {:source-table 1 - :order-by [[:asc [:field 10 nil]]]} - [:asc [:field 10 {:temporal-unit :day}]])))))) +(t/deftest ^:parallel add-order-by-clause-test-4 + (t/testing "fields with different temporal-units should still get added (#40995)" + (t/is (= {:source-table 1 + :order-by [[:asc [:field 10 nil]] + [:asc [:field 10 {:temporal-unit :day}]]]} + (mbql.u/add-order-by-clause {:source-table 1 + :order-by [[:asc [:field 10 nil]]]} + [:asc [:field 10 {:temporal-unit :day}]]))))) (t/deftest ^:parallel combine-filter-clauses-test (t/is (= [:and [:= [:field 1 nil] 100] [:= [:field 2 nil] 200]] @@ -652,29 +656,37 @@ (t/deftest ^:parallel unique-name-generator-test (t/testing "Can we get a simple unique name generator" (t/is (= ["count" "sum" "count_2" "count_2_2"] - (map (mbql.u/unique-name-generator) ["count" "sum" "count" "count_2"])))) + (map (mbql.u/unique-name-generator) ["count" "sum" "count" "count_2"]))))) + +(t/deftest ^:parallel unique-name-generator-test-2 (t/testing "Can we get an idempotent unique name generator" (t/is (= ["count" "sum" "count" "count_2"] - (map (mbql.u/unique-name-generator) [:x :y :x :z] ["count" "sum" "count" "count_2"])))) + (map (mbql.u/unique-name-generator) [:x :y :x :z] ["count" "sum" "count" "count_2"]))))) + +(t/deftest ^:parallel unique-name-generator-test-3 (t/testing "Can the same object have multiple aliases" (t/is (= ["count" "sum" "count" "count_2"] - (map (mbql.u/unique-name-generator) [:x :y :x :x] ["count" "sum" "count" "count_2"])))) + (map (mbql.u/unique-name-generator) [:x :y :x :x] ["count" "sum" "count" "count_2"]))))) - (t/testing "idempotence (2-arity calls to generated function)" +(t/deftest ^:parallel unique-name-generator-idempotence-test + (t/testing "idempotence (2-arity calls to generated function) (#40994)" (let [unique-name (mbql.u/unique-name-generator)] - (t/is (= ["A" "B" "A" "A_2"] + (t/is (= ["A" "B" "A" "A_2" "A_2"] [(unique-name :x "A") (unique-name :x "B") (unique-name :x "A") - (unique-name :y "A")])))) + (unique-name :y "A") + (unique-name :y "A")]))))) - #_{:clj-kondo/ignore [:discouraged-var]} +(t/deftest ^:parallel unique-name-generator-options-test (t/testing "options" (t/testing :name-key-fn - (let [f (mbql.u/unique-name-generator :name-key-fn str/lower-case)] + (let [f (mbql.u/unique-name-generator :name-key-fn #_{:clj-kondo/ignore [:discouraged-var]} str/lower-case)] (t/is (= ["x" "X_2" "X_3"] - (map f ["x" "X" "X"]))))) + (map f ["x" "X" "X"]))))))) +(t/deftest ^:parallel unique-name-generator-options-test-2 + (t/testing "options" (t/testing :unique-alias-fn (let [f (mbql.u/unique-name-generator :unique-alias-fn (fn [x y] (str y "~~" x)))] (t/is (= ["x" "2~~x"] diff --git a/test/metabase/query_processor/middleware/add_implicit_clauses_test.clj b/test/metabase/query_processor/middleware/add_implicit_clauses_test.clj index 0ec1cc5e5a835727cb2ed80d1c5e4e70c15bfe68..1eabfe5535c3669a4f57009e90cc0bfe78a5ff9e 100644 --- a/test/metabase/query_processor/middleware/add_implicit_clauses_test.clj +++ b/test/metabase/query_processor/middleware/add_implicit_clauses_test.clj @@ -7,10 +7,8 @@ [metabase.lib.test-util :as lib.tu] [metabase.lib.test-util.macros :as lib.tu.macros] [metabase.lib.types.isa :as lib.types.isa] - [metabase.query-processor.middleware.add-implicit-clauses - :as qp.add-implicit-clauses] - [metabase.query-processor.middleware.add-source-metadata - :as add-source-metadata] + [metabase.query-processor.middleware.add-implicit-clauses :as qp.add-implicit-clauses] + [metabase.query-processor.middleware.add-source-metadata :as add-source-metadata] [metabase.query-processor.preprocess :as qp.preprocess] [metabase.query-processor.store :as qp.store] [metabase.query-processor.test-util :as qp.test-util] @@ -44,7 +42,10 @@ :order-by [[:asc [:field 1 nil]]]} (#'qp.add-implicit-clauses/add-implicit-breakout-order-by {:source-table 1 - :breakout [[:field 1 nil]]}))) + :breakout [[:field 1 nil]]}))))) + +(deftest ^:parallel add-order-bys-for-breakouts-test-2 + (testing "we should add order-bys for breakout clauses" (testing "Add Field to existing order-by" (is (= {:source-table 1 :breakout [[:field 2 nil]] @@ -53,8 +54,10 @@ (#'qp.add-implicit-clauses/add-implicit-breakout-order-by {:source-table 1 :breakout [[:field 2 nil]] - :order-by [[:asc [:field 1 nil]]]})))) + :order-by [[:asc [:field 1 nil]]]})))))) +(deftest ^:parallel add-order-bys-for-breakouts-test-3 + (testing "we should add order-bys for breakout clauses" (testing "...but not if the Field is already in an order-by" (is (= {:source-table 1 :breakout [[:field 1 nil]] @@ -62,22 +65,30 @@ (#'qp.add-implicit-clauses/add-implicit-breakout-order-by {:source-table 1 :breakout [[:field 1 nil]] - :order-by [[:asc [:field 1 nil]]]}))) + :order-by [[:asc [:field 1 nil]]]})))))) + +(deftest ^:parallel add-order-bys-for-breakouts-test-4 + (testing "we should add order-bys for breakout clauses" + (testing "...but not if the Field is already in an order-by" (is (= {:source-table 1 :breakout [[:field 1 nil]] :order-by [[:desc [:field 1 nil]]]} (#'qp.add-implicit-clauses/add-implicit-breakout-order-by {:source-table 1 :breakout [[:field 1 nil]] - :order-by [[:desc [:field 1 nil]]]}))) + :order-by [[:desc [:field 1 nil]]]})))))) + +(deftest ^:parallel add-order-bys-for-breakouts-test-5 + (testing "we should add order-bys for breakout clauses" + (testing "...but not if the Field is already in an order-by" (testing "With a datetime-field" (is (= {:source-table 1 :breakout [[:field 1 {:temporal-unit :day}]] - :order-by [[:asc [:field 1 nil]]]} + :order-by [[:asc [:field 1 {:temporal-unit :day}]]]} (#'qp.add-implicit-clauses/add-implicit-breakout-order-by {:source-table 1 :breakout [[:field 1 {:temporal-unit :day}]] - :order-by [[:asc [:field 1 nil]]]}))))))) + :order-by [[:asc [:field 1 {:temporal-unit :day}]]]}))))))) (defn- add-implicit-fields [inner-query] (if (qp.store/initialized?) diff --git a/test/metabase/query_processor/middleware/cumulative_aggregations_test.clj b/test/metabase/query_processor/middleware/cumulative_aggregations_test.clj index af860dbdcd51b98312dc4866e61207f10b3613bf..7f81f5e86aa54f3c99587d3431fe13a9ae1a97fd 100644 --- a/test/metabase/query_processor/middleware/cumulative_aggregations_test.clj +++ b/test/metabase/query_processor/middleware/cumulative_aggregations_test.clj @@ -1,9 +1,12 @@ (ns metabase.query-processor.middleware.cumulative-aggregations-test (:require [clojure.test :refer :all] + [metabase.driver :as driver] + [metabase.lib.test-metadata :as meta] [metabase.query-processor.middleware.cumulative-aggregations :as - qp.cumulative-aggregations])) + qp.cumulative-aggregations] + [metabase.query-processor.store :as qp.store])) (deftest ^:parallel add-values-from-last-row-test (are [expected indecies] (= expected @@ -80,16 +83,23 @@ :type :query :query {:source-table 1, :aggregation [[:* [:cum-count] 1]]}}))))) +(driver/register! ::no-window-function-driver) + +(defmethod driver/database-supports? [::no-window-function-driver :window-functions] + [_driver _feature _database] + false) (defn- handle-cumulative-aggregations [query] - (let [query (#'qp.cumulative-aggregations/rewrite-cumulative-aggregations query) - rff (qp.cumulative-aggregations/sum-cumulative-aggregation-columns query (constantly conj)) - rf (rff nil)] - (transduce identity rf [[1 1] - [2 2] - [3 3] - [4 4] - [5 5]]))) + (driver/with-driver ::no-window-function-driver + (qp.store/with-metadata-provider meta/metadata-provider + (let [query (#'qp.cumulative-aggregations/rewrite-cumulative-aggregations query) + rff (qp.cumulative-aggregations/sum-cumulative-aggregation-columns query (constantly conj)) + rf (rff nil)] + (transduce identity rf [[1 1] + [2 2] + [3 3] + [4 4] + [5 5]]))))) (deftest ^:parallel e2e-test (testing "make sure we take breakout fields into account" @@ -99,7 +109,9 @@ :type :query :query {:source-table 1 :breakout [[:field 1 nil]] - :aggregation [[:cum-sum [:field 1 nil]]]}})))) + :aggregation [[:cum-sum [:field 1 nil]]]}}))))) + +(deftest ^:parallel e2e-test-2 (testing "make sure we sum up cumulative aggregations inside expressions correctly" (testing "we shouldn't be doing anything special with the expressions, let the database figure that out. We will just SUM" (is (= [[1 1] [2 3] [3 6] [4 10] [5 15]] diff --git a/test/metabase/query_processor/util/add_alias_info_test.clj b/test/metabase/query_processor/util/add_alias_info_test.clj index d68c4fe4f7d1c031c9e322f26208e0278fa3f697..c8e8d411a5d0c56f79139cd24dace806b061580b 100644 --- a/test/metabase/query_processor/util/add_alias_info_test.clj +++ b/test/metabase/query_processor/util/add_alias_info_test.clj @@ -10,8 +10,7 @@ [metabase.lib.test-util :as lib.tu] [metabase.lib.test-util.macros :as lib.tu.macros] [metabase.lib.test-util.metadata-providers.mock :as providers.mock] - [metabase.query-processor.middleware.fix-bad-references - :as fix-bad-refs] + [metabase.query-processor.middleware.fix-bad-references :as fix-bad-refs] [metabase.query-processor.preprocess :as qp.preprocess] [metabase.query-processor.store :as qp.store] [metabase.query-processor.util.add-alias-info :as add] @@ -838,3 +837,45 @@ (add-alias-info {:type :query :database (meta/id) :query {:source-table "card__3"}})))))) + +(deftest ^:parallel handle-multiple-orders-bys-on-same-field-correctly-test + (testing "#40993" + (let [query (lib.tu.macros/mbql-query orders + {:aggregation [[:count]] + :breakout [[:field %created-at {:temporal-unit :month}] + [:field %created-at {:temporal-unit :day}]]})] + (qp.store/with-metadata-provider meta/metadata-provider + (driver/with-driver :h2 + (is (=? {:query {:source-table (meta/id :orders) + :breakout [[:field + (meta/id :orders :created-at) + {:temporal-unit :month + ::add/source-alias "CREATED_AT" + ::add/desired-alias "CREATED_AT" + ::add/position 0}] + [:field + (meta/id :orders :created-at) + {:temporal-unit :day + ::add/source-alias "CREATED_AT" + ::add/desired-alias "CREATED_AT_2" + ::add/position 1}]] + :aggregation [[:aggregation-options + [:count] + {::add/source-alias "count" + ::add/position 2 + ::add/desired-alias "count"}]] + :order-by [[:asc + [:field + (meta/id :orders :created-at) + {:temporal-unit :month + ::add/source-alias "CREATED_AT" + ::add/desired-alias "CREATED_AT" + ::add/position 0}]] + [:asc + [:field + (meta/id :orders :created-at) + {:temporal-unit :day + ::add/source-alias "CREATED_AT" + ::add/desired-alias "CREATED_AT_2" + ::add/position 1}]]]}} + (add/add-alias-info (qp.preprocess/preprocess query))))))))) diff --git a/test/metabase/query_processor_test/aggregation_test.clj b/test/metabase/query_processor_test/aggregation_test.clj index 662366a19e0a02576b417543ff7e70a78a3aed53..7ecd8252adf8285b8c9c5b672f371b6f02d430ad 100644 --- a/test/metabase/query_processor_test/aggregation_test.clj +++ b/test/metabase/query_processor_test/aggregation_test.clj @@ -116,11 +116,6 @@ {:aggregation [[:max $latitude]] :breakout [$price]})))))) - -;;; +----------------------------------------------------------------------------------------------------------------+ -;;; | MULTIPLE AGGREGATIONS | -;;; +----------------------------------------------------------------------------------------------------------------+ - (deftest ^:parallel multiple-aggregations-test (mt/test-drivers (mt/normal-drivers) (testing "two aggregations" @@ -146,132 +141,6 @@ (mt/run-mbql-query venues {:aggregation [[:count] [:count]]}))))))) - -;;; ------------------------------------------------- CUMULATIVE SUM ------------------------------------------------- - -(deftest ^:parallel cumulative-sum-test - (mt/test-drivers (mt/normal-drivers) - (testing "cum_sum w/o breakout should be treated the same as sum" - (let [result (mt/run-mbql-query users - {:aggregation [[:cum-sum $id]]})] - (is (=? [{:display_name "Cumulative sum of ID", - :source :aggregation}] - (-> result :data :cols))) - (is (= [[120]] - (mt/formatted-rows [int] - result))))))) - -(deftest ^:parallel cumulative-sum-test-2 - (mt/test-drivers (mt/normal-drivers) - (testing " Simple cumulative sum where breakout field is same as cum_sum field" - (is (= [[ 1 1] - [ 2 3] - [ 3 6] - [ 4 10] - [ 5 15] - [ 6 21] - [ 7 28] - [ 8 36] - [ 9 45] - [10 55] - [11 66] - [12 78] - [13 91] - [14 105] - [15 120]] - (mt/formatted-rows [int int] - (mt/run-mbql-query users - {:aggregation [[:cum-sum $id]] - :breakout [$id]}))))))) - -(deftest ^:parallel cumulative-sum-test-3 - (mt/test-drivers (mt/normal-drivers) - (testing " Cumulative sum w/ a different breakout field" - (is (= [["Broen Olujimi" 14] - ["Conchúr Tihomir" 21] - ["Dwight Gresham" 34] - ["Felipinho Asklepios" 36] - ["Frans Hevel" 46] - ["Kaneonuskatew Eiran" 49] - ["Kfir Caj" 61] - ["Nils Gotam" 70] - ["Plato Yeshua" 71] - ["Quentin Sören" 76] - ["Rüstem Hebel" 91] - ["Shad Ferdynand" 97] - ["Simcha Yan" 101] - ["Spiros Teofil" 112] - ["Szymon Theutrich" 120]] - (mt/formatted-rows [str int] - (mt/run-mbql-query users - {:aggregation [[:cum-sum $id]] - :breakout [$name]}))))))) - -(deftest ^:parallel cumulative-sum-test-4 - (mt/test-drivers (mt/normal-drivers) - (testing " Cumulative sum w/ a different breakout field that requires grouping" - (is (= [[1 1211] - [2 4066] - [3 4681] - [4 5050]] - (mt/formatted-rows [int int] - (mt/run-mbql-query venues - {:aggregation [[:cum-sum $id]] - :breakout [$price]}))))))) - -(deftest ^:parallel cumulative-count-test - (mt/test-drivers (mt/normal-drivers) - (testing "cumulative count aggregations" - (testing "w/o breakout should be treated the same as count" - (is (= {:rows [[15]] - :cols [(qp.test-util/aggregate-col :cum-count :users :id)]} - (qp.test-util/rows-and-cols - (mt/format-rows-by [int] - (mt/run-mbql-query users - {:aggregation [[:cum-count $id]]}))))))))) - -(deftest ^:parallel cumulative-count-with-breakout-test - (mt/test-drivers (mt/normal-drivers) - (testing "w/ breakout on field with distinct values" - (is (=? {:rows [["Broen Olujimi" 1] - ["Conchúr Tihomir" 2] - ["Dwight Gresham" 3] - ["Felipinho Asklepios" 4] - ["Frans Hevel" 5] - ["Kaneonuskatew Eiran" 6] - ["Kfir Caj" 7] - ["Nils Gotam" 8] - ["Plato Yeshua" 9] - ["Quentin Sören" 10] - ["Rüstem Hebel" 11] - ["Shad Ferdynand" 12] - ["Simcha Yan" 13] - ["Spiros Teofil" 14] - ["Szymon Theutrich" 15]] - :cols [(qp.test-util/breakout-col :users :name) - (qp.test-util/aggregate-col :cum-count :users :id)]} - (qp.test-util/rows-and-cols - (mt/format-rows-by [str int] - (mt/run-mbql-query users - {:aggregation [[:cum-count $id]] - :breakout [$name]})))))))) - - -(deftest ^:parallel cumulative-count-with-breakout-test-2 - (mt/test-drivers (mt/normal-drivers) - (testing "w/ breakout on field that requires grouping" - (is (=? {:cols [(qp.test-util/breakout-col :venues :price) - (qp.test-util/aggregate-col :cum-count :venues :id)] - :rows [[1 22] - [2 81] - [3 94] - [4 100]]} - (qp.test-util/rows-and-cols - (mt/format-rows-by [int int] - (mt/run-mbql-query venues - {:aggregation [[:cum-count $id]] - :breakout [$price]})))))))) - (deftest field-settings-for-aggregate-fields-test (testing "Does `:settings` show up for aggregate Fields?" (tu/with-temp-vals-in-db Field (data/id :venues :price) {:settings {:is_priceless false}} diff --git a/test/metabase/query_processor_test/cumulative_aggregation_test.clj b/test/metabase/query_processor_test/cumulative_aggregation_test.clj new file mode 100644 index 0000000000000000000000000000000000000000..0d283ecfa28c38e7cc0aacac51e4a98fab30e5b2 --- /dev/null +++ b/test/metabase/query_processor_test/cumulative_aggregation_test.clj @@ -0,0 +1,248 @@ +(ns metabase.query-processor-test.cumulative-aggregation-test + (:require + [clojure.test :refer :all] + [java-time.api :as t] + [metabase.lib.core :as lib] + [metabase.lib.metadata :as lib.metadata] + [metabase.lib.metadata.jvm :as lib.metadata.jvm] + [metabase.query-processor :as qp] + [metabase.test :as mt])) + +(defn- ->local-date [t] + (t/local-date + (cond-> t + (instance? java.time.Instant t) + (t/zoned-date-time (t/zone-id "UTC"))))) + +(deftest ^:parallel cumulative-sum-test + (mt/test-drivers (mt/normal-drivers) + (testing "cum_sum w/o breakout should be treated the same as sum" + (let [result (mt/run-mbql-query users + {:aggregation [[:cum-sum $id]]})] + (is (=? [{:display_name "Cumulative sum of ID", + :source :aggregation}] + (-> result :data :cols))) + (is (= [[120]] + (mt/formatted-rows [int] + result))))))) + +(deftest ^:parallel cumulative-sum-test-2 + (mt/test-drivers (mt/normal-drivers) + (testing "Simple cumulative sum where breakout field is same as cum_sum field" + (let [query (mt/mbql-query users + {:aggregation [[:cum-sum $id]] + :breakout [$id]})] + (mt/with-native-query-testing-context query + (is (= [[1 1] + [2 3] + [3 6] + [4 10] + [5 15] + [6 21] + [7 28] + [8 36] + [9 45] + [10 55] + [11 66] + [12 78] + [13 91] + [14 105] + [15 120]] + (mt/formatted-rows [int int] + (qp/process-query query))))))))) + +(deftest ^:parallel cumulative-sum-test-3 + (mt/test-drivers (mt/normal-drivers) + (testing "Cumulative sum w/ a different breakout field" + (let [query (mt/mbql-query users + {:aggregation [[:cum-sum $id]] + :breakout [$name]})] + (mt/with-native-query-testing-context query + (is (= [["Broen Olujimi" 14] + ["Conchúr Tihomir" 21] + ["Dwight Gresham" 34] + ["Felipinho Asklepios" 36] + ["Frans Hevel" 46] + ["Kaneonuskatew Eiran" 49] + ["Kfir Caj" 61] + ["Nils Gotam" 70] + ["Plato Yeshua" 71] + ["Quentin Sören" 76] + ["Rüstem Hebel" 91] + ["Shad Ferdynand" 97] + ["Simcha Yan" 101] + ["Spiros Teofil" 112] + ["Szymon Theutrich" 120]] + (mt/formatted-rows [str int] + (qp/process-query query))))))))) + +(deftest ^:parallel cumulative-sum-test-4 + (mt/test-drivers (mt/normal-drivers) + (testing "Cumulative sum w/ a different breakout field that requires grouping" + (let [query (mt/mbql-query venues + {:aggregation [[:cum-sum $id]] + :breakout [$price]})] + (mt/with-native-query-testing-context query + (is (= [[1 1211] + [2 4066] + [3 4681] + [4 5050]] + (mt/formatted-rows [int int] + (qp/process-query query))))))))) + +(deftest ^:parallel cumulative-sum-with-bucketed-breakout-test + (mt/test-drivers (mt/normal-drivers) + (testing "cumulative sum with a temporally bucketed breakout" + (let [metadata-provider (lib.metadata.jvm/application-database-metadata-provider (mt/id)) + orders (lib.metadata/table metadata-provider (mt/id :orders)) + orders-created-at (lib.metadata/field metadata-provider (mt/id :orders :created_at)) + orders-total (lib.metadata/field metadata-provider (mt/id :orders :total)) + query (-> (lib/query metadata-provider orders) + (lib/breakout (lib/with-temporal-bucket orders-created-at :month)) + (lib/aggregate (lib/cum-sum orders-total)) + (lib/limit 3) + (assoc-in [:middleware :format-rows?] false))] + (mt/with-native-query-testing-context query + (is (= [[#t "2016-04-01" 52.76] + [#t "2016-05-01" 1318.49] + [#t "2016-06-01" 3391.41]] + (mt/formatted-rows [->local-date 2.0] + (qp/process-query query))))))))) + +(deftest ^:parallel cumulative-count-test + (mt/test-drivers (mt/normal-drivers) + (testing "cumulative count aggregations" + (testing "w/o breakout should be treated the same as count" + (let [query (mt/mbql-query users + {:aggregation [[:cum-count $id]]})] + (mt/with-native-query-testing-context query + (is (= [[15]] + (mt/formatted-rows [int] + (qp/process-query query)))))))))) + +(deftest ^:parallel cumulative-count-with-breakout-test + (mt/test-drivers (mt/normal-drivers) + (testing "w/ breakout on field with distinct values" + (let [query (mt/mbql-query users + {:aggregation [[:cum-count $id]] + :breakout [$name]})] + (mt/with-native-query-testing-context query + (is (= [["Broen Olujimi" 1] + ["Conchúr Tihomir" 2] + ["Dwight Gresham" 3] + ["Felipinho Asklepios" 4] + ["Frans Hevel" 5] + ["Kaneonuskatew Eiran" 6] + ["Kfir Caj" 7] + ["Nils Gotam" 8] + ["Plato Yeshua" 9] + ["Quentin Sören" 10] + ["Rüstem Hebel" 11] + ["Shad Ferdynand" 12] + ["Simcha Yan" 13] + ["Spiros Teofil" 14] + ["Szymon Theutrich" 15]] + (mt/formatted-rows [str int] + (qp/process-query query))))))))) + +(deftest ^:parallel cumulative-count-with-breakout-test-2 + (mt/test-drivers (mt/normal-drivers) + (testing "w/ breakout on field that requires grouping" + (let [query (mt/mbql-query venues + {:aggregation [[:cum-count $id]] + :breakout [$price]})] + (mt/with-native-query-testing-context query + (is (= [[1 22] + [2 81] + [3 94] + [4 100]] + (mt/formatted-rows [int int] + (qp/process-query query))))))))) + +(deftest ^:parallel cumulative-count-with-multiple-breakouts-test + (mt/test-drivers (mt/normal-drivers) + (let [query (-> (mt/mbql-query orders + {:aggregation [[:cum-count]] + :breakout [!year.created_at !month.created_at] + :limit 3}) + (assoc-in [:middleware :format-rows?] false))] + (mt/with-native-query-testing-context query + (is (= [[#t "2016-01-01" #t "2016-04-01" 1] + [#t "2016-01-01" #t "2016-05-01" 20] + [#t "2016-01-01" #t "2016-06-01" 57]] + (mt/formatted-rows [->local-date ->local-date int] + (qp/process-query query)))))))) + +(deftest ^:parallel cumulative-count-without-field-test + (mt/test-drivers (mt/normal-drivers) + (testing "cumulative count without a field" + (let [query (mt/mbql-query venues + {:aggregation [[:cum-count]] + :breakout [$price]})] + (mt/with-native-query-testing-context query + (is (= [[1 22] + [2 81] + [3 94] + [4 100]] + (mt/formatted-rows [int int] + (qp/process-query query))))))))) + +(deftest ^:parallel cumulative-count-with-bucketed-breakout-test + (mt/test-drivers (mt/normal-drivers) + (testing "cumulative count with a temporally bucketed breakout" + (mt/test-drivers (mt/normal-drivers-with-feature :window-functions) + (let [metadata-provider (lib.metadata.jvm/application-database-metadata-provider (mt/id)) + orders (lib.metadata/table metadata-provider (mt/id :orders)) + orders-created-at (lib.metadata/field metadata-provider (mt/id :orders :created_at)) + query (-> (lib/query metadata-provider orders) + (lib/breakout (lib/with-temporal-bucket orders-created-at :month)) + (lib/aggregate (lib/cum-count)) + (lib/limit 3) + (assoc-in [:middleware :format-rows?] false))] + (mt/with-native-query-testing-context query + (is (= [[#t "2016-04-01" 1] + [#t "2016-05-01" 20] + [#t "2016-06-01" 57]] + (mt/formatted-rows [->local-date int] + (qp/process-query query)))))))))) + +(deftest ^:parallel cumulative-sum-with-multiple-breakouts-test + (mt/test-drivers (mt/normal-drivers) + (let [query (-> (mt/mbql-query orders + {:aggregation [[:cum-sum $total]] + :breakout [!year.created_at !month.created_at] + :limit 3}) + (assoc-in [:middleware :format-rows?] false))] + (mt/with-native-query-testing-context query + (is (= [[#t "2016-01-01" #t "2016-04-01" 52] + [#t "2016-01-01" #t "2016-05-01" 1318] + [#t "2016-01-01" #t "2016-06-01" 3391]] + (mt/formatted-rows [->local-date ->local-date int] + (qp/process-query query)))))))) + +(deftest ^:parallel cumulative-count-and-sum-in-expressions-test + (testing "Cumulative count should work inside expressions (#13634, #15118)" + (mt/test-drivers (mt/normal-drivers-with-feature :window-functions) + (let [metadata-provider (lib.metadata.jvm/application-database-metadata-provider (mt/id)) + orders (lib.metadata/table metadata-provider (mt/id :orders)) + orders-created-at (lib.metadata/field metadata-provider (mt/id :orders :created_at)) + orders-total (lib.metadata/field metadata-provider (mt/id :orders :total)) + query (-> (lib/query metadata-provider orders) + ;; 1. month + (lib/breakout (lib/with-temporal-bucket orders-created-at :month)) + ;; 2. cumulative count of orders + (lib/aggregate (lib/cum-count)) + ;; 3. cumulative sum of orders + (lib/aggregate (lib/cum-sum orders-total)) + ;; 4. cumulative average order price (cumulative sum of total / cumulative count) + (lib/aggregate (lib// (lib/cum-sum orders-total) + (lib/cum-count))) + (lib/limit 3) + (assoc-in [:middleware :format-rows?] false))] + (mt/with-native-query-testing-context query + ;; 1 2 3 4 + (is (= [[#t "2016-04-01" 1 52.76 52.76] + [#t "2016-05-01" 20 1318.49 65.92] + [#t "2016-06-01" 57 3391.41 59.50]] + (mt/formatted-rows [->local-date int 2.0 2.0] + (qp/process-query query))))))))) diff --git a/test/metabase/query_processor_test/expression_aggregations_test.clj b/test/metabase/query_processor_test/expression_aggregations_test.clj index 653c72b1072b6f113ccb6b6c8c4ff955fe9068ec..eaaf19f0683a33b80722c7ec980984719b45494d 100644 --- a/test/metabase/query_processor_test/expression_aggregations_test.clj +++ b/test/metabase/query_processor_test/expression_aggregations_test.clj @@ -332,33 +332,3 @@ {:aggregation [[:aggregation-options [:sum $rating] {:name "MyCE"}]] :breakout [$category] :order-by [[:asc [:aggregation 0]]]})))))))) - -;;; this is a repo for #15118, which is not fixed yet. - -#_(deftest ^:parallel multiple-cumulative-sums-test - (mt/test-drivers (mt/normal-drivers-with-feature :expression-aggregations) - (testing "The results of divide or multiply two CumulativeSum should be correct (#15118)" - (mt/dataset test-data - (is (= [["2016-01-01T00:00:00Z" 3236 2458.0 5694.0 1] - ["2017-01-01T00:00:00Z" 17587 14995.0 32582.0 2] - ["2018-01-01T00:00:00Z" 40381 35366.5 75747.5 3] - ["2019-01-01T00:00:00Z" 65835 58002.7 123837.7 4] - ["2020-01-01T00:00:00Z" 69540 64923.0 134463.0 5]] - (mt/formatted-rows [identity int 2.0 2.0 int] - (mt/run-mbql-query orders - {:aggregation - [[:aggregation-options [:cum-sum $quantity] {:display-name "C1"}] - [:aggregation-options - [:cum-sum $product_id->products.rating] - {:display-name "C2"}] - [:aggregation-options - [:+ - [:cum-sum $quantity] - [:cum-sum $product_id->products.rating]] - {:display-name "C3"}] - [:aggregation-options - [:* - [:cum-sum $quantity] - [:cum-sum $product_id->products.rating]] - {:display-name "C4"}]] - :breakout [!year.created_at]})))))))) diff --git a/test/metabase/test/data/h2.clj b/test/metabase/test/data/h2.clj index ee1657a3a012389bd705689c25959b46f995f999..c50c17fdf6715a747533b86583ff4af2c50501c1 100644 --- a/test/metabase/test/data/h2.clj +++ b/test/metabase/test/data/h2.clj @@ -100,13 +100,19 @@ (merge ((get-method tx/aggregate-column-info ::tx/test-extensions) driver ag-type) (when (= ag-type :count) - {:base_type :type/BigInteger}))) + {:base_type :type/BigInteger}) + (when (= ag-type :cum-count) + {:base_type :type/Decimal}))) ([driver ag-type field] (merge ((get-method tx/aggregate-column-info ::tx/test-extensions) driver ag-type field) - (when (#{:sum :cum-count} ag-type) - {:base_type :type/BigInteger})))) + (when (= ag-type :sum) + {:base_type :type/BigInteger}) + ;; because it's implemented as sum(count(field)) OVER (...). But shouldn't a sum of integers be an + ;; integer? :thinking_face: + (when (= ag-type :cum-count) + {:base_type :type/Decimal})))) (defmethod execute/execute-sql! :h2 [driver _ dbdef sql] diff --git a/test/metabase/test/data/interface.clj b/test/metabase/test/data/interface.clj index b0632d8b418e1c8d18099361351d6712b337d2c4..a236790c1d69db99dbec206bddf7370015b0f147 100644 --- a/test/metabase/test/data/interface.clj +++ b/test/metabase/test/data/interface.clj @@ -428,10 +428,14 @@ (defmethod aggregate-column-info ::test-extensions ([_ aggregation-type] (assert (#{:count :cum-count} aggregation-type)) - {:base_type :type/BigInteger + {:base_type (case aggregation-type + :count :type/BigInteger + :cum-count :type/Decimal) :semantic_type :type/Quantity :name "count" - :display_name "Count" + :display_name (case aggregation-type + :count "Count" + :cum-count "Cumulative count") :source :aggregation :field_ref [:aggregation 0]}) @@ -443,7 +447,7 @@ :query {:source-table table-id :aggregation [[aggregation-type [:field-id field-id]]]}})) (when (= aggregation-type :cum-count) - {:base_type :type/BigInteger + {:base_type :type/Decimal :semantic_type :type/Quantity}))))