diff --git a/src/metabase/driver/generic_sql/query_processor.clj b/src/metabase/driver/generic_sql/query_processor.clj index 8c7052f8990b4fb6b2e4013fb0c27332ff5cd3b2..74652457fef5abe36ed9c6488131d365c27d11db 100644 --- a/src/metabase/driver/generic_sql/query_processor.clj +++ b/src/metabase/driver/generic_sql/query_processor.clj @@ -142,12 +142,19 @@ ;; for division we want to go ahead and convert any integer args to floats, because something like field / 2 will do ;; integer division and give us something like 1.0 where we would rather see something like 1.5 +;; +;; also, we want to gracefully handle situations where the column is ZERO and just swap it out with NULL instead, so +;; we don't get divide by zero errors. SQL DBs always return NULL when dividing by NULL (AFAIK) (defmethod ->honeysql [Object :/] [driver [_ & args]] - (apply hsql/call :/ (for [arg args] - (->honeysql driver (if (integer? arg) - (double arg) - arg))))) + (let [args (for [arg args] + (->honeysql driver (if (integer? arg) + (double arg) + arg)))] + (apply hsql/call :/ (first args) (for [arg (rest args)] + (hsql/call :case + (hsql/call := arg 0) nil + :else arg))))) (defmethod ->honeysql [Object :named] [driver [_ ag ag-name]] (->honeysql driver ag)) diff --git a/src/metabase/mbql/schema.clj b/src/metabase/mbql/schema.clj index 9b5027e851878a7db3c326440fe43e29d26a3414..4cf3e7705d21d0f420b011af5598ff71ac400d09 100644 --- a/src/metabase/mbql/schema.clj +++ b/src/metabase/mbql/schema.clj @@ -219,7 +219,7 @@ ;; Expressions are "calculated column" definitions, defined once and then used elsewhere in the MBQL query. -(declare ExpressionDef) +(declare ArithmeticExpression) (def ^:private ExpressionArg (s/conditional @@ -227,7 +227,7 @@ s/Num (partial is-clause? #{:+ :- :/ :*}) - (s/recursive #'ExpressionDef) + (s/recursive #'ArithmeticExpression) :else Field)) @@ -237,18 +237,20 @@ (defclause ^{:requires-features #{:expressions}} /, x ExpressionArg, y ExpressionArg, more (rest ExpressionArg)) (defclause ^{:requires-features #{:expressions}} *, x ExpressionArg, y ExpressionArg, more (rest ExpressionArg)) -(def ExpressionDef - "Schema for a valid expression definition, as defined under the top-level MBQL `:expressions`." +(def ^:private ArithmeticExpression + "Schema for the definition of an arithmetic expression." (one-of + - / *)) - -;;; -------------------------------------------------- Aggregations -------------------------------------------------- - -(def ^:private FieldOrExpressionDef +(def FieldOrExpressionDef + "Schema for anything that is accepted as a top-level expression definition, either an arithmetic expression such as a + `:+` clause or a Field clause such as `:field-id`." (s/if (partial is-clause? #{:+ :- :* :/}) - ExpressionDef + ArithmeticExpression Field)) + +;;; -------------------------------------------------- Aggregations -------------------------------------------------- + ;; For all of the 'normal' Aggregations below (excluding Metrics) fields are implicit Field IDs ;; cum-sum and cum-count are SUGAR because they're implemented in middleware. They clauses are swapped out with @@ -549,7 +551,7 @@ (s/optional-key :aggregation) (su/non-empty [Aggregation]) (s/optional-key :breakout) (su/non-empty [Field]) ; TODO - expressions keys should be strings; fix this when we get a chance - (s/optional-key :expressions) {s/Keyword ExpressionDef} + (s/optional-key :expressions) {s/Keyword FieldOrExpressionDef} ;; TODO - should this be `distinct-non-empty`? (s/optional-key :fields) (su/non-empty [Field]) (s/optional-key :filter) Filter diff --git a/src/metabase/mbql/util.clj b/src/metabase/mbql/util.clj index 9489c0be21aa6dbec8e54a799da76c0461d217e5..e36f21020add389780990958a8fa05ec2fc2cf50 100644 --- a/src/metabase/mbql/util.clj +++ b/src/metabase/mbql/util.clj @@ -353,7 +353,7 @@ (-> query :query :join-tables)))) -(s/defn expression-with-name :- mbql.s/ExpressionDef +(s/defn expression-with-name :- mbql.s/FieldOrExpressionDef "Return the `Expression` referenced by a given `expression-name`." [query :- mbql.s/Query, expression-name :- su/NonBlankString] (or (get-in query, [:query :expressions (keyword expression-name)]) diff --git a/test/metabase/query_processor_test/expressions_test.clj b/test/metabase/query_processor_test/expressions_test.clj index 7982c9948ac9bc25b0e7c571634bb32af84f3573..3d975118916bef0c80faa7b2e421c9e808750398 100644 --- a/test/metabase/query_processor_test/expressions_test.clj +++ b/test/metabase/query_processor_test/expressions_test.clj @@ -96,3 +96,79 @@ {:aggregation [:named [:sum [:* $price -1]] "x"] :breakout [$category_id]}) (get-in [:data :cols]))))) + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | HANDLING NULLS AND ZEROES | +;;; +----------------------------------------------------------------------------------------------------------------+ + +;; "bird scarcity" is a scientific metric based on the number of birds seen in a given day +;; (at least for the purposes of the tests below) +;; +;; e.g. scarcity = 100.0 / num-birds +(defmacro ^:private calculate-bird-scarcity [formula & [filter-clause]] + `(metabase.test.data/dataset ~'daily-bird-counts + (->> (data/run-mbql-query ~'bird-count + {:expressions {"bird-scarcity" ~formula} + :fields [[:expression "bird-scarcity"]] + :filter ~filter-clause + :order-by [[:asc [:field-id $date]]] + :limit 10}) + rows + (format-rows-by [(partial u/round-to-decimals 2)])))) + +;; hey... expressions should work if they are just a Field! (Also, this lets us take a peek at the raw values being +;; used to calculate the formulas below, so we can tell at a glance if they're right without referring to the EDN def) +(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions) + [[nil] [0.0] [0.0] [10.0] [8.0] [5.0] [5.0] [nil] [0.0] [0.0]] + (calculate-bird-scarcity [:field-id $count])) + +;; do expressions automatically handle division by zero? Should return `nil` in the results for places where that was +;; attempted +(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions) + [[nil] [nil] [10.0] [12.5] [20.0] [20.0] [nil] [nil] [9.09] [7.14]] + (calculate-bird-scarcity [:/ 100.0 [:field-id $count]] + [:!= $count nil])) + +;; do expressions handle division by `nil`? Should return `nil` in the results for places where that was attempted +(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions) + [[nil] [10.0] [12.5] [20.0] [20.0] [nil] [9.09] [7.14] [12.5] [7.14]] + (calculate-bird-scarcity [:/ 100.0 [:field-id $count]] + [:or + [:= $count nil] + [:!= $count 0]])) + +;; can we handle BOTH NULLS AND ZEROES AT THE SAME TIME???? +(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions) + [[nil] [nil] [nil] [10.0] [12.5] [20.0] [20.0] [nil] [nil] [nil]] + (calculate-bird-scarcity [:/ 100.0 [:field-id $count]])) + +;; ok, what if we use multiple args to divide, and more than one is zero? +(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions) + [[nil] [nil] [nil] [1.0] [1.56] [4.0] [4.0] [nil] [nil] [nil]] + (calculate-bird-scarcity [:/ 100.0 [:field-id $count] [:field-id $count]])) + +;; are nulls/zeroes still handled appropriately when nested inside other expressions? +(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions) + [[nil] [nil] [nil] [20.0] [25.0] [40.0] [40.0] [nil] [nil] [nil]] + (calculate-bird-scarcity [:* [:/ 100.0 [:field-id $count]] 2])) + +;; if a zero is present in the NUMERATOR we should return ZERO and not NULL +;; (`0 / 10 = 0`; `10 / 0 = NULL`, at least as far as MBQL is concerned) +(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions) + [[nil] [0.0] [0.0] [1.0] [0.8] [0.5] [0.5] [nil] [0.0] [0.0]] + (calculate-bird-scarcity [:/ [:field-id $count] 10])) + +;; can addition handle nulls & zeroes? +(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions) + [[nil] [10.0] [10.0] [20.0] [18.0] [15.0] [15.0] [nil] [10.0] [10.0]] + (calculate-bird-scarcity [:+ [:field-id $count] 10])) + +;; can subtraction handle nulls & zeroes? +(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions) + [[nil] [10.0] [10.0] [0.0] [2.0] [5.0] [5.0] [nil] [10.0] [10.0]] + (calculate-bird-scarcity [:- 10 [:field-id $count]])) + +;; can multiplications handle nulls & zeros? +(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions) + [[nil] [0.0] [0.0] [10.0] [8.0] [5.0] [5.0] [nil] [0.0] [0.0]] + (calculate-bird-scarcity [:* 1 [:field-id $count]])) diff --git a/test/metabase/test/data/dataset_definitions.clj b/test/metabase/test/data/dataset_definitions.clj index 0d4c6b2f719ce680edae59f0295610e178d628a5..36172fe9f31d92a4e47cd5b32df80a3985208eec 100644 --- a/test/metabase/test/data/dataset_definitions.clj +++ b/test/metabase/test/data/dataset_definitions.clj @@ -24,6 +24,10 @@ ;; sender and receiver -- allowing us to test situations where multiple joins for a *single* table should occur. (di/def-database-definition-edn avian-singles) +;; A small dataset that includes an integer column with some NULL and ZERO values, meant for testing things like +;; expressions to make sure they behave correctly +(di/def-database-definition-edn daily-bird-counts) + (defn- date-only "This function emulates a date only field as it would come from the JDBC driver. The hour/minute/second/millisecond fields should be 0s" diff --git a/test/metabase/test/data/dataset_definitions/daily-bird-counts.edn b/test/metabase/test/data/dataset_definitions/daily-bird-counts.edn new file mode 100644 index 0000000000000000000000000000000000000000..3ce8deb29284d3d048f02842b18866374750d74a --- /dev/null +++ b/test/metabase/test/data/dataset_definitions/daily-bird-counts.edn @@ -0,0 +1,54 @@ +[["bird-count" [{:field-name "date" + :base-type :type/Date} + {:field-name "count" + :base-type :type/Integer}] + [[#inst "2018-09-20" nil] + [#inst "2018-09-21" 0] + [#inst "2018-09-22" 0] + [#inst "2018-09-23" 10] + [#inst "2018-09-24" 8] + [#inst "2018-09-25" 5] + [#inst "2018-09-26" 5] + [#inst "2018-09-27" nil] + [#inst "2018-09-28" 0] + [#inst "2018-09-29" 0] + [#inst "2018-09-30" 11] + [#inst "2018-10-01" 14] + [#inst "2018-10-02" 8] + [#inst "2018-10-03" 14] + [#inst "2018-10-04" nil] + [#inst "2018-10-05" 6] + [#inst "2018-10-06" 4] + [#inst "2018-10-07" 0] + [#inst "2018-10-08" nil] + [#inst "2018-10-09" 3] + [#inst "2018-10-10" 13] + [#inst "2018-10-11" nil] + [#inst "2018-10-12" 14] + [#inst "2018-10-13" 6] + [#inst "2018-10-14" 12] + [#inst "2018-10-15" 13] + [#inst "2018-10-16" 0] + [#inst "2018-10-17" 7] + [#inst "2018-10-18" 10] + [#inst "2018-10-19" nil] + [#inst "2018-10-20" 14] + [#inst "2018-10-21" 17] + [#inst "2018-10-22" 15] + [#inst "2018-10-23" 13] + [#inst "2018-10-24" 10] + [#inst "2018-10-25" 5] + [#inst "2018-10-26" nil] + [#inst "2018-10-27" 3] + [#inst "2018-10-28" 2] + [#inst "2018-10-29" 0] + [#inst "2018-10-30" 13] + [#inst "2018-10-31" nil] + [#inst "2018-11-01" nil] + [#inst "2018-11-02" 7] + [#inst "2018-11-03" 8] + [#inst "2018-11-04" 12] + [#inst "2018-11-05" 10] + [#inst "2018-11-06" 6] + [#inst "2018-11-07" 2] + [#inst "2018-11-08" 1]]]]