Skip to content
Snippets Groups Projects
Unverified Commit e0c1ce2b authored by Cam Saul's avatar Cam Saul
Browse files

Add new bird-count dataset; fix division by zero in MBQL expressions

parent 1cf37548
No related branches found
No related tags found
No related merge requests found
......@@ -142,12 +142,19 @@
;; for division we want to go ahead and convert any integer args to floats, because something like field / 2 will do
;; integer division and give us something like 1.0 where we would rather see something like 1.5
;;
;; also, we want to gracefully handle situations where the column is ZERO and just swap it out with NULL instead, so
;; we don't get divide by zero errors. SQL DBs always return NULL when dividing by NULL (AFAIK)
(defmethod ->honeysql [Object :/]
[driver [_ & args]]
(apply hsql/call :/ (for [arg args]
(->honeysql driver (if (integer? arg)
(double arg)
arg)))))
(let [args (for [arg args]
(->honeysql driver (if (integer? arg)
(double arg)
arg)))]
(apply hsql/call :/ (first args) (for [arg (rest args)]
(hsql/call :case
(hsql/call := arg 0) nil
:else arg)))))
(defmethod ->honeysql [Object :named] [driver [_ ag ag-name]]
(->honeysql driver ag))
......
......@@ -219,7 +219,7 @@
;; Expressions are "calculated column" definitions, defined once and then used elsewhere in the MBQL query.
(declare ExpressionDef)
(declare ArithmeticExpression)
(def ^:private ExpressionArg
(s/conditional
......@@ -227,7 +227,7 @@
s/Num
(partial is-clause? #{:+ :- :/ :*})
(s/recursive #'ExpressionDef)
(s/recursive #'ArithmeticExpression)
:else
Field))
......@@ -237,18 +237,20 @@
(defclause ^{:requires-features #{:expressions}} /, x ExpressionArg, y ExpressionArg, more (rest ExpressionArg))
(defclause ^{:requires-features #{:expressions}} *, x ExpressionArg, y ExpressionArg, more (rest ExpressionArg))
(def ExpressionDef
"Schema for a valid expression definition, as defined under the top-level MBQL `:expressions`."
(def ^:private ArithmeticExpression
"Schema for the definition of an arithmetic expression."
(one-of + - / *))
;;; -------------------------------------------------- Aggregations --------------------------------------------------
(def ^:private FieldOrExpressionDef
(def FieldOrExpressionDef
"Schema for anything that is accepted as a top-level expression definition, either an arithmetic expression such as a
`:+` clause or a Field clause such as `:field-id`."
(s/if (partial is-clause? #{:+ :- :* :/})
ExpressionDef
ArithmeticExpression
Field))
;;; -------------------------------------------------- Aggregations --------------------------------------------------
;; For all of the 'normal' Aggregations below (excluding Metrics) fields are implicit Field IDs
;; cum-sum and cum-count are SUGAR because they're implemented in middleware. They clauses are swapped out with
......@@ -549,7 +551,7 @@
(s/optional-key :aggregation) (su/non-empty [Aggregation])
(s/optional-key :breakout) (su/non-empty [Field])
; TODO - expressions keys should be strings; fix this when we get a chance
(s/optional-key :expressions) {s/Keyword ExpressionDef}
(s/optional-key :expressions) {s/Keyword FieldOrExpressionDef}
;; TODO - should this be `distinct-non-empty`?
(s/optional-key :fields) (su/non-empty [Field])
(s/optional-key :filter) Filter
......
......@@ -353,7 +353,7 @@
(-> query :query :join-tables))))
(s/defn expression-with-name :- mbql.s/ExpressionDef
(s/defn expression-with-name :- mbql.s/FieldOrExpressionDef
"Return the `Expression` referenced by a given `expression-name`."
[query :- mbql.s/Query, expression-name :- su/NonBlankString]
(or (get-in query, [:query :expressions (keyword expression-name)])
......
......@@ -96,3 +96,79 @@
{:aggregation [:named [:sum [:* $price -1]] "x"]
:breakout [$category_id]})
(get-in [:data :cols])))))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | HANDLING NULLS AND ZEROES |
;;; +----------------------------------------------------------------------------------------------------------------+
;; "bird scarcity" is a scientific metric based on the number of birds seen in a given day
;; (at least for the purposes of the tests below)
;;
;; e.g. scarcity = 100.0 / num-birds
(defmacro ^:private calculate-bird-scarcity [formula & [filter-clause]]
`(metabase.test.data/dataset ~'daily-bird-counts
(->> (data/run-mbql-query ~'bird-count
{:expressions {"bird-scarcity" ~formula}
:fields [[:expression "bird-scarcity"]]
:filter ~filter-clause
:order-by [[:asc [:field-id $date]]]
:limit 10})
rows
(format-rows-by [(partial u/round-to-decimals 2)]))))
;; hey... expressions should work if they are just a Field! (Also, this lets us take a peek at the raw values being
;; used to calculate the formulas below, so we can tell at a glance if they're right without referring to the EDN def)
(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions)
[[nil] [0.0] [0.0] [10.0] [8.0] [5.0] [5.0] [nil] [0.0] [0.0]]
(calculate-bird-scarcity [:field-id $count]))
;; do expressions automatically handle division by zero? Should return `nil` in the results for places where that was
;; attempted
(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions)
[[nil] [nil] [10.0] [12.5] [20.0] [20.0] [nil] [nil] [9.09] [7.14]]
(calculate-bird-scarcity [:/ 100.0 [:field-id $count]]
[:!= $count nil]))
;; do expressions handle division by `nil`? Should return `nil` in the results for places where that was attempted
(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions)
[[nil] [10.0] [12.5] [20.0] [20.0] [nil] [9.09] [7.14] [12.5] [7.14]]
(calculate-bird-scarcity [:/ 100.0 [:field-id $count]]
[:or
[:= $count nil]
[:!= $count 0]]))
;; can we handle BOTH NULLS AND ZEROES AT THE SAME TIME????
(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions)
[[nil] [nil] [nil] [10.0] [12.5] [20.0] [20.0] [nil] [nil] [nil]]
(calculate-bird-scarcity [:/ 100.0 [:field-id $count]]))
;; ok, what if we use multiple args to divide, and more than one is zero?
(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions)
[[nil] [nil] [nil] [1.0] [1.56] [4.0] [4.0] [nil] [nil] [nil]]
(calculate-bird-scarcity [:/ 100.0 [:field-id $count] [:field-id $count]]))
;; are nulls/zeroes still handled appropriately when nested inside other expressions?
(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions)
[[nil] [nil] [nil] [20.0] [25.0] [40.0] [40.0] [nil] [nil] [nil]]
(calculate-bird-scarcity [:* [:/ 100.0 [:field-id $count]] 2]))
;; if a zero is present in the NUMERATOR we should return ZERO and not NULL
;; (`0 / 10 = 0`; `10 / 0 = NULL`, at least as far as MBQL is concerned)
(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions)
[[nil] [0.0] [0.0] [1.0] [0.8] [0.5] [0.5] [nil] [0.0] [0.0]]
(calculate-bird-scarcity [:/ [:field-id $count] 10]))
;; can addition handle nulls & zeroes?
(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions)
[[nil] [10.0] [10.0] [20.0] [18.0] [15.0] [15.0] [nil] [10.0] [10.0]]
(calculate-bird-scarcity [:+ [:field-id $count] 10]))
;; can subtraction handle nulls & zeroes?
(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions)
[[nil] [10.0] [10.0] [0.0] [2.0] [5.0] [5.0] [nil] [10.0] [10.0]]
(calculate-bird-scarcity [:- 10 [:field-id $count]]))
;; can multiplications handle nulls & zeros?
(datasets/expect-with-engines (non-timeseries-engines-with-feature :expressions)
[[nil] [0.0] [0.0] [10.0] [8.0] [5.0] [5.0] [nil] [0.0] [0.0]]
(calculate-bird-scarcity [:* 1 [:field-id $count]]))
......@@ -24,6 +24,10 @@
;; sender and receiver -- allowing us to test situations where multiple joins for a *single* table should occur.
(di/def-database-definition-edn avian-singles)
;; A small dataset that includes an integer column with some NULL and ZERO values, meant for testing things like
;; expressions to make sure they behave correctly
(di/def-database-definition-edn daily-bird-counts)
(defn- date-only
"This function emulates a date only field as it would come from the
JDBC driver. The hour/minute/second/millisecond fields should be 0s"
......
[["bird-count" [{:field-name "date"
:base-type :type/Date}
{:field-name "count"
:base-type :type/Integer}]
[[#inst "2018-09-20" nil]
[#inst "2018-09-21" 0]
[#inst "2018-09-22" 0]
[#inst "2018-09-23" 10]
[#inst "2018-09-24" 8]
[#inst "2018-09-25" 5]
[#inst "2018-09-26" 5]
[#inst "2018-09-27" nil]
[#inst "2018-09-28" 0]
[#inst "2018-09-29" 0]
[#inst "2018-09-30" 11]
[#inst "2018-10-01" 14]
[#inst "2018-10-02" 8]
[#inst "2018-10-03" 14]
[#inst "2018-10-04" nil]
[#inst "2018-10-05" 6]
[#inst "2018-10-06" 4]
[#inst "2018-10-07" 0]
[#inst "2018-10-08" nil]
[#inst "2018-10-09" 3]
[#inst "2018-10-10" 13]
[#inst "2018-10-11" nil]
[#inst "2018-10-12" 14]
[#inst "2018-10-13" 6]
[#inst "2018-10-14" 12]
[#inst "2018-10-15" 13]
[#inst "2018-10-16" 0]
[#inst "2018-10-17" 7]
[#inst "2018-10-18" 10]
[#inst "2018-10-19" nil]
[#inst "2018-10-20" 14]
[#inst "2018-10-21" 17]
[#inst "2018-10-22" 15]
[#inst "2018-10-23" 13]
[#inst "2018-10-24" 10]
[#inst "2018-10-25" 5]
[#inst "2018-10-26" nil]
[#inst "2018-10-27" 3]
[#inst "2018-10-28" 2]
[#inst "2018-10-29" 0]
[#inst "2018-10-30" 13]
[#inst "2018-10-31" nil]
[#inst "2018-11-01" nil]
[#inst "2018-11-02" 7]
[#inst "2018-11-03" 8]
[#inst "2018-11-04" 12]
[#inst "2018-11-05" 10]
[#inst "2018-11-06" 6]
[#inst "2018-11-07" 2]
[#inst "2018-11-08" 1]]]]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment