Skip to content
Snippets Groups Projects
Unverified Commit 361cb0bf authored by Cam Saul's avatar Cam Saul
Browse files

Auto-bucket datetime field in filter clauses :calendar: [ci drivers]

parent af1dd5c8
No related branches found
No related tags found
No related merge requests found
Showing with 335 additions and 78 deletions
......@@ -111,7 +111,6 @@
_ (->rvalue &match)))))})
;;; ----------------------------------------------------- filter -----------------------------------------------------
(defmulti ^:private parse-filter mbql.u/dispatch-by-clause-name-or-class)
......@@ -165,8 +164,8 @@
;; remove all clauses that operate on datetime fields or built-in segments because we don't want to handle them
;; here, we'll do that seperately with the filter:interval and handle-filter:built-in-segment stuff below
;;
;; (Recall that `auto-bucket-datetime-breakouts` guarantees all datetime Fields will be wrapped by
;; `:datetime-field` clauses in a fully-preprocessed query.)
;; (Recall that `auto-bucket-datetimes` guarantees all datetime Fields will be wrapped by `:datetime-field`
;; clauses in a fully-preprocessed query.)
(let [filter (parse-filter (mbql.u/replace filter-clause
[:segment (_ :guard mbql.u/ga-id?)] nil
[_ [:datetime-field & _] & _] nil))]
......
......@@ -21,3 +21,7 @@
(def ^{:arglists '([field-clause])} Field?
"Is this a valid Field clause?"
(complement (s/checker mbql.s/Field)))
(def ^{:arglists '([filter-clause])} Filter?
"Is this a valid `:filter` clause?"
(complement (s/checker mbql.s/Filter)))
......@@ -148,8 +148,9 @@
;; `datetime-field` is used to specify DATE BUCKETING for a Field that represents a moment in time of some sort. There
;; is no requirement that all `:type/DateTime` derived Fields be wrapped in `datetime-field`, but for legacy reasons
;; `:field-id` clauses that refer to datetime Fields will be automatically "bucketed" in the `:breakout` clause, but
;; nowhere else. See `auto-bucket-datetime-breakouts` for more details. `:field-id` clauses elsewhere will not be
;; `:field-id` clauses that refer to datetime Fields will be automatically "bucketed" in the `:breakout` and `:filter`
;; clauses, but nowhere else. Auto-bucketing only applies to `:filter` clauses when values for comparison are
;; `yyyy-MM-dd` date strings. See `auto-bucket-datetimes` for more details. `:field-id` clauses elsewhere will not be
;; automatically bucketed, so drivers still need to make sure they do any special datetime handling for plain
;; `:field-id` clauses when their Field derives from `:type/DateTime`.
;;
......
......@@ -389,11 +389,30 @@
(ga-id? id))
(defn datetime-field?
"Does `field` have a base type or special type that derives from `:type/DateTime`?"
"Is `field` used to record something date or time related, i.e. does `field` have a base type or special type that
derives from `:type/DateTime`?
For historical reasons `:type/Time` derivies from `:type/DateTime`, meaning this function will still return true for
Fields that record only time. You can use `datetime-but-not-time-field?` instead if you want to exclude time
Fields."
[field]
(or (isa? (:base_type field) :type/DateTime)
(isa? (:special_type field) :type/DateTime)))
(defn time-field?
"Is `field` used to record a time of day (e.g. hour/minute/second), but not the date itself? i.e. does `field` have a
base type or special type that derives from `:type/Time`?"
[field]
(or (isa? (:base_type field) :type/Time)
(isa? (:special_type field) :type/Time)))
(defn datetime-but-not-time-field?
"Is `field` used to record a specific moment in time, i.e. does `field` have a base type or special type that derives
from `:type/DateTime` but not `:type/Time`?"
[field]
(and (datetime-field? field)
(not (time-field? field))))
;;; --------------------------------- Unique names & transforming ags to have names ----------------------------------
......
......@@ -17,7 +17,7 @@
[add-row-count-and-status :as row-count-and-status]
[add-settings :as add-settings]
[annotate :as annotate]
[auto-bucket-datetime-breakouts :as bucket-datetime]
[auto-bucket-datetimes :as bucket-datetime]
[bind-effective-timezone :as bind-timezone]
[binning :as binning]
[cache :as cache]
......@@ -118,7 +118,7 @@
add-dim/add-remapping
implicit-clauses/add-implicit-clauses
reconcile-bucketing/reconcile-breakout-and-order-by-bucketing
bucket-datetime/auto-bucket-datetime-breakouts
bucket-datetime/auto-bucket-datetimes
resolve-source-table/resolve-source-table
row-count-and-status/add-row-count-and-status
;; ▼▼▼ RESULTS WRAPPING POINT ▼▼▼ All functions *below* will see results WRAPPED in `:data` during POST-PROCESSING
......
(ns metabase.query-processor.middleware.auto-bucket-datetime-breakouts
"Middleware for automatically bucketing unbucketed `:type/DateTime` breakout Fields with `:day` bucketing."
(:require [metabase.mbql
[schema :as mbql.s]
[util :as mbql.u]]
[metabase.models.field :refer [Field]]
[metabase.util :as u]
[metabase.util.schema :as su]
[schema.core :as s]
[toucan.db :as db]))
(def ^:private FieldTypeInfo
{:base_type (s/maybe su/FieldType)
:special_type (s/maybe su/FieldType)
s/Keyword s/Any})
;; Unfortunately these Fields won't be in the store yet since Field resolution can't happen before we add the implicit
;; `:fields` clause, which happens after this
;;
;; TODO - What we could do tho is fetch all the stuff we need for the Store and then save these Fields in the store,
;; which would save a bit of time when we do resolve them
(s/defn ^:private unbucketed-breakouts->field-id->type-info :- {su/IntGreaterThanZero (s/maybe FieldTypeInfo)}
"Fetch a map of Field ID -> type information for the Fields referred to by the `unbucketed-breakouts`."
[unbucketed-breakouts :- (su/non-empty [mbql.s/field-id])]
(u/key-by :id (db/select [Field :id :base_type :special_type]
:id [:in (set (map second unbucketed-breakouts))])))
(s/defn ^:private wrap-unbucketed-datetime-breakouts :- [mbql.s/Field]
"Wrap each breakout in `breakouts` in a `:datetime-field` clause if appropriate; look at corresponing type
information in `field-id->type-inf` to see if we should do so."
[breakouts :- [mbql.s/Field], field-id->type-info :- {su/IntGreaterThanZero (s/maybe FieldTypeInfo)}]
(mbql.u/replace breakouts
;; don't replace anything that's already wrapping a `field-id`
[(_ :guard keyword?) [:field-id _] & _]
&match
[:field-id (_ :guard (comp mbql.u/datetime-field? field-id->type-info))]
[:datetime-field &match :day]))
(s/defn ^:private auto-bucket-datetime-breakouts* :- mbql.s/Query
[{{breakouts :breakout} :query, :as query} :- mbql.s/Query]
;; find any breakouts in the query that are just plain `[:field-id ...]` clauses
(if-let [unbucketed-breakouts (mbql.u/match breakouts, [(_ :guard keyword?) [:field-id _] & _] nil, [:field-id _] &match)]
;; if we found some unbuketed breakouts, fetch the Fields & type info that are referred to by those breakouts...
(let [field-id->type-info (unbucketed-breakouts->field-id->type-info unbucketed-breakouts)]
;; ...and then update each breakout by wrapping it if appropriate
(update-in query [:query :breakout] #(wrap-unbucketed-datetime-breakouts % field-id->type-info)))
;; otherwise if there are no unbuketed breakouts return the query as-is
query))
(defn auto-bucket-datetime-breakouts
"Middleware that automatically wraps breakout `:field-id` clauses in `[:datetime-field ... :day]` if the Field they
refer to has a type that derives from `:type/DateTime`. (This is done for historic reasons, before datetime
bucketing was added to MBQL; datetime Fields defaulted to breaking out by day. We might want to revisit this
behavior in the future.)"
[qp]
(comp qp auto-bucket-datetime-breakouts*))
(ns metabase.query-processor.middleware.auto-bucket-datetimes
"Middleware for automatically bucketing unbucketed `:type/DateTime` (but not `:type/Time`) Fields with `:day`
bucketing. Applies to any unbucketed Field in a breakout, or fields in a filter clause being compared against
`yyyy-MM-dd` format datetime strings."
(:require [metabase.mbql
[predicates :as mbql.preds]
[schema :as mbql.s]
[util :as mbql.u]]
[metabase.models.field :refer [Field]]
[metabase.util :as u]
[metabase.util.schema :as su]
[schema.core :as s]
[toucan.db :as db]))
(def ^:private FieldTypeInfo
{:base_type (s/maybe su/FieldType)
:special_type (s/maybe su/FieldType)
s/Keyword s/Any})
;; Unfortunately these Fields won't be in the store yet since Field resolution can't happen before we add the implicit
;; `:fields` clause, which happens after this
;;
;; TODO - What we could do tho is fetch all the stuff we need for the Store and then save these Fields in the store,
;; which would save a bit of time when we do resolve them
(s/defn ^:private unbucketed-fields->field-id->type-info :- {su/IntGreaterThanZero (s/maybe FieldTypeInfo)}
"Fetch a map of Field ID -> type information for the Fields referred to by the `unbucketed-fields`."
[unbucketed-fields :- (su/non-empty [mbql.s/field-id])]
(u/key-by :id (db/select [Field :id :base_type :special_type]
:id [:in (set (map second unbucketed-fields))])))
(defn- yyyy-MM-dd-date-string? [x]
(and (string? x)
(re-matches #"^\d{4}-\d{2}-\d{2}$" x)))
(defn- should-not-be-autobucketed?
"Is `x` a clause (or a clause that contains a clause) that we should definitely not autobucket?"
[x]
(or
;; do not autobucket Fields in a filter clause that either:
(when (mbql.preds/Filter? x)
(or
;; * is not and equality or comparison filter. e.g. wouldn't make sense to bucket a field and then check if it is
;; `NOT NULL`
(not (mbql.u/is-clause? #{:= :!= :< :> :<= :>= :between} x))
;; * has arguments that aren't `yyyy-MM-dd` date strings. The only reason we auto-bucket datetime Fields in the
;; * first place is for legacy reasons, if someone is specifying additional info like hour/minute then we
;; * shouldn't assume they want to bucket by day
(let [[_ _ & vs] x]
(not (every? yyyy-MM-dd-date-string? vs)))))
;; do not autobucket field-ids that are already wrapped by another Field clause like `datetime-field` or
;; `binning-strategy`
(and (mbql.preds/Field? x)
(not (mbql.u/is-clause? :field-id x)))))
(s/defn ^:private wrap-unbucketed-fields
"Wrap Fields in breakouts and filters in a `:datetime-field` clause if appropriate; look at corresponing type
information in `field-id->type-inf` to see if we should do so."
;; we only want to wrap clauses in `:breakout` and `:filter` so just make a 3-arg version of this fn that takes the
;; name of the clause to rewrite and call that twice
([query field-id->type-info :- {su/IntGreaterThanZero (s/maybe FieldTypeInfo)}]
(-> query
(wrap-unbucketed-fields field-id->type-info :breakout)
(wrap-unbucketed-fields field-id->type-info :filter)))
([query field-id->type-info clause-to-rewrite]
(mbql.u/replace-in query [:query clause-to-rewrite]
;; don't replace anything that's already wrapping a `field-id`
(_ :guard should-not-be-autobucketed?)
&match
;; if it's a raw `:field-id` and `field-id->type-info` tells us it's a `:type/DateTime` (but not `:type/Time`),
;; then go ahead and replace it
[:field-id (_ :guard (comp mbql.u/datetime-but-not-time-field? field-id->type-info))]
[:datetime-field &match :day])))
(s/defn ^:private auto-bucket-datetimes* :- mbql.s/Query
[{{breakouts :breakout, filter-clause :filter} :query, :as query} :- mbql.s/Query]
;; find any breakouts or filters in the query that are just plain `[:field-id ...]` clauses (unwrapped by any other
;; clause)
(if-let [unbucketed-fields (mbql.u/match (cons filter-clause breakouts)
(_ :guard should-not-be-autobucketed?) nil
[:field-id _] &match)]
;; if we found some unbucketed breakouts/filters, fetch the Fields & type info that are referred to by those
;; breakouts/filters...
(let [field-id->type-info (unbucketed-fields->field-id->type-info unbucketed-fields)]
;; ...and then update each breakout/filter by wrapping it if appropriate
(wrap-unbucketed-fields query field-id->type-info))
;; otherwise if there are no unbuketed breakouts/filters return the query as-is
query))
(defn auto-bucket-datetimes
"Middleware that automatically wraps breakout and filter `:field-id` clauses in `[:datetime-field ... :day]` if the
Field they refer to has a type that derives from `:type/DateTime` (but not `:type/Time`). (This is done for historic
reasons, before datetime bucketing was added to MBQL; datetime Fields defaulted to breaking out by day. We might
want to revisit this behavior in the future.)
Applies to any unbucketed Field in a breakout, or fields in a filter clause being compared against `yyyy-MM-dd`
format datetime strings."
[qp]
(comp qp auto-bucket-datetimes*))
......@@ -11,11 +11,11 @@
;;; --------------------------------------------------- Type Info ----------------------------------------------------
(defmulti ^:private ^{:doc (str "Get information about database, base, and special types for an object. This is passed "
"to along to various `->honeysql` method implementations so drivers have the "
"information they need to handle raw values like Strings, which may need to be parsed "
"as a certain type.")}
type-info
(defmulti ^:private type-info
"Get information about database, base, and special types for an object. This is passed to along to various
`->honeysql` method implementations so drivers have the information they need to handle raw values like Strings,
which may need to be parsed as a certain type."
{:arglists '([field-clause])}
mbql.u/dispatch-by-clause-name-or-class)
(defmethod type-info :default [_] nil)
......@@ -41,8 +41,9 @@
;;; ------------------------------------------------- add-type-info --------------------------------------------------
(defmulti ^:private add-type-info (fn [x info & [{:keys [parse-datetime-strings?]}]]
(class x)))
(defmulti ^:private add-type-info
{:arglists '([x info & {:keys [parse-datetime-strings?]}])}
(fn [x & _] (class x)))
(defmethod add-type-info nil [_ info & _]
[:value nil info])
......@@ -64,8 +65,8 @@
(du/str->time datetime-str (when-let [report-timezone (driver/report-timezone)]
(TimeZone/getTimeZone ^String report-timezone)))))
(defmethod add-type-info String [this info & [{:keys [parse-datetime-strings?]
:or {parse-datetime-strings? true}}]]
(defmethod add-type-info String [this info & {:keys [parse-datetime-strings?]
:or {parse-datetime-strings? true}}]
(if-let [unit (when (and (du/date-string? this)
parse-datetime-strings?)
(:unit info))]
......@@ -95,7 +96,7 @@
(add-type-info max-val (type-info field))]
[(clause :guard #{:starts-with :ends-with :contains}) field (s :guard string?) & more]
(apply vector clause field (add-type-info s (type-info field) {:parse-datetime-strings? false}) more))))
(apply vector clause field (add-type-info s (type-info field), :parse-datetime-strings? false) more))))
(defn- wrap-value-literals*
[{query-type :type, :as query}]
......
(ns metabase.query-processor.middleware.auto-bucket-datetime-breakouts-test
(ns metabase.query-processor.middleware.auto-bucket-datetimes-test
(:require [expectations :refer [expect]]
[metabase.models.field :refer [Field]]
[metabase.query-processor.middleware.auto-bucket-datetime-breakouts :as auto-bucket-datetime-breakouts]
[metabase.query-processor.middleware.auto-bucket-datetimes :as auto-bucket-datetimes]
[metabase.test.data :as data]
[metabase.util :as u]
[toucan.util.test :as tt]
[metabase.test.data :as data]))
[toucan.util.test :as tt]))
(defn- auto-bucket [query]
((auto-bucket-datetime-breakouts/auto-bucket-datetime-breakouts identity)
((auto-bucket-datetimes/auto-bucket-datetimes identity)
query))
(defn- auto-bucket-mbql [mbql-query]
......@@ -23,6 +22,84 @@
{:source-table 1
:breakout [[:field-id (u/get-id field)]]}))
;; does the Field get bucketed if present in the `:filter` clause? (#8932)
;;
;; e.g. `[:= <field> "2018-11-19"] should get rewritten as `[:= [:datetime-field <field> :day] "2018-11-19"]` if
;; `<field>` is a `:type/DateTime` Field
(tt/expect-with-temp [Field [field {:base_type :type/DateTime, :special_type nil}]]
{:source-table 1
:filter [:= [:datetime-field [:field-id (u/get-id field)] :day] "2018-11-19"]}
(auto-bucket-mbql
{:source-table 1
:filter [:= [:field-id (u/get-id field)] "2018-11-19"]}))
;; On the other hand, we shouldn't auto-bucket Fields inside a filter clause if they are being compared against a
;; datetime string that includes more than just yyyy-MM-dd:
(tt/expect-with-temp [Field [field {:base_type :type/DateTime, :special_type nil}]]
{:source-table 1
:filter [:= [:field-id (u/get-id field)] "2018-11-19T14:11:00"]}
(auto-bucket-mbql
{:source-table 1
:filter [:= [:field-id (u/get-id field)] "2018-11-19T14:11:00"]}))
;; for breakouts or other filters with multiple args, all args must be yyyy-MM-dd
(tt/expect-with-temp [Field [field {:base_type :type/DateTime, :special_type nil}]]
{:source-table 1
:filter [:between [:datetime-field [:field-id (u/get-id field)] :day] "2018-11-19" "2018-11-20"]}
(auto-bucket-mbql
{:source-table 1
:filter [:between [:field-id (u/get-id field)] "2018-11-19" "2018-11-20"]}))
(tt/expect-with-temp [Field [field {:base_type :type/DateTime, :special_type nil}]]
{:source-table 1
:filter [:between [:field-id (u/get-id field)] "2018-11-19" "2018-11-20T14:20:00.000Z"]}
(auto-bucket-mbql
{:source-table 1
:filter [:between [:field-id (u/get-id field)] "2018-11-19" "2018-11-20T14:20:00.000Z"]}))
;; if a Field occurs more than once we should only rewrite the instances that should be rebucketed
(tt/expect-with-temp [Field [field {:base_type :type/DateTime, :special_type nil}]]
{:source-table 1
:breakout [[:datetime-field [:field-id (u/get-id field)] :day]]
:filter [:= [:field-id (u/get-id field)] "2018-11-20T14:20:00.000Z"]}
(auto-bucket-mbql
{:source-table 1
:breakout [[:field-id (u/get-id field)]]
:filter [:= [:field-id (u/get-id field)] "2018-11-20T14:20:00.000Z"]}))
(tt/expect-with-temp [Field [field {:base_type :type/DateTime, :special_type nil}]]
{:source-table 1
:breakout [[:datetime-field [:field-id (u/get-id field)] :month]]
:filter [:= [:datetime-field [:field-id (u/get-id field)] :day] "2018-11-20"]}
(auto-bucket-mbql
{:source-table 1
:breakout [[:datetime-field [:field-id (u/get-id field)] :month]]
:filter [:= [:field-id (u/get-id field)] "2018-11-20"]}))
;; or if they are used in a non-equality or non-comparison filter clause, for example `:is-null`:
(tt/expect-with-temp [Field [field {:base_type :type/DateTime, :special_type nil}]]
{:source-table 1
:filter [:is-null [:field-id (u/get-id field)]]}
(auto-bucket-mbql
{:source-table 1
:filter [:is-null [:field-id (u/get-id field)]]}))
;; however, we should not try to bucket Fields inside a `time-interval` clause as that would be invalid
(tt/expect-with-temp [Field [field {:base_type :type/DateTime, :special_type nil}]]
{:source-table 1
:filter [:time-interval [:field-id (u/get-id field)] -30 :day]}
(auto-bucket-mbql
{:source-table 1
:filter [:time-interval [:field-id (u/get-id field)] -30 :day]}))
;; we also should not auto-bucket Fields that are `:type/Time`, because grouping a Time Field by day makes ZERO SENSE.
(tt/expect-with-temp [Field [field {:base_type :type/Time, :special_type nil}]]
{:source-table 1
:breakout [[:field-id (u/get-id field)]]}
(auto-bucket-mbql
{:source-table 1
:breakout [[:field-id (u/get-id field)]]}))
;; should be considered to be :type/DateTime based on `special_type` as well
(tt/expect-with-temp [Field [field {:base_type :type/Integer, :special_type :type/DateTime}]]
{:source-table 1
......
......@@ -27,7 +27,8 @@
[metabase.test.data
[dataset-definitions :as defs]
[datasets :as datasets :refer [*driver* *engine*]]
[interface :as i]])
[interface :as i]]
[metabase.util.date :as du])
(:import org.joda.time.DateTime))
(defn- ->long-if-number [x]
......@@ -891,3 +892,36 @@
(expect-with-non-timeseries-dbs-except #{:snowflake :bigquery}
{:rows 2, :unit :day}
(date-bucketing-unit-when-you :breakout-by "day", :filter-by "day", :with-interval 2))
;; Filtering by a unbucketed datetime Field should automatically bucket that Field by day if not already done (#8927)
;;
;; This should only apply when comparing Fields to `yyyy-MM-dd` date strings.
;;
;; e.g. `[:= <field> "2018-11-19"] should get rewritten as `[:= [:datetime-field <field> :day] "2018-11-19"]` if
;; `<field>` is a `:type/DateTime` Field
;;
;; We should get count = 1 for the current day, as opposed to count = 0 if we weren't auto-bucketing
;; (e.g. 2018-11-19T00:00 != 2018-11-19T12:37 or whatever time the checkin is at)
(expect-with-non-timeseries-dbs-except #{:snowflake :bigquery}
[[1]]
(format-rows-by [int]
(rows
(data/with-temp-db [_ (checkins:1-per-day)]
(data/run-mbql-query checkins
{:aggregation [[:count]]
:filter [:= [:field-id $timestamp] (du/format-date "yyyy-MM-dd" (du/date-trunc :day))]})))))
;; if datetime string is not yyyy-MM-dd no date bucketing should take place, and thus we should get no (exact) matches
(expect-with-non-timeseries-dbs-except #{:snowflake :bigquery}
;; Mongo returns empty row for count = 0. We should fix that
(case *engine*
:mongo []
[[0]])
(format-rows-by [int]
(rows
(data/with-temp-db [_ (checkins:1-per-day)]
(data/run-mbql-query checkins
{:aggregation [[:count]]
:filter [:= [:field-id $timestamp] (str (du/format-date "yyyy-MM-dd" (du/date-trunc :day))
"T14:16:00.000Z")]})))))
......@@ -6,8 +6,6 @@
[util :as tu]]
[metabase.test.data.datasets :as datasets]))
;;; ------------------------------------------------ "FILTER" CLAUSE -------------------------------------------------
;;; FILTER -- "AND", ">", ">="
(expect-with-non-timeseries-dbs
[[55 "Dal Rae Restaurant" 67 33.983 -118.096 4]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment