Skip to content
Snippets Groups Projects
Commit c70606ff authored by Cam Saül's avatar Cam Saül Committed by GitHub
Browse files

Merge pull request #2897 from metabase/druid-bucketing-return-integers

Return integers when appropriate for Druid date bucketing :bathtub:
parents ef201a9e 424c20ed
No related branches found
No related tags found
No related merge requests found
......@@ -163,7 +163,9 @@
(^:private ->dimension-rvalue [this]
"Format `Field` for use in a `:dimension` or `:dimensions` clause."))
(defn- extract:timeFormat [format-str]
(defn- extract:timeFormat
"Create a time format extraction. Returns a string. See http://druid.io/docs/0.9.1.1/querying/dimensionspecs.html#time-format-extraction-function"
[format-str]
{:pre [(string? format-str)]}
{:type :timeFormat
:format format-str
......@@ -171,14 +173,15 @@
"UTC")
:locale "en-US"})
(defn- extract:js [& function-str-parts]
(defn- extract:js
"Create an extraction function from JavaScript -- see http://druid.io/docs/0.9.1.1/querying/dimensionspecs.html#javascript-extraction-function"
[& function-str-parts]
{:pre [(every? string? function-str-parts)]}
{:type :javascript
:function (s/replace (apply str function-str-parts) #"\s+" " ")})
(defn- unit->extractionFn
"JODA date format strings for each datetime unit. [Described here.](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html)."
[unit]
;; don't try to make this a ^:const map -- extract:timeFormat looks up timezone info at query time
(defn- unit->extraction-fn [unit]
(case unit
:default (extract:timeFormat "yyyy-MM-dd'T'HH:mm:ssZ")
:minute (extract:timeFormat "yyyy-MM-dd'T'HH:mm:00Z")
......@@ -214,6 +217,21 @@
"}")
:year (extract:timeFormat "yyyy")))
;; Set of datetime bucketing units whose extracted values should be returned as integers rather than strings.
;; `->dimension-rvalue` checks membership in this set to pick the output column name (`:timestamp___int` vs. `:timestamp`),
;; and the `:timestamp___int` column is what gets parsed with `Integer/parseInt` when result rows are built.
(def ^:private ^:const units-that-need-post-processing-int-parsing
"`extract:timeFormat` always returns a string; there are cases where we'd like to return an integer instead, such as `:day-of-month`.
There's no simple way to do this in Druid -- Druid 0.9.0+ *does* let you combine extraction functions with `:cascade`, but we're still supporting 0.8.x.
Instead, we will perform the conversions in Clojure-land during post-processing. If we need to perform the extra post-processing step, we'll name the resulting
column `:timestamp___int`; otherwise we'll keep the name `:timestamp`."
#{:minute-of-hour
:hour-of-day
:day-of-week
:day-of-month
:day-of-year
:week-of-year
:month-of-year
:quarter-of-year
:year})
(extend-protocol IDimension
nil (->dimension-rvalue [this] (->rvalue this))
Object (->dimension-rvalue [this] (->rvalue this))
......@@ -222,8 +240,10 @@
(->dimension-rvalue [{:keys [unit]}]
{:type :extraction
:dimension :__time
:outputName :timestamp
:extractionFn (unit->extractionFn unit)}))
:outputName (if (contains? units-that-need-post-processing-int-parsing unit)
:timestamp___int
:timestamp)
:extractionFn (unit->extraction-fn unit)}))
(defmulti ^:private handle-breakout query-type-dispatch-fn)
......@@ -530,6 +550,20 @@
{:query druid-query
:query-type query-type}))))
(defn- columns->getter-fns
  "Build a vector of getter functions, one per keyword in COLUMNS, each of which pulls its column's value out of a single result row.
   For most columns the getter is simply the column keyword itself. The special `:timestamp___int` column additionally has its
   (string) value parsed into an integer; blank or missing values come back as `nil`. See the docstring for
   `units-that-need-post-processing-int-parsing` for why that column exists."
  [columns]
  (letfn [(parse-int [^String s]
            ;; `seq` is nil for both nil and \"\", so neither is ever handed to `Integer/parseInt`
            (when (seq s)
              (Integer/parseInt s)))
          (getter-for [column]
            (if (= column :timestamp___int)
              (comp parse-int column)
              column))]
    (mapv getter-for columns)))
(defn execute-query
"Execute a query for a Druid DB."
[do-query {database :database, {:keys [query query-type mbql?]} :native}]
......@@ -542,8 +576,11 @@
results (->> (do-query details query)
(post-process query-type)
remove-bonus-keys)
columns (vec (keys (first results)))]
{:columns columns
columns (keys (first results))
getters (columns->getter-fns columns)]
;; rename any occurrences of `:timestamp___int` to `:timestamp` in the results so the user doesn't know about our behind-the-scenes conversion
{:columns (vec (replace {:timestamp___int :timestamp} columns))
:rows (for [row results]
(mapv row columns))
(for [getter getters]
(getter row)))
:annotate? mbql?}))
......@@ -414,7 +414,7 @@
;;; date bucketing - minute-of-hour
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["00" 1000]]}
:rows [[0 1000]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :minute-of-hour))
......@@ -436,8 +436,8 @@
;;; date bucketing - hour-of-day
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["07" 719]
["08" 281]]}
:rows [[7 719]
[8 281]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :hour-of-day))
......@@ -472,11 +472,11 @@
;;; date bucketing - day-of-week
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["1" 135]
["2" 143]
["3" 153]
["4" 136]
["5" 139]]}
:rows [[1 135]
[2 143]
[3 153]
[4 136]
[5 139]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :day-of-week))
......@@ -485,11 +485,11 @@
;;; date bucketing - day-of-month
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["01" 36]
["02" 36]
["03" 42]
["04" 35]
["05" 43]]}
:rows [[1 36]
[2 36]
[3 42]
[4 35]
[5 43]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :day-of-month))
......@@ -498,11 +498,11 @@
;;; date bucketing - day-of-year
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["003" 2]
["004" 6]
["005" 1]
["006" 1]
["007" 2]]}
:rows [[3 2]
[4 6]
[5 1]
[6 1]
[7 2]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :day-of-year))
......@@ -511,11 +511,11 @@
;;; date bucketing - week-of-year
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["01" 10]
["02" 7]
["03" 8]
["04" 10]
["05" 4]]}
:rows [[1 10]
[2 7]
[3 8]
[4 10]
[5 4]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :week-of-year))
......@@ -537,11 +537,11 @@
;;; date bucketing - month-of-year
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["01" 38]
["02" 70]
["03" 92]
["04" 89]
["05" 111]]}
:rows [[1 38]
[2 70]
[3 92]
[4 89]
[5 111]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :month-of-year))
......@@ -563,10 +563,10 @@
;;; date bucketing - quarter-of-year
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["1" 200]
["2" 284]
["3" 278]
["4" 238]]}
:rows [[1 200]
[2 284]
[3 278]
[4 238]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :quarter-of-year))
......@@ -575,9 +575,9 @@
;;; date bucketing - year
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["2013" 235]
["2014" 498]
["2015" 267]]}
:rows [[2013 235]
[2014 498]
[2015 267]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :year))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment