Skip to content
Snippets Groups Projects
Commit 0d58c0b6 authored by Cam Saül's avatar Cam Saül
Browse files

Return integers when appropriate for Druid date bucketing :bathtub:

parent cb55e85a
No related branches found
No related tags found
No related merge requests found
......@@ -163,7 +163,9 @@
(^:private ->dimension-rvalue [this]
"Format `Field` for use in a `:dimension` or `:dimensions` clause."))
(defn- extract:timeFormat [format-str]
(defn- extract:timeFormat
"Create a time format extraction. Returns a string. See http://druid.io/docs/0.9.1.1/querying/dimensionspecs.html#time-format-extraction-function"
[format-str]
{:pre [(string? format-str)]}
{:type :timeFormat
:format format-str
......@@ -171,48 +173,62 @@
"UTC")
:locale "en-US"})
(defn- extract:js [& function-str-parts]
(defn- extract:js
  "Build a JavaScript extraction function map from FUNCTION-STR-PARTS. The parts (all of which must be strings) are concatenated
   in order and every run of whitespace is collapsed to a single space, so the JavaScript source can be written across several
   string literals. See http://druid.io/docs/0.9.1.1/querying/dimensionspecs.html#javascript-extraction-function"
  [& function-str-parts]
  {:pre [(every? string? function-str-parts)]}
  (let [source      (s/join function-str-parts)
        single-line (s/replace source #"\s+" " ")]
    {:type     :javascript
     :function single-line}))
(defn- unit->extractionFn
  "Return the extraction function map to use when bucketing by datetime UNIT. Units that can be expressed as a JODA date format
   string go through `extract:timeFormat` ([format reference](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html));
   the remaining units are computed with JavaScript via `extract:js`. There is no fallback clause, so an unrecognized UNIT throws
   an `IllegalArgumentException`."
  [unit]
  (condp = unit
    ;; full timestamps, truncated to the requested precision
    :default         (extract:timeFormat "yyyy-MM-dd'T'HH:mm:ssZ")
    :minute          (extract:timeFormat "yyyy-MM-dd'T'HH:mm:00Z")
    :hour            (extract:timeFormat "yyyy-MM-dd'T'HH:00:00Z")
    :day             (extract:timeFormat "yyyy-MM-ddZ")
    :month           (extract:timeFormat "yyyy-MM-01")
    :year            (extract:timeFormat "yyyy")
    ;; single date components available as a format pattern
    :minute-of-hour  (extract:timeFormat "mm")
    :hour-of-day     (extract:timeFormat "HH")
    :day-of-month    (extract:timeFormat "dd")
    :day-of-year     (extract:timeFormat "DDD")
    :week-of-year    (extract:timeFormat "ww")
    :month-of-year   (extract:timeFormat "MM")
    ;; everything else is computed in JavaScript
    :day-of-week     (extract:js "function (timestamp) {"
                                 " var date = new Date(timestamp);"
                                 " return date.getDay() + 1;"
                                 "}")
    :week            (extract:js "function (timestamp) {"
                                 " var date = new Date(timestamp);"
                                 " var firstDOW = new Date(date - (date.getDay() * 86400000));"
                                 " var month = firstDOW.getMonth() + 1;"
                                 " var day = firstDOW.getDate();"
                                 " return '' + firstDOW.getFullYear() + '-' + (month < 10 ? '0' : '') + month + '-' + (day < 10 ? '0' : '') + day;"
                                 "}")
    :quarter         (extract:js "function (timestamp) {"
                                 " var date = new Date(timestamp);"
                                 " var month = date.getMonth() + 1;" ; js months are 0 - 11
                                 " var quarterMonth = month - ((month - 1) % 3);"
                                 " return '' + date.getFullYear() + '-' + (quarterMonth < 10 ? '0' : '') + quarterMonth + '-01';"
                                 "}")
    :quarter-of-year (extract:js "function (timestamp) {"
                                 " var date = new Date(timestamp);"
                                 " return Math.floor((date.getMonth() + 3) / 3);"
                                 "}")))
(def ^:private ^:const unit->extraction-fn
  "Map of datetime bucketing unit -> extraction function map. Units that can be expressed as a JODA date format string are built
   with `extract:timeFormat` ([format reference](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html));
   the remaining units are computed with JavaScript via `extract:js`."
  (merge
   ;; units expressible as a date format pattern: unit -> pattern
   (into {} (for [[unit pattern] {:default        "yyyy-MM-dd'T'HH:mm:ssZ"
                                  :minute         "yyyy-MM-dd'T'HH:mm:00Z"
                                  :minute-of-hour "mm"
                                  :hour           "yyyy-MM-dd'T'HH:00:00Z"
                                  :hour-of-day    "HH"
                                  :day            "yyyy-MM-ddZ"
                                  :day-of-month   "dd"
                                  :day-of-year    "DDD"
                                  :week-of-year   "ww"
                                  :month          "yyyy-MM-01"
                                  :month-of-year  "MM"
                                  :year           "yyyy"}]
              [unit (extract:timeFormat pattern)]))
   ;; units that have to be computed in JavaScript
   {:day-of-week     (extract:js "function (timestamp) {"
                                 " var date = new Date(timestamp);"
                                 " return date.getDay() + 1;"
                                 "}")
    :week            (extract:js "function (timestamp) {"
                                 " var date = new Date(timestamp);"
                                 " var firstDOW = new Date(date - (date.getDay() * 86400000));"
                                 " var month = firstDOW.getMonth() + 1;"
                                 " var day = firstDOW.getDate();"
                                 " return '' + firstDOW.getFullYear() + '-' + (month < 10 ? '0' : '') + month + '-' + (day < 10 ? '0' : '') + day;"
                                 "}")
    :quarter         (extract:js "function (timestamp) {"
                                 " var date = new Date(timestamp);"
                                 " var month = date.getMonth() + 1;" ; js months are 0 - 11
                                 " var quarterMonth = month - ((month - 1) % 3);"
                                 " return '' + date.getFullYear() + '-' + (quarterMonth < 10 ? '0' : '') + quarterMonth + '-01';"
                                 "}")
    :quarter-of-year (extract:js "function (timestamp) {"
                                 " var date = new Date(timestamp);"
                                 " return Math.floor((date.getMonth() + 3) / 3);"
                                 "}")}))
(def ^:private ^:const units-that-need-post-processing-int-parsing
  "Bucketing units whose value we'd like to return as an integer (e.g. `:day-of-month`), even though `extract:timeFormat` always
   returns a string. Druid has no simple way to do this: 0.9.0+ *does* allow combining extraction functions with `:cascade`, but
   0.8.x is still supported, so the conversion is done in Clojure-land during post-processing instead. When that extra step is
   needed the resulting column is named `:timestamp___int`; otherwise it keeps the name `:timestamp`."
  #{;; components of the time of day
    :minute-of-hour :hour-of-day
    ;; day-level components
    :day-of-week :day-of-month :day-of-year
    ;; coarser components
    :week-of-year :month-of-year :quarter-of-year :year})
(extend-protocol IDimension
nil (->dimension-rvalue [this] (->rvalue this))
......@@ -222,8 +238,10 @@
(->dimension-rvalue [{:keys [unit]}]
{:type :extraction
:dimension :__time
:outputName :timestamp
:extractionFn (unit->extractionFn unit)}))
:outputName (if (contains? units-that-need-post-processing-int-parsing unit)
:timestamp___int
:timestamp)
:extractionFn (unit->extraction-fn unit)}))
;; Private multimethod `handle-breakout`: the method that runs is chosen by the value `query-type-dispatch-fn` returns
;; for the call's arguments. (Its `defmethod` implementations are not visible in this excerpt.)
(defmulti ^:private handle-breakout query-type-dispatch-fn)
......@@ -530,6 +548,20 @@
{:query druid-query
:query-type query-type}))))
(defn- columns->getter-fns
  "Return a vector with one getter function per keyword in COLUMNS, each of which pulls that column's value out of a single result
   row. For most columns the getter is simply the keyword itself. The exception is `:timestamp___int`: its getter also parses the
   fetched string as an integer, yielding `nil` when the value is `nil` or empty (see the docstring for
   `units-that-need-post-processing-int-parsing` for why)."
  [columns]
  (let [parse-int (fn [^String s]
                    (when-not (empty? s)
                      (Integer/parseInt s)))
        ->getter  (fn [column]
                    (if (= column :timestamp___int)
                      (comp parse-int column)
                      column))]
    (mapv ->getter columns)))
(defn execute-query
"Execute a query for a Druid DB."
[do-query {database :database, {:keys [query query-type mbql?]} :native}]
......@@ -542,8 +574,11 @@
results (->> (do-query details query)
(post-process query-type)
remove-bonus-keys)
columns (vec (keys (first results)))]
{:columns columns
columns (keys (first results))
getters (columns->getter-fns columns)]
;; rename any occurrences of `:timestamp___int` to `:timestamp` in the results so the user doesn't know about our behind-the-scenes conversion
{:columns (vec (replace {:timestamp___int :timestamp} columns))
:rows (for [row results]
(mapv row columns))
(for [getter getters]
(getter row)))
:annotate? mbql?}))
......@@ -414,7 +414,7 @@
;;; date bucketing - minute-of-hour
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["00" 1000]]}
:rows [[0 1000]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :minute-of-hour))
......@@ -436,8 +436,8 @@
;;; date bucketing - hour-of-day
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["07" 719]
["08" 281]]}
:rows [[7 719]
[8 281]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :hour-of-day))
......@@ -472,11 +472,11 @@
;;; date bucketing - day-of-week
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["1" 135]
["2" 143]
["3" 153]
["4" 136]
["5" 139]]}
:rows [[1 135]
[2 143]
[3 153]
[4 136]
[5 139]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :day-of-week))
......@@ -485,11 +485,11 @@
;;; date bucketing - day-of-month
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["01" 36]
["02" 36]
["03" 42]
["04" 35]
["05" 43]]}
:rows [[1 36]
[2 36]
[3 42]
[4 35]
[5 43]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :day-of-month))
......@@ -498,11 +498,11 @@
;;; date bucketing - day-of-year
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["003" 2]
["004" 6]
["005" 1]
["006" 1]
["007" 2]]}
:rows [[3 2]
[4 6]
[5 1]
[6 1]
[7 2]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :day-of-year))
......@@ -511,11 +511,11 @@
;;; date bucketing - week-of-year
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["01" 10]
["02" 7]
["03" 8]
["04" 10]
["05" 4]]}
:rows [[1 10]
[2 7]
[3 8]
[4 10]
[5 4]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :week-of-year))
......@@ -537,11 +537,11 @@
;;; date bucketing - month-of-year
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["01" 38]
["02" 70]
["03" 92]
["04" 89]
["05" 111]]}
:rows [[1 38]
[2 70]
[3 92]
[4 89]
[5 111]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :month-of-year))
......@@ -563,10 +563,10 @@
;;; date bucketing - quarter-of-year
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["1" 200]
["2" 284]
["3" 278]
["4" 238]]}
:rows [[1 200]
[2 284]
[3 278]
[4 238]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :quarter-of-year))
......@@ -575,9 +575,9 @@
;;; date bucketing - year
(expect-with-timeseries-dbs
{:columns ["timestamp" "count"]
:rows [["2013" 235]
["2014" 498]
["2015" 267]]}
:rows [[2013 235]
[2014 498]
[2015 267]]}
(data (data/run-query checkins
(ql/aggregation (ql/count))
(ql/breakout (ql/datetime-field $timestamp :year))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment