From a32d6b39fc31a3154813ed304d1af135e0cd0fdb Mon Sep 17 00:00:00 2001
From: Ryan Senior <ryan@metabase.com>
Date: Tue, 19 Sep 2017 10:40:15 -0700
Subject: [PATCH] Added timestamp formatting for druid select queries [ci
 drivers]

Druid select queries don't allow setting of a timezone. This commit
will parse the timestamps coming out of Druid and reformat them using
the user's report timezone or system timezone. This allows Druid
select queries to have similar timezone semantics to other supported
databases.

Fixes #3690
---
 src/metabase/driver/druid/query_processor.clj | 61 ++++++++++++++-----
 test/metabase/driver/druid_test.clj           | 41 +++++++++++--
 2 files changed, 82 insertions(+), 20 deletions(-)

diff --git a/src/metabase/driver/druid/query_processor.clj b/src/metabase/driver/druid/query_processor.clj
index c67db71992d..26404b8fd32 100644
--- a/src/metabase/driver/druid/query_processor.clj
+++ b/src/metabase/driver/druid/query_processor.clj
@@ -1,5 +1,7 @@
 (ns metabase.driver.druid.query-processor
   (:require [cheshire.core :as json]
+            [clj-time.core :as time]
+            [clj-time.format :as tformat]
             [clojure.core.match :refer [match]]
             [clojure.math.numeric-tower :as math]
             [clojure.string :as s]
@@ -9,7 +11,9 @@
              [annotate :as annotate]
              [interface :as i]]
             [metabase.util :as u])
-  (:import [metabase.query_processor.interface AgFieldRef DateTimeField DateTimeValue Expression Field RelativeDateTimeValue Value]))
+  (:import java.util.TimeZone
+           [metabase.query_processor.interface AgFieldRef DateTimeField DateTimeValue Expression Field RelativeDateTimeValue Value]
+           org.joda.time.DateTimeZone))
 
 (def ^:private ^:const topN-max-results
   "Maximum number of rows the topN query in Druid should return. Huge values cause significant issues with the engine.
@@ -724,24 +728,34 @@
   {:projections projections
    :results     results})
 
-(defmethod post-process ::select  [_ projections results]
-  (->> results
-       first
-       :result
-       :events
-       (map :event)
-       (post-process-map projections)))
-
-(defmethod post-process ::total   [_ projections results]
+(def ^:private druid-ts-format (tformat/formatters :date-time))
+
+(defn- reformat-timestamp [timestamp target-formatter]
+  (->> timestamp
+       (tformat/parse druid-ts-format)
+       (tformat/unparse target-formatter)))
+
+(defmethod post-process ::select  [_ projections timezone results]
+  (let [update-ts-fn (if-let [target-formater (and timezone (tformat/with-zone druid-ts-format timezone))]
+                       #(update % :timestamp reformat-timestamp target-formater)
+                       identity)]
+    (->> results
+         first
+         :result
+         :events
+         (map (comp update-ts-fn :event))
+         (post-process-map projections))))
+
+(defmethod post-process ::total   [_ projections timezone results]
   (post-process-map projections (map :result results)))
 
-(defmethod post-process ::topN    [_ projections results]
+(defmethod post-process ::topN    [_ projections timezone results]
   (post-process-map projections (-> results first :result)))
 
-(defmethod post-process ::groupBy [_ projections results]
+(defmethod post-process ::groupBy [_ projections timezone results]
   (post-process-map projections (map :event results)))
 
-(defmethod post-process ::timeseries [_ projections results]
+(defmethod post-process ::timeseries [_ projections timezone results]
   (post-process-map (conj projections :timestamp)
                     (for [event results]
                       (conj {:timestamp (:timestamp event)} (:result event)))))
@@ -791,9 +805,26 @@
                                     k)
             k))))
 
+(defn- utc?
+  "There are several timezone ids that mean UTC. This will create a
+  TimeZone object from `TIMEZONE` and check to see if it's a UTC
+  timezone"
+  [^DateTimeZone timezone]
+  (.hasSameRules (TimeZone/getTimeZone "UTC")
+                 (.toTimeZone timezone)))
+
+(defn- resolve-timezone
+  "Returns the timezone object (either report-timezone or JVM
+  timezone). Returns nil if the timezone is UTC as the timestamps from
+  Druid are already in UTC and don't need to be converted"
+  [{:keys [settings]}]
+  (let [tz (time/time-zone-for-id (:report-timezone settings (System/getProperty "user.timezone")))]
+    (when-not (utc? tz)
+      tz)))
+
 (defn execute-query
   "Execute a query for a Druid DB."
-  [do-query {database :database, {:keys [query query-type mbql? projections]} :native}]
+  [do-query {database :database, {:keys [query query-type mbql? projections]} :native :as query-ctx}]
   {:pre [database query]}
   (let [details       (:details database)
         query         (if (string? query)
@@ -802,7 +833,7 @@
         query-type    (or query-type (keyword "metabase.driver.druid.query-processor" (name (:queryType query))))
         post-proc-map (->> query
                            (do-query details)
-                           (post-process query-type projections))
+                           (post-process query-type projections (resolve-timezone query-ctx)))
         columns       (if mbql?
                         (->> post-proc-map
                              :projections
diff --git a/test/metabase/driver/druid_test.clj b/test/metabase/driver/druid_test.clj
index 8df4e288da7..d78643e4600 100644
--- a/test/metabase/driver/druid_test.clj
+++ b/test/metabase/driver/druid_test.clj
@@ -1,5 +1,6 @@
 (ns metabase.driver.druid-test
   (:require [cheshire.core :as json]
+            [clj-time.core :as time]
             [expectations :refer [expect]]
             [medley.core :as m]
             [metabase
@@ -11,10 +12,12 @@
             metabase.driver.druid
             [metabase.models
              [field :refer [Field]]
-             [table :refer [Table]]
-             [metric :refer [Metric]]]
+             [metric :refer [Metric]]
+             [table :refer [Table]]]
             [metabase.query-processor.middleware.expand :as ql]
-            [metabase.test.data :as data]
+            [metabase.test
+             [data :as data]
+             [util :as tu]]
             [metabase.test.data.datasets :as datasets :refer [expect-with-engine]]
             [toucan.util.test :as tt])
   (:import metabase.driver.druid.DruidDriver))
@@ -28,11 +31,39 @@
    ["1000" "Tito's Tacos"                "2014-06-03T07:00:00.000Z"]
    ["101"  "Golden Road Brewing"         "2015-09-04T07:00:00.000Z"]]
   (->> (driver/table-rows-sample (Table (data/id :checkins))
-         [(Field (data/id :checkins :id))
-          (Field (data/id :checkins :venue_name))])
+                                 [(Field (data/id :checkins :id))
+                                  (Field (data/id :checkins :venue_name))])
        (sort-by first)
        (take 5)))
 
+(datasets/expect-with-engine :druid
+  ;; druid returns a timestamp along with the query, but that shouldn't really matter here :D
+  [["1"    "The Misfit Restaurant + Bar" "2014-04-07T00:00:00.000-07:00"]
+   ["10"   "Dal Rae Restaurant"          "2015-08-22T00:00:00.000-07:00"]
+   ["100"  "PizzaHacker"                 "2014-07-26T00:00:00.000-07:00"]
+   ["1000" "Tito's Tacos"                "2014-06-03T00:00:00.000-07:00"]
+   ["101"  "Golden Road Brewing"         "2015-09-04T00:00:00.000-07:00"]]
+  (tu/with-temporary-setting-values [report-timezone "America/Los_Angeles"]
+    (->> (driver/table-rows-sample (Table (data/id :checkins))
+                                   [(Field (data/id :checkins :id))
+                                    (Field (data/id :checkins :venue_name))])
+         (sort-by first)
+         (take 5))))
+
+(datasets/expect-with-engine :druid
+  ;; druid returns a timestamp along with the query, but that shouldn't really matter here :D
+  [["1"    "The Misfit Restaurant + Bar" "2014-04-07T02:00:00.000-05:00"]
+   ["10"   "Dal Rae Restaurant"          "2015-08-22T02:00:00.000-05:00"]
+   ["100"  "PizzaHacker"                 "2014-07-26T02:00:00.000-05:00"]
+   ["1000" "Tito's Tacos"                "2014-06-03T02:00:00.000-05:00"]
+   ["101"  "Golden Road Brewing"         "2015-09-04T02:00:00.000-05:00"]]
+  (tu/with-jvm-tz (time/time-zone-for-id "America/Chicago")
+    (->> (driver/table-rows-sample (Table (data/id :checkins))
+                                   [(Field (data/id :checkins :id))
+                                    (Field (data/id :checkins :venue_name))])
+         (sort-by first)
+         (take 5))))
+
 (def ^:const ^:private ^String native-query-1
   (json/generate-string
     {:intervals   ["1900-01-01/2100-01-01"]
-- 
GitLab