Commit 651fde00 authored by Cam Saül, committed by GitHub

Merge pull request #4725 from metabase/swisscom-bigdata-sort-druid-select

Swisscom bigdata sort druid select
parents 96ea3d80 0a17d1f7
@@ -577,15 +577,16 @@
     {:dimension (->rvalue field)
      :direction direction}))))

-(defmethod handle-order-by ::grouped-timeseries [_ {[breakout-field] :breakout, [{field :field, direction :direction}] :order-by} druid-query]
-  (let [field             (->rvalue field)
-        breakout-field    (->rvalue breakout-field)
-        sort-by-breakout? (= field breakout-field)]
-    (if (and sort-by-breakout?
-             (= direction :descending))
-      (assoc druid-query :descending true)
-      druid-query)))
+;; Handle order by timestamp field
+(defn- handle-order-by-timestamp [field direction druid-query]
+  (assoc druid-query :descending (and (instance? DateTimeField field)
+                                      (= direction :descending))))
+
+(defmethod handle-order-by ::grouped-timeseries [_ {[{field :field, direction :direction}] :order-by} druid-query]
+  (handle-order-by-timestamp field direction druid-query))
+
+(defmethod handle-order-by ::select [_ {[{field :field, direction :direction}] :order-by} druid-query]
+  (handle-order-by-timestamp field direction druid-query))
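
;; A quick REPL sketch of the new helper's behavior (hypothetical session:
;; `map->DateTimeField` is assumed here as the constructor for the query
;; processor's DateTimeField record, and the query maps are illustrative):
;;
;;   (handle-order-by-timestamp (map->DateTimeField {}) :descending {:queryType :select})
;;   ;; => {:queryType :select, :descending true}
;;
;;   ;; a non-timestamp field, or an ascending sort, leaves :descending false
;;   (handle-order-by-timestamp (map->DateTimeField {}) :ascending {:queryType :select})
;;   ;; => {:queryType :select, :descending false}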
;;; ### handle-fields
@@ -27,38 +27,10 @@

;;; Setting Up a Server w/ Druid Test Data

-;; Unfortunately the process of loading test data onto an external server for CI purposes is a little involved. Before testing against Druid, you'll need to perform the following steps:
-;; For EC2 instances, make sure to expose ports `8082` & `8090` for Druid while loading data. Once data has finished loading, you only need to expose port `8082`.
-;;
-;; 1.  Set up Zookeeper
-;;     1A. Download & extract Zookeeper from `http://zookeeper.apache.org/releases.html#download`
-;;     1B. Create `zookeeper/conf/zoo.cfg` -- see the Getting Started Guide: `http://zookeeper.apache.org/doc/r3.4.6/zookeeperStarted.html`
-;;     1C. `zookeeper/bin/zkServer.sh start`
-;;     1D. `zookeeper/bin/zkServer.sh status` (to make sure it started correctly)
-;; 2.  Set up Druid
-;;     2A. Download & extract Druid from `http://druid.io/downloads.html`
-;;     2B. `cp druid/run_druid_server.sh druid/run_historical.sh` and bump the `-Xmx` setting to `6g` or so
-;;     2C. `cd druid && ./run_druid_server.sh coordinator`
-;;     2D. `cd druid && ./run_druid_server.sh broker`
-;;     2E. `cd druid && ./run_historical.sh historical`
-;;     2F. `cd druid && ./run_druid_server.sh overlord`
-;; 3.  Generate the flattened test data file; optionally pick a <filename>
-;;     3A. From this namespace in the REPL, run `(generate-json-for-batch-ingestion <filename>)`
-;;     3B. `scp` or otherwise upload this file to the box running Druid (if applicable)
-;; 4.  Launch the Druid indexing task
-;;     4A. Run the indexing task on the remote instance:
-;;
-;;         (run-indexing-task <remote-host> :base-dir <dir-where-you-uploaded-file>, :filename <file>)
-;;         e.g.
-;;         (run-indexing-task "http://ec2-52-90-109-199.compute-1.amazonaws.com", :base-dir "/home/ec2-user", :filename "checkins.json")
-;;
-;;         The task will keep you apprised of its progress until it completes (takes 1-2 minutes).
-;;     4B. Keep an eye on `<host>:8082/druid/v2/datasources` (e.g. "http://ec2-52-90-109-199.compute-1.amazonaws.com:8082/druid/v2/datasources").
-;;         This endpoint will return an empty array until the broker knows about the newly ingested segments. When it returns an array containing the string `"checkins"`, you're ready to run the tests.
-;;     4C. Kill the `overlord` process once the data has finished loading.
-;; 5.  Running the tests
-;;     5A. Run the tests with something like `ENGINES=druid MB_DRUID_PORT=8082 MB_DRUID_HOST=http://ec2-52-90-109-199.compute-1.amazonaws.com lein test`
+;; Unfortunately the process of loading test data onto an external server for CI purposes is a little involved.
+;; A complete step-by-step guide is available on the wiki at `https://github.com/metabase/metabase/wiki/Setting-up-Druid-for-CI-on-EC2`.
+;; Refer to that page for more information.
(def ^:private ^:const default-filename "Default filename for batched ingestion data file."
"checkins.json")
@@ -126,7 +98,7 @@
(def ^:private ^:const indexer-timeout-seconds
"Maximum number of seconds we should wait for the indexing task to finish before deciding it's failed."
-  180)
+  300) ; five minutes
(resolve-private-vars metabase.driver.druid GET POST)
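
;; A minimal sketch of the polling pattern a timeout like this guards against
;; running forever (hypothetical; `indexing-task-finished?` is an assumed
;; helper, not the driver's real API):
;;
;;   (loop [seconds-elapsed 0]
;;     (when (> seconds-elapsed indexer-timeout-seconds)
;;       (throw (Exception. "Druid indexing task timed out.")))
;;     (when-not (indexing-task-finished?)
;;       (Thread/sleep 1000)
;;       (recur (inc seconds-elapsed))))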
@@ -69,6 +69,44 @@
   (data (data/run-query checkins
           (ql/limit 2))))
;;; "bare rows" query, limit, order-by timestamp desc
(expect-with-timeseries-dbs
{:columns ["id"
"timestamp"
"count"
"user_last_login"
"user_name"
"venue_category_name"
"venue_latitude"
"venue_longitude"
"venue_name"
"venue_price"]
:rows [["693", "2015-12-29T08:00:00.000Z", 1, "2014-07-03T19:30:00.000Z", "Frans Hevel", "Mexican", "34.0489", "-118.238", "Señor Fish", "2"]
["570", "2015-12-26T08:00:00.000Z", 1, "2014-07-03T01:30:00.000Z", "Kfir Caj", "Chinese", "37.7949", "-122.406", "Empress of China", "3"]]}
(data (data/run-query checkins
(ql/order-by (ql/desc $timestamp))
(ql/limit 2))))
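
;; Under the hood, the order-by above should compile to the `:descending` flag
;; set by `handle-order-by-timestamp` in the driver change earlier in this
;; commit -- roughly (a sketch, not the full compiled query):
;;
;;   {:queryType :select, :descending true, ...}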
;;; "bare rows" query, limit, order-by timestamp asc
(expect-with-timeseries-dbs
{:columns ["id"
"timestamp"
"count"
"user_last_login"
"user_name"
"venue_category_name"
"venue_latitude"
"venue_longitude"
"venue_name"
"venue_price"]
:rows [["931", "2013-01-03T08:00:00.000Z", 1, "2014-01-01T08:30:00.000Z", "Simcha Yan", "Thai", "34.094", "-118.344", "Kinaree Thai Bistro", "1"]
["285", "2013-01-10T08:00:00.000Z", 1, "2014-07-03T01:30:00.000Z", "Kfir Caj", "Thai", "34.1021", "-118.306", "Ruen Pair Thai Restaurant", "2"]]}
(data (data/run-query checkins
(ql/order-by (ql/asc $timestamp))
(ql/limit 2))))
;;; fields clause
(expect-with-timeseries-dbs
{:columns ["venue_name" "venue_category_name" "timestamp"],
@@ -78,6 +116,28 @@
          (ql/fields $venue_name $venue_category_name)
          (ql/limit 2))))
+
+;;; fields clause, order by timestamp asc
+(expect-with-timeseries-dbs
+  {:columns ["venue_name" "venue_category_name" "timestamp"],
+   :rows    [["Kinaree Thai Bistro" "Thai" "2013-01-03T08:00:00.000Z"]
+             ["Ruen Pair Thai Restaurant" "Thai" "2013-01-10T08:00:00.000Z"]]}
+  (data (data/run-query checkins
+          (ql/fields $venue_name $venue_category_name)
+          (ql/order-by (ql/asc $timestamp))
+          (ql/limit 2))))
+
+;;; fields clause, order by timestamp desc
+(expect-with-timeseries-dbs
+  {:columns ["venue_name" "venue_category_name" "timestamp"],
+   :rows    [["Señor Fish" "Mexican" "2015-12-29T08:00:00.000Z"]
+             ["Empress of China" "Chinese" "2015-12-26T08:00:00.000Z"]]}
+  (data (data/run-query checkins
+          (ql/fields $venue_name $venue_category_name)
+          (ql/order-by (ql/desc $timestamp))
+          (ql/limit 2))))
;;; count
(expect-with-timeseries-dbs
[1000]