Commit e26723ec authored by Cam Saül

Run Druid unit tests on CI :persevere:

parent d913b59a
@@ -25,11 +25,11 @@ test:
# 0) runs unit tests w/ H2 local DB. Runs against H2, Mongo, MySQL
# 1) runs unit tests w/ Postgres local DB. Runs against H2, SQL Server
# 2) runs unit tests w/ MySQL local DB. Runs against H2, Postgres, SQLite
# 3) runs unit tests w/ H2 local DB. Runs against H2, Redshift
# 3) runs unit tests w/ H2 local DB. Runs against H2, Redshift, Druid
# 4) runs Eastwood linter & Bikeshed linter && ./bin/reflection-linter
# 5) runs JS linter + JS test
# 6) runs lein uberjar. (We don't run bin/build because we're not really concerned about `npm install` (etc) in this test, which runs elsewhere)
- case $CIRCLE_NODE_INDEX in 0) ENGINES=h2,mongo,mysql lein test ;; 1) ENGINES=h2,sqlserver MB_DB_TYPE=postgres MB_DB_DBNAME=circle_test MB_DB_PORT=5432 MB_DB_USER=ubuntu MB_DB_HOST=localhost lein test ;; 2) ENGINES=h2,postgres,sqlite MB_DB_TYPE=mysql MB_DB_DBNAME=circle_test MB_DB_PORT=3306 MB_DB_USER=ubuntu MB_DB_HOST=localhost lein test ;; 3) ENGINES=h2,redshift lein test ;; 4) lein eastwood 2>&1 | grep -v Reflection && lein bikeshed 2>&1 | grep -v Reflection && ./bin/reflection-linter ;; 5) npm install && npm run lint && npm run build && npm run test ;; 6) lein uberjar ;; esac:
- case $CIRCLE_NODE_INDEX in 0) ENGINES=h2,mongo,mysql lein test ;; 1) ENGINES=h2,sqlserver MB_DB_TYPE=postgres MB_DB_DBNAME=circle_test MB_DB_PORT=5432 MB_DB_USER=ubuntu MB_DB_HOST=localhost lein test ;; 2) ENGINES=h2,postgres,sqlite MB_DB_TYPE=mysql MB_DB_DBNAME=circle_test MB_DB_PORT=3306 MB_DB_USER=ubuntu MB_DB_HOST=localhost lein test ;; 3) ENGINES=h2,redshift,druid lein test ;; 4) lein eastwood 2>&1 | grep -v Reflection && lein bikeshed 2>&1 | grep -v Reflection && ./bin/reflection-linter ;; 5) npm install && npm run lint && npm run build && npm run test ;; 6) lein uberjar ;; esac:
parallel: true
deployment:
master:
......
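The `case` statement above fans the test job out across CircleCI containers keyed on `$CIRCLE_NODE_INDEX`; this commit adds `druid` to node 3's ENGINES. A rough Clojure sketch of the same dispatch table (hypothetical names; the linter/JS/uberjar nodes 4-6 are omitted):

(def ^:private node-index->test-engines
  "Hypothetical sketch of the fan-out in the circle.yml `case` above:
  CircleCI node index -> ENGINES value passed to `lein test`."
  {0 "h2,mongo,mysql"       ; app DB: H2
   1 "h2,sqlserver"         ; app DB: Postgres
   2 "h2,postgres,sqlite"   ; app DB: MySQL
   3 "h2,redshift,druid"})  ; app DB: H2 -- druid added by this commit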
@@ -61,7 +61,7 @@
(def ^:private ^:const query-type->default-query
(let [defaults {:intervals ["-5000/5000"]
:granularity :all
:context {:timeout 5000}}]
:context {:timeout 60000}}]
{::select (merge defaults {:queryType :select
:pagingSpec {:threshold 100 #_qp/absolute-max-results}})
::timeseries (merge defaults {:queryType :timeseries})
......
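;; The timeout bump above (5000 ms -> 60000 ms) lives in the shared `defaults`
;; map, so every Druid query type inherits it via `merge`. A minimal REPL
;; sketch of how the ::select entry composes (names from the hunk above):
(let [defaults {:intervals   ["-5000/5000"]
                :granularity :all
                :context     {:timeout 60000}}]
  (merge defaults {:queryType  :select
                   :pagingSpec {:threshold 100}}))
;; => {:intervals ["-5000/5000"], :granularity :all,
;;     :context {:timeout 60000}, :queryType :select, :pagingSpec {:threshold 100}}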
@@ -14,9 +14,9 @@
(defn- post-select [{:keys [engine] :as database}]
(if-not engine database
(assoc database :features (if-let [driver ((resolve 'metabase.driver/engine->driver) engine)]
((resolve 'metabase.driver/features) driver)
[]))))
(assoc database :features (or (when-let [driver ((resolve 'metabase.driver/engine->driver) engine)]
(seq ((resolve 'metabase.driver/features) driver)))
[]))))
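;; The rewrite above guards two cases: the engine may not resolve to a driver,
;; and a resolved driver may report no features. `seq` returns nil for an empty
;; collection, so both cases fall through `or` to the [] default. A minimal
;; sketch of the idiom (feature lists here are hypothetical):
(defn- features-or-empty [driver-features]
  (or (seq driver-features) []))

(features-or-empty [:foreign-keys :set-timezone]) ;; => (:foreign-keys :set-timezone)
(features-or-empty [])                            ;; => []
(features-or-empty nil)                           ;; => []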
(defn- pre-cascade-delete [{:keys [id]}]
(cascade-delete 'Card :database_id id)
......
@@ -40,7 +40,7 @@
:is_full_sync true
:organization_id nil
:description nil
:features (mapv name (metabase.driver/features (metabase.driver/engine->driver (:engine db))))})))
:features (mapv name (driver/features (driver/engine->driver (:engine db))))})))
;; # DB LIFECYCLE ENDPOINTS
@@ -71,7 +71,7 @@
:is_full_sync false
:organization_id nil
:description nil
:features (mapv name (metabase.driver/features (metabase.driver/engine->driver :postgres)))})
:features (mapv name (driver/features (driver/engine->driver :postgres)))})
(create-db db-name false)))
;; ## DELETE /api/database/:id
@@ -113,41 +113,41 @@
;; Database details *should not* come back for Rasta since she's not a superuser
(let [db-name (str "A" (random-name))] ; make sure this name comes before "test-data"
(expect-eval-actual-first
(set (filter identity
(conj (for [engine datasets/all-valid-engines]
(datasets/when-testing-engine engine
(match-$ (get-or-create-test-data-db! (driver/engine->driver engine))
{:created_at $
:engine (name $engine)
:id $
:updated_at $
:name "test-data"
:is_sample false
:is_full_sync true
:organization_id nil
:description nil
:features (mapv name (metabase.driver/features (metabase.driver/engine->driver engine)))})))
(match-$ (sel :one Database :name db-name)
{:created_at $
:engine "postgres"
:id $
:updated_at $
:name $
:is_sample false
:is_full_sync true
:organization_id nil
:description nil
:features (mapv name (metabase.driver/features (metabase.driver/engine->driver :postgres)))}))))
(do
;; Delete all the randomly created Databases we've made so far
(cascade-delete Database :id [not-in (set (filter identity
(for [engine datasets/all-valid-engines]
(datasets/when-testing-engine engine
(:id (get-or-create-test-data-db! (driver/engine->driver engine)))))))])
;; Add an extra DB so we have something to fetch besides the Test DB
(create-db db-name)
;; Now hit the endpoint
(set ((user->client :rasta) :get 200 "database")))))
(set (filter identity (conj (for [engine datasets/all-valid-engines]
(datasets/when-testing-engine engine
(match-$ (get-or-create-test-data-db! (driver/engine->driver engine))
{:created_at $
:engine (name $engine)
:id $
:updated_at $
:name "test-data"
:is_sample false
:is_full_sync true
:organization_id nil
:description nil
:features (mapv name (driver/features (driver/engine->driver engine)))})))
;; (?) I don't remember why we have to do this for postgres but not for any of the other bonus drivers
(match-$ (sel :one Database :name db-name)
{:created_at $
:engine "postgres"
:id $
:updated_at $
:name $
:is_sample false
:is_full_sync true
:organization_id nil
:description nil
:features (mapv name (driver/features (driver/engine->driver :postgres)))}))))
(do
;; Delete all the randomly created Databases we've made so far
(cascade-delete Database :id [not-in (set (filter identity
(for [engine datasets/all-valid-engines]
(datasets/when-testing-engine engine
(:id (get-or-create-test-data-db! (driver/engine->driver engine)))))))])
;; Add an extra DB so we have something to fetch besides the Test DB
(create-db db-name)
;; Now hit the endpoint
(set ((user->client :rasta) :get 200 "database")))))
;; ## GET /api/meta/table/:id/query_metadata
@@ -163,7 +163,7 @@
:is_full_sync true
:organization_id nil
:description nil
:features (mapv name (metabase.driver/features (metabase.driver/engine->driver :h2)))
:features (mapv name (driver/features (driver/engine->driver :h2)))
:tables [(match-$ (Table (id :categories))
{:description nil
:entity_type nil
......
(ns metabase.test.data.druid
(:require [cheshire.core :as json]
[clojure.java.io :as io]
(:require [clojure.java.io :as io]
[cheshire.core :as json]
[environ.core :refer [env]]
[metabase.driver.druid :as druid]
(metabase.test.data dataset-definitions
(metabase.test.data [dataset-definitions :as defs]
[datasets :as datasets]
[interface :as i])
[metabase.test.util :refer [resolve-private-fns]]
[metabase.util :as u])
(:import metabase.driver.druid.DruidDriver))
(def ^:private ^:const temp-dir (System/getProperty "java.io.tmpdir"))
(def ^:private ^:const source-filename "checkins.json")
(defn- database->connection-details [& _]
{:host (or (env :mb-druid-host)
(throw (Exception. "In order to test Druid, you must specify `MB_DRUID_HOST`.")))
:port (Integer/parseInt (or (env :mb-druid-port)
(throw (Exception. "In order to test Druid, you must specify `MB_DRUID_PORT`."))))})
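;; With the two variables exported (environ maps MB_DRUID_HOST -> :mb-druid-host),
;; the fn above yields a details map; e.g., assuming the local host/port used
;; elsewhere in this file:
;;
;;   MB_DRUID_HOST=http://localhost MB_DRUID_PORT=8082
;;   (database->connection-details)
;;   ;; => {:host "http://localhost", :port 8082}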
(extend DruidDriver
i/IDatasetLoader
(merge i/IDatasetLoaderDefaultsMixin
{:engine (constantly :druid)
:database->connection-details database->connection-details
:create-db! (constantly nil)
:destroy-db! (constantly nil)}))
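;; `extend` takes plain maps of keyword -> fn, which is what makes the
;; defaults-mixin pattern above work: shared implementations get merged with
;; driver-specific overrides. A self-contained sketch of the pattern
;; (Greeter & friends are hypothetical, not Metabase code):
(defprotocol Greeter
  (greet    [this])
  (farewell [this]))

(def greeter-defaults-mixin
  {:farewell (constantly "bye")})

(defrecord TerseGreeter [])

(extend TerseGreeter
  Greeter
  (merge greeter-defaults-mixin
         {:greet (constantly "hi")}))

(greet (->TerseGreeter))    ;; => "hi"
(farewell (->TerseGreeter)) ;; => "bye"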
;;; Setting Up a Server w/ Druid Test Data
;; Unfortunately, the process of loading test data onto an external server for CI purposes is a little involved. Before testing against Druid, you'll need to perform the following steps:
;; (For EC2 instances, make sure to expose ports `8082` & `8090` while loading data. Once the data has finished loading, only port `8082` needs to stay open.)
;;
;; 1. Setup Zookeeper
;; 1A. Download & extract Zookeeper from `http://zookeeper.apache.org/releases.html#download`
;; 1B. Create `zookeeper/conf/zoo.cfg` -- see the Getting Started Guide: `http://zookeeper.apache.org/doc/r3.4.6/zookeeperStarted.html`
;; 1C. `zookeeper/bin/zkServer.sh start`
;; 1D. `zookeeper/bin/zkServer.sh status` (to make sure it started correctly)
;; 2. Setup Druid
;; 2A. Download & extract Druid from `http://druid.io/downloads.html`
;; 2B. `cp druid/run_druid_server.sh druid/run_historical.sh` and bump the `-Xmx` setting to `6g` or so
;; 2C. `cd druid && ./run_druid_server.sh coordinator`
;; 2D. `cd druid && ./run_druid_server.sh broker`
;; 2E. `cd druid && ./run_historical.sh historical`
;; 2F. `cd druid && ./run_druid_server.sh overlord`
;; 3. Generate flattened test data file. Optionally pick a <filename>
;; 3A. From this namespace in the REPL, run `(generate-json-for-batch-ingestion <filename>)`
;; 3B. `scp` or otherwise upload this file to the box running druid (if applicable)
;; 4. Launch Druid Indexing Task
;; 4A. Run the indexing task on the remote instance.
;;
;; (run-indexing-task <remote-host> :base-dir <dir-where-you-uploaded-file>, :filename <file>)
;; e.g.
;; (run-indexing-task "http://ec2-52-90-109-199.compute-1.amazonaws.com", :base-dir "/home/ec2-user", :filename "checkins.json")
;;
;; The task will keep you apprised of its progress until it completes (takes 1-2 minutes).
;; 4B. Keep an eye on `<host>:8082/druid/v2/datasources` (e.g. "http://ec2-52-90-109-199.compute-1.amazonaws.com:8082/druid/v2/datasources"); see the REPL sketch after this list.
;; This endpoint will return an empty array until the broker knows about the newly ingested segments. When it returns an array containing the string `"checkins"`, you're
;; ready to run the tests.
;; 4C. Kill the `overlord` process once the data has finished loading.
;; 5. Running Tests
;; 5A. You can run tests like `ENGINES=druid MB_DRUID_PORT=8082 MB_DRUID_HOST=http://ec2-52-90-109-199.compute-1.amazonaws.com lein test`
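;; One way to poll the step 4B endpoint from the REPL, using nothing but
;; `slurp` (host is the EC2 example above; adjust to your instance):
;;
;;   (slurp "http://ec2-52-90-109-199.compute-1.amazonaws.com:8082/druid/v2/datasources")
;;   ;; => "[]"              ; broker doesn't see the new segments yet
;;   ;; => "[\"checkins\"]"  ; segments loaded -- ready to run the tests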
(def ^:private ^:const default-filename "Default filename for batched ingestion data file."
"checkins.json")
;;; Generating Data File
(defn- flattened-test-data []
(let [dbdef (i/flatten-dbdef metabase.test.data.dataset-definitions/test-data "checkins")
(let [dbdef (i/flatten-dbdef defs/test-data "checkins")
tabledef (first (:table-definitions dbdef))]
(->> (:rows tabledef)
(map (partial zipmap (map :field-name (:field-definitions tabledef))))
@@ -29,7 +84,21 @@
(json/generate-stream row writer)
(.append writer \newline)))))
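;; The truncated hunk above is the tail of write-dbdef-to-json: one JSON object
;; per line, which is the shape Druid's local firehose ingests. A self-contained
;; sketch of the same newline-delimited-JSON pattern (row data is hypothetical):
(require '[cheshire.core :as json]
         '[clojure.java.io :as io])

(defn- write-rows-as-ndjson! [rows filename]
  (with-open [writer (io/writer filename)]
    (doseq [row rows]
      (json/generate-stream row writer)
      (.append writer \newline))))

(write-rows-as-ndjson! [{:id 1, :venue_name "Cam's Coffee Shop"}
                        {:id 2, :venue_name "Old Town Bar"}]
                       "/tmp/checkins-sample.json")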
(def ^:private ^:const indexing-task
(defn- generate-json-for-batch-ingestion
"Generate the file to be used for a batched data ingestion for Druid."
([]
(generate-json-for-batch-ingestion default-filename))
([filename]
(write-dbdef-to-json (flattened-test-data) filename)))
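;; REPL usage (step 3A of the walkthrough above):
;;
;;   (generate-json-for-batch-ingestion)                 ; writes checkins.json
;;   (generate-json-for-batch-ingestion "my-data.json")  ; custom filename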
;;; Running Indexing Task
(defn- indexing-task
"Create a batched ingestion task dictionary."
[{:keys [base-dir filename]
:or {base-dir "/home/ec2-user"
filename default-filename}}]
{:type :index
:spec {:dataSchema {:dataSource "checkins"
:parser {:type :string
@@ -53,18 +122,20 @@
:intervals ["2000/2016"]}}
:ioConfig {:type :index
:firehose {:type :local
:baseDir temp-dir
:filter source-filename}}}})
:baseDir base-dir
:filter filename}}}})
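;; Example invocation of indexing-task above, with the base-dir/filename from
;; the EC2 walkthrough; only the :ioConfig part of the result is shown:
(get-in (indexing-task {:base-dir "/home/ec2-user"
                        :filename "checkins.json"})
        [:spec :ioConfig])
;; => {:type :index, :firehose {:type :local, :baseDir "/home/ec2-user", :filter "checkins.json"}}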
(def ^:private ^:const indexer-endpoint "http://localhost:8090/druid/indexer/v1/task")
(def ^:private ^:const indexer-timeout-seconds
"Maximum number of seconds we should wait for the indexing task to finish before deciding it's failed."
120)
180)
(resolve-private-fns metabase.driver.druid GET POST)
(defn- run-indexing-task []
(let [{:keys [task]} (POST indexer-endpoint, :body indexing-task)
(defn- run-indexing-task
"Run a batched ingestion task on HOST."
[host & {:as indexing-task-args}]
(let [indexer-endpoint (str host ":8090/druid/indexer/v1/task")
{:keys [task]} (POST indexer-endpoint, :body (indexing-task indexing-task-args))
status-url (str indexer-endpoint "/" task "/status")]
(println "STATUS URL: " (str indexer-endpoint "/" task "/status"))
(loop [remaining-seconds indexer-timeout-seconds]
@@ -77,43 +148,3 @@
(throw (Exception. (str "Indexing task failed:\n" (u/pprint-to-str status)))))
(Thread/sleep 1000)
(recur (dec remaining-seconds)))))))
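;; The hunk header above elides the middle of this loop; its shape is a
;; poll-until-terminal-state timer against status-url. A minimal sketch of that
;; pattern (get-status is a hypothetical stand-in for the GET on status-url):
(defn- poll-until-done [get-status timeout-seconds]
  (loop [remaining-seconds timeout-seconds]
    (when (<= remaining-seconds 0)
      (throw (Exception. "Timed out waiting for the indexing task.")))
    (case (get-status)
      "SUCCESS" :done
      "FAILED"  (throw (Exception. "Indexing task failed."))
      (do (Thread/sleep 1000)
          (recur (dec remaining-seconds))))))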
(defn- setup-druid-test-data* []
(println (u/format-color 'blue "Loading druid test data..."))
(write-dbdef-to-json (flattened-test-data) (str temp-dir "/" source-filename))
(run-indexing-task))
#_(defn- setup-druid-test-data
{:expectations-options :before-run}
[]
(datasets/when-testing-engine :druid
(setup-druid-test-data*)))
;; TODO - needs to wait until http://localhost:8082/druid/v2/datasources/checkins?interval=-5000/5000 returns data
#_{:dimensions [:venue_name
:venue_category_name
:user_password
:venue_longitude
:user_name
:id
:venue_latitude
:user_last_login
:venue_price]
:metrics [:count]}
(defn- database->connection-details [this context dbdef]
{:host "http://localhost"
:port 8082})
(extend DruidDriver
i/IDatasetLoader
(merge i/IDatasetLoaderDefaultsMixin
{:engine (constantly :druid)
:database->connection-details database->connection-details
:create-db! (constantly nil)
:destroy-db! (constantly nil)}))
;; TODO - don't log druid query during sync
;; TODO - make `:paging` a feature?