diff --git a/README.md b/README.md
index 6c3dff4579d816e6ca3c12071988c2bd5f01c904..98437a7abbf4fa5609eaa0552d874e3acbd85d7e 100644
--- a/README.md
+++ b/README.md
@@ -37,6 +37,7 @@ For more information check out [metabase.com](http://www.metabase.com)
 - CrateDB
 - Oracle
 - Vertica
+- Presto
 
 Don't see your favorite database? File an issue to let us know.
 
diff --git a/bin/ci b/bin/ci
index 461fdffa284967ba745f08edadf10ba88f525c38..4869c1cd4ae31e520fe065a3278870a386de2183 100755
--- a/bin/ci
+++ b/bin/ci
@@ -19,11 +19,15 @@ node-1() {
     run_step lein-test
 }
 node-2() {
-    is_enabled "drivers" && export ENGINES="h2,postgres,sqlite" || export ENGINES="h2"
+    is_enabled "drivers" && export ENGINES="h2,postgres,sqlite,presto" || export ENGINES="h2"
     if is_engine_enabled "crate"; then
         run_step install-crate
     fi
+    if is_engine_enabled "presto"; then
+        run_step install-presto
+    fi
     MB_ENCRYPTION_SECRET_KEY='Orw0AAyzkO/kPTLJRxiyKoBHXa/d6ZcO+p+gpZO/wSQ=' MB_DB_TYPE=mysql MB_DB_DBNAME=circle_test MB_DB_PORT=3306 MB_DB_USER=ubuntu MB_DB_HOST=localhost \
+        MB_PRESTO_HOST=localhost MB_PRESTO_PORT=8080 \
         run_step lein-test
 }
 node-3() {
@@ -91,6 +95,11 @@ install-vertica() {
     sleep 60
 }
 
+install-presto() {
+    docker run --detach --publish 8080:8080 wiill/presto-mb-ci
+    sleep 10
+}
+
 lein-test() {
     lein test
 }
diff --git a/src/metabase/driver/generic_sql/util/unprepare.clj b/src/metabase/driver/generic_sql/util/unprepare.clj
index d65502ac180912785228c1ac8058e590ed548f71..56845e692a68a5e0a3500406c0c5609ebed4ffcd 100644
--- a/src/metabase/driver/generic_sql/util/unprepare.clj
+++ b/src/metabase/driver/generic_sql/util/unprepare.clj
@@ -7,20 +7,20 @@
   (:import java.util.Date))
 
 (defprotocol ^:private IUnprepare
-  (^:private unprepare-arg ^String [this]))
+  (^:private unprepare-arg ^String [this settings]))
 
 (extend-protocol IUnprepare
-  nil     (unprepare-arg [this] "NULL")
-  String  (unprepare-arg [this] (str \' (str/replace this "'" "\\\\'") \')) ; escape single-quotes
-  Boolean (unprepare-arg [this] (if this "TRUE" "FALSE"))
-  Number  (unprepare-arg [this] (str this))
-  Date    (unprepare-arg [this] (first (hsql/format (hsql/call :timestamp (hx/literal (u/date->iso-8601 this))))))) ; TODO - this probably doesn't work for every DB!
+  nil     (unprepare-arg [this _] "NULL")
+  String  (unprepare-arg [this {:keys [quote-escape]}] (str \' (str/replace this "'" (str quote-escape "'")) \')) ; escape single-quotes
+  Boolean (unprepare-arg [this _] (if this "TRUE" "FALSE"))
+  Number  (unprepare-arg [this _] (str this))
+  Date    (unprepare-arg [this {:keys [iso-8601-fn]}] (first (hsql/format (hsql/call iso-8601-fn (hx/literal (u/date->iso-8601 this)))))))
 
 (defn unprepare
   "Convert a normal SQL `[statement & prepared-statement-args]` vector into a flat, non-prepared statement."
-  ^String [[sql & args]]
+  ^String [[sql & args] & {:keys [quote-escape iso-8601-fn], :or {quote-escape "\\\\", iso-8601-fn :timestamp}}]
   (loop [sql sql, [arg & more-args, :as args] args]
     (if-not (seq args)
       sql
-      (recur (str/replace-first sql #"(?<!\?)\?(?!\?)" (unprepare-arg arg))
+      (recur (str/replace-first sql #"(?<!\?)\?(?!\?)" (unprepare-arg arg {:quote-escape quote-escape, :iso-8601-fn iso-8601-fn}))
              more-args))))
diff --git a/src/metabase/driver/presto.clj b/src/metabase/driver/presto.clj
new file mode 100644
index 0000000000000000000000000000000000000000..97630c44ff7b08284a7b8d5a11700d335207def3
--- /dev/null
+++ b/src/metabase/driver/presto.clj
@@ -0,0 +1,350 @@
+(ns metabase.driver.presto
+  (:require [clojure.set :as set]
+            [clojure.string :as str]
+            [clj-http.client :as http]
+            (honeysql [core :as hsql]
+                      [helpers :as h])
+            [metabase.config :as config]
+            [metabase.driver :as driver]
+            [metabase.driver.generic-sql :as sql]
+            [metabase.driver.generic-sql.util.unprepare :as unprepare]
+            (metabase.models [field :as field]
+                             [table :as table])
+            [metabase.sync-database.analyze :as analyze]
+            [metabase.query-processor.util :as qputil]
+            [metabase.util :as u]
+            [metabase.util.honeysql-extensions :as hx])
+  (:import java.util.Date
+           (metabase.query_processor.interface DateTimeValue Value)))
+
+
+;;; Presto API helpers
+
+(defn- details->uri
+  [{:keys [ssl host port]} path]
+  (str (if ssl "https" "http") "://" host ":" port path))
+
+(defn- details->request [{:keys [user password catalog report-timezone]}]
+  (merge {:headers (merge {"X-Presto-Source" "metabase"
+                           "X-Presto-User"   user}
+                          (when catalog
+                            {"X-Presto-Catalog" catalog})
+                          (when report-timezone
+                            {"X-Presto-Time-Zone" report-timezone}))}
+         (when password
+           {:basic-auth [user password]})))
+
+(defn- parse-time-with-tz [s]
+  ;; Try parsing with offset first then with full ZoneId
+  (or (u/ignore-exceptions (u/parse-date "HH:mm:ss.SSS ZZ" s))
+      (u/parse-date "HH:mm:ss.SSS ZZZ" s)))
+
+(defn- parse-timestamp-with-tz [s]
+  ;; Try parsing with offset first then with full ZoneId
+  (or (u/ignore-exceptions (u/parse-date "yyyy-MM-dd HH:mm:ss.SSS ZZ" s))
+      (u/parse-date "yyyy-MM-dd HH:mm:ss.SSS ZZZ" s)))
+
+(defn- field-type->parser [field-type]
+  (condp re-matches field-type
+    #"decimal.*"                bigdec
+    #"time"                     (partial u/parse-date :hour-minute-second-ms)
+    #"time with time zone"      parse-time-with-tz
+    #"timestamp"                (partial u/parse-date "yyyy-MM-dd HH:mm:ss.SSS")
+    #"timestamp with time zone" parse-timestamp-with-tz
+    #".*"                       identity))
+
+(defn- parse-presto-results [columns data]
+  ;; Run every cell of every row through the parser chosen for its column's `:type`.
+  ;; `some?` (rather than plain truthiness) keeps boolean `false` cells intact; only nil cells stay nil.
+  (let [parsers (map (comp field-type->parser :type) columns)]
+    (for [row data]
+      (for [[value parser] (partition 2 (interleave row parsers))]
+        (when (some? value)
+          (parser value))))))
+
+(defn- fetch-presto-results! [details {prev-columns :columns, prev-rows :rows} uri]
+  ;; GET `uri`, append the parsed rows to the ones fetched so far and keep following `nextUri` until there is none.
+  ;; Uses `recur` so a query that needs many polls does not add a stack frame per poll.
+  (let [{{:keys [columns data nextUri error]} :body} (http/get uri (assoc (details->request details) :as :json))]
+    (when error
+      (throw (ex-info (or (:message error) "Error running query.") error)))
+    (let [rows    (parse-presto-results columns data)
+          results {:columns (or columns prev-columns)
+                   :rows    (into (vec prev-rows) rows)}]
+      (if (nil? nextUri)
+        results
+        (do (Thread/sleep 100) ; Might not be the best way, but the pattern is that we poll Presto at intervals
+            (recur details results nextUri))))))
+
+(defn- execute-presto-query! [details query]
+  ;; POST `query` to /v1/statement, then follow `nextUri` (if any) to collect the remaining results
+  (let [{{:keys [columns data nextUri error]} :body} (http/post (details->uri details "/v1/statement")
+                                                                (assoc (details->request details) :body query, :as :json))]
+    (when error
+      (throw (ex-info (or (:message error) "Error preparing query.") error)))
+    (let [rows    (parse-presto-results (or columns []) (or data []))
+          results {:columns (or columns [])
+                   :rows    rows}]
+      (if (nil? nextUri)
+        results
+        (fetch-presto-results! details results nextUri)))))
+
+
+;;; Generic helpers
+
+(defn- quote-name [nm]
+  (str \" (str/replace nm "\"" "\"\"") \"))
+
+(defn- quote+combine-names [& names]
+  (str/join \. (map quote-name names)))
+
+
+;;; IDriver implementation
+
+(defn- field-avg-length [{field-name :name, :as field}]
+  (let [table             (field/table field)
+        {:keys [details]} (table/database table)
+        sql               (format "SELECT cast(round(avg(length(%s))) AS integer) FROM %s WHERE %s IS NOT NULL"
+                                  (quote-name field-name)
+                                  (quote+combine-names (:schema table) (:name table))
+                                  (quote-name field-name))
+        {[[v]] :rows}     (execute-presto-query! details sql)]
+    (or v 0)))
+
+(defn- field-percent-urls [{field-name :name, :as field}]
+  (let [table             (field/table field)
+        {:keys [details]} (table/database table)
+        sql               (format "SELECT cast(count_if(url_extract_host(%s) <> '') AS double) / cast(count(*) AS double) FROM %s WHERE %s IS NOT NULL"
+                                  (quote-name field-name)
+                                  (quote+combine-names (:schema table) (:name table))
+                                  (quote-name field-name))
+        {[[v]] :rows}     (execute-presto-query! details sql)]
+    (if (= v "NaN") 0.0 v)))
+
+(defn- analyze-table [driver table new-table-ids]
+  ((analyze/make-analyze-table driver
+     :field-avg-length-fn   field-avg-length
+     :field-percent-urls-fn field-percent-urls) driver table new-table-ids))
+
+(defn- can-connect? [{:keys [catalog] :as details}]
+  (let [{[[v]] :rows} (execute-presto-query! details (str "SHOW SCHEMAS FROM " (quote-name catalog) " LIKE 'information_schema'"))]
+    (= v "information_schema")))
+
+(defn- date-interval [unit amount]
+  (hsql/call :date_add (hx/literal unit) amount :%now))
+
+(defn- describe-schema [{{:keys [catalog] :as details} :details} {:keys [schema]}]
+  (let [sql            (str "SHOW TABLES FROM " (quote+combine-names catalog schema))
+        {:keys [rows]} (execute-presto-query! details sql)
+        tables         (map first rows)]
+    (set (for [name tables]
+           {:name name, :schema schema}))))
+
+(defn- describe-database [{{:keys [catalog] :as details} :details :as database}]
+  (let [sql            (str "SHOW SCHEMAS FROM " (quote-name catalog))
+        {:keys [rows]} (execute-presto-query! details sql)
+        schemas        (remove #{"information_schema"} (map first rows))] ; inspecting "information_schema" breaks weirdly
+    {:tables (apply set/union (for [name schemas]
+                                (describe-schema database {:schema name})))}))
+
+(defn- presto-type->base-type [field-type]
+  ;; `re-matches` must match the whole type string, so parameterized types (e.g. `array(varchar)`) need a trailing `.*`
+  (condp re-matches field-type
+    #"boolean"     :type/Boolean
+    #"tinyint"     :type/Integer
+    #"smallint"    :type/Integer
+    #"integer"     :type/Integer
+    #"bigint"      :type/BigInteger
+    #"real"        :type/Float
+    #"double"      :type/Float
+    #"decimal.*"   :type/Decimal
+    #"varchar.*"   :type/Text
+    #"char.*"      :type/Text
+    #"varbinary.*" :type/*
+    #"json"        :type/Text       ; TODO - this should probably be Dictionary or something
+    #"date"        :type/Date
+    #"time.*"      :type/DateTime
+    #"array.*"     :type/Array
+    #"map.*"       :type/Dictionary
+    #"row.*"       :type/*          ; TODO - again, but this time we supposedly have a schema
+    #".*"          :type/*))
+
+(defn- describe-table [{{:keys [catalog] :as details} :details} {schema :schema, table-name :name}]
+  (let [sql            (str "DESCRIBE " (quote+combine-names catalog schema table-name))
+        {:keys [rows]} (execute-presto-query! details sql)]
+    {:schema schema
+     :name   table-name
+     :fields (set (for [[name type] rows]
+                    {:name name, :base-type (presto-type->base-type type)}))}))
+
+(defprotocol ^:private IPrepareValue
+  (^:private prepare-value [this]))
+(extend-protocol IPrepareValue
+  nil           (prepare-value [_] nil)
+  DateTimeValue (prepare-value [{:keys [value]}] (prepare-value value))
+  Value         (prepare-value [{:keys [value]}] (prepare-value value))
+  String        (prepare-value [this] (hx/literal (str/replace this "'" "''")))
+  Boolean       (prepare-value [this] (hsql/raw (if this "TRUE" "FALSE")))
+  Date          (prepare-value [this] (hsql/call :from_iso8601_timestamp (hx/literal (u/date->iso-8601 this))))
+  Number        (prepare-value [this] this)
+  Object        (prepare-value [this] (throw (Exception. (format "Don't know how to prepare value %s %s" (class this) this)))))
+
+(defn- execute-query [{:keys [database settings], {sql :query, params :params} :native, :as outer-query}]
+  (let [sql                    (str "-- " (qputil/query->remark outer-query) "\n"
+                                    (unprepare/unprepare (cons sql params) :quote-escape "'", :iso-8601-fn :from_iso8601_timestamp))
+        details                (merge (:details database) settings)
+        {:keys [columns rows]} (execute-presto-query! details sql)]
+    {:columns (map (comp keyword :name) columns)
+     :rows    rows}))
+
+(defn- field-values-lazy-seq [{field-name :name, :as field}]
+  ;; TODO - look into making this actually lazy
+  (let [table             (field/table field)
+        {:keys [details]} (table/database table)
+        sql               (format "SELECT %s FROM %s LIMIT %d"
+                                  (quote-name field-name)
+                                  (quote+combine-names (:schema table) (:name table))
+                                  driver/max-sync-lazy-seq-results)
+        {:keys [rows]}    (execute-presto-query! details sql)]
+    (for [row rows]
+      (first row))))
+
+(defn- humanize-connection-error-message [message]
+  (condp re-matches message
+    #"^java.net.ConnectException: Connection refused.*$"
+    (driver/connection-error-messages :cannot-connect-check-host-and-port)
+
+    #"^clojure.lang.ExceptionInfo: Catalog .* does not exist.*$"
+    (driver/connection-error-messages :database-name-incorrect)
+
+    #"^java.net.UnknownHostException.*$"
+    (driver/connection-error-messages :invalid-hostname)
+
+    #".*" ; default
+    message))
+
+(defn- table-rows-seq [{:keys [details]} {:keys [schema name]}]
+  (let [sql                        (format "SELECT * FROM %s" (quote+combine-names schema name))
+        {:keys [rows], :as result} (execute-presto-query! details sql)
+        columns                    (map (comp keyword :name) (:columns result))]
+    (for [row rows]
+      (zipmap columns row))))
+
+
+;;; ISQLDriver implementation
+
+(defn- apply-page [honeysql-query {{:keys [items page]} :page}]
+  (let [offset (* (dec page) items)]
+    (if (zero? offset)
+      ;; if there's no offset we can simply use limit
+      (h/limit honeysql-query items)
+      ;; if we need to do an offset we have to do nesting to generate a row number and where on that
+      (let [over-clause (format "row_number() OVER (%s)"
+                                (first (hsql/format (select-keys honeysql-query [:order-by])
+                                                    :allow-dashed-names? true
+                                                    :quoting :ansi)))]
+        (-> (apply h/select (map last (:select honeysql-query)))
+            (h/from (h/merge-select honeysql-query [(hsql/raw over-clause) :__rownum__]))
+            (h/where [:> :__rownum__ offset])
+            (h/limit items))))))
+
+(defn- date [unit expr]
+  (case unit
+    :default         expr
+    :minute          (hsql/call :date_trunc (hx/literal :minute) expr)
+    :minute-of-hour  (hsql/call :minute expr)
+    :hour            (hsql/call :date_trunc (hx/literal :hour) expr)
+    :hour-of-day     (hsql/call :hour expr)
+    :day             (hsql/call :date_trunc (hx/literal :day) expr)
+    ;; Presto is ISO compliant, so we need to offset Monday = 1 to Sunday = 1
+    :day-of-week     (hx/+ (hx/mod (hsql/call :day_of_week expr) 7) 1)
+    :day-of-month    (hsql/call :day expr)
+    :day-of-year     (hsql/call :day_of_year expr)
+    ;; Similar to DoW, since Presto is ISO compliant the week starts on Monday, we need to shift that to Sunday
+    :week            (hsql/call :date_add (hx/literal :day) -1 (hsql/call :date_trunc (hx/literal :week) (hsql/call :date_add (hx/literal :day) 1 expr)))
+    ;; Offset by one day forward to "fake" a Sunday starting week
+    :week-of-year    (hsql/call :week (hsql/call :date_add (hx/literal :day) 1 expr))
+    :month           (hsql/call :date_trunc (hx/literal :month) expr)
+    :month-of-year   (hsql/call :month expr)
+    :quarter         (hsql/call :date_trunc (hx/literal :quarter) expr)
+    :quarter-of-year (hsql/call :quarter expr)
+    :year            (hsql/call :year expr)))
+
+(defn- string-length-fn [field-key]
+  (hsql/call :length field-key))
+
+(defn- unix-timestamp->timestamp [expr seconds-or-milliseconds]
+  (case seconds-or-milliseconds
+    :seconds      (hsql/call :from_unixtime expr)
+    :milliseconds (recur (hx// expr 1000.0) :seconds)))
+
+
+;;; Driver implementation
+
+(defrecord PrestoDriver []
+  clojure.lang.Named
+  (getName [_] "Presto"))
+
+(u/strict-extend PrestoDriver
+  driver/IDriver
+  (merge (sql/IDriverSQLDefaultsMixin)
+         {:analyze-table                     analyze-table
+          :can-connect?                      (u/drop-first-arg can-connect?)
+          :date-interval                     (u/drop-first-arg date-interval)
+          :describe-database                 (u/drop-first-arg describe-database)
+          :describe-table                    (u/drop-first-arg describe-table)
+          :describe-table-fks                (constantly nil) ; no FKs in Presto
+          :details-fields                    (constantly [{:name         "host"
+                                                           :display-name "Host"
+                                                           :default      "localhost"}
+                                                          {:name         "port"
+                                                           :display-name "Port"
+                                                           :type         :integer
+                                                           :default      8080}
+                                                          {:name         "catalog"
+                                                           :display-name "Database name"
+                                                           :placeholder  "hive"
+                                                           :required     true}
+                                                          {:name         "user"
+                                                           :display-name "Database username"
+                                                           :placeholder  "What username do you use to login to the database"
+                                                           :default      "metabase"}
+                                                          {:name         "password"
+                                                           :display-name "Database password"
+                                                           :type         :password
+                                                           :placeholder  "*******"}
+                                                          {:name         "ssl"
+                                                           :display-name "Use a secure connection (SSL)?"
+                                                           :type         :boolean
+                                                           :default      false}])
+          :execute-query                     (u/drop-first-arg execute-query)
+          :features                          (constantly (set/union #{:set-timezone
+                                                                      :basic-aggregations
+                                                                      :standard-deviation-aggregations
+                                                                      :expressions
+                                                                      :native-parameters
+                                                                      :expression-aggregations}
+                                                                    (when-not config/is-test?
+                                                                      ;; during unit tests don't treat presto as having FK support
+                                                                      #{:foreign-keys})))
+          :field-values-lazy-seq             (u/drop-first-arg field-values-lazy-seq)
+          :humanize-connection-error-message (u/drop-first-arg humanize-connection-error-message)
+          :table-rows-seq                    (u/drop-first-arg table-rows-seq)})
+
+  sql/ISQLDriver
+  (merge (sql/ISQLDriverDefaultsMixin)
+         {:apply-page                (u/drop-first-arg apply-page)
+          :column->base-type         (constantly nil)
+          :connection-details->spec  (constantly nil)
+          :current-datetime-fn       (constantly :%now)
+          :date                      (u/drop-first-arg date)
+          :excluded-schemas          (constantly #{"information_schema"})
+          :field-percent-urls        (u/drop-first-arg field-percent-urls)
+          :prepare-value             (u/drop-first-arg prepare-value)
+          :quote-style               (constantly :ansi)
+          :stddev-fn                 (constantly :stddev_samp)
+          :string-length-fn          (u/drop-first-arg string-length-fn)
+          :unix-timestamp->timestamp (u/drop-first-arg unix-timestamp->timestamp)}))
+
+
+(driver/register-driver! :presto (PrestoDriver.))
diff --git a/test/metabase/driver/generic_sql/util/unprepare_test.clj b/test/metabase/driver/generic_sql/util/unprepare_test.clj
index b20a4d91e9bbf2cad244e8258bd19c5de1c573de..ce98bb08df85eeff569c6943d0b1a9155d57d096 100644
--- a/test/metabase/driver/generic_sql/util/unprepare_test.clj
+++ b/test/metabase/driver/generic_sql/util/unprepare_test.clj
@@ -8,3 +8,12 @@
                         "Cam's Cool Toucan"
                         true
                         #inst "2017-01-01T00:00:00.000Z"]))
+
+(expect
+  "SELECT 'Cam''s Cool Toucan' FROM TRUE WHERE x ?? y AND z = from_iso8601_timestamp('2017-01-01T00:00:00.000Z')"
+  (unprepare/unprepare ["SELECT ? FROM ? WHERE x ?? y AND z = ?"
+                        "Cam's Cool Toucan"
+                        true
+                        #inst "2017-01-01T00:00:00.000Z"]
+                       :quote-escape "'"
+                       :iso-8601-fn :from_iso8601_timestamp))
diff --git a/test/metabase/driver/generic_sql_test.clj b/test/metabase/driver/generic_sql_test.clj
index bbb68a74743a7d5775c342a05c8827bc168405f3..4c51952cf9e49066ce8c3f5276433e1511db5218 100644
--- a/test/metabase/driver/generic_sql_test.clj
+++ b/test/metabase/driver/generic_sql_test.clj
@@ -19,7 +19,7 @@
 (def ^:private generic-sql-engines
   (delay (set (for [engine datasets/all-valid-engines
                     :let   [driver (driver/engine->driver engine)]
-                    :when  (not= engine :bigquery) ; bigquery doesn't use the generic sql implementations of things like `field-avg-length`
+                    :when  (not (contains? #{:bigquery :presto} engine)) ; bigquery and presto don't use the generic sql implementations of things like `field-avg-length`
                     :when  (extends? ISQLDriver (class driver))]
                 (do (require (symbol (str "metabase.test.data." (name engine))) :reload) ; otherwise it gets all snippy if you try to do `lein test metabase.driver.generic-sql-test`
                     engine)))))
diff --git a/test/metabase/driver/presto_test.clj b/test/metabase/driver/presto_test.clj
new file mode 100644
index 0000000000000000000000000000000000000000..86502b962ef26390c1e6f3ae9c777b69409dc94d
--- /dev/null
+++ b/test/metabase/driver/presto_test.clj
@@ -0,0 +1,153 @@
+(ns metabase.driver.presto-test
+  (:require [expectations :refer :all]
+            [toucan.db :as db]
+            [metabase.driver :as driver]
+            [metabase.driver.generic-sql :as sql]
+            [metabase.models.table :as table]
+            [metabase.test.data :as data]
+            [metabase.test.data.datasets :as datasets]
+            [metabase.test.util :refer [resolve-private-vars]])
+  (:import (metabase.driver.presto PrestoDriver)))
+
+(resolve-private-vars metabase.driver.presto details->uri details->request parse-presto-results presto-type->base-type quote-name quote+combine-names apply-page)
+
+;;; HELPERS
+
+(expect
+  "http://localhost:8080/"
+  (details->uri {:host "localhost", :port 8080, :ssl false} "/"))
+
+(expect
+  "https://localhost:8443/"
+  (details->uri {:host "localhost", :port 8443, :ssl true} "/"))
+
+(expect
+  "http://localhost:8080/v1/statement"
+  (details->uri {:host "localhost", :port 8080, :ssl false} "/v1/statement"))
+
+(expect
+  {:headers {"X-Presto-Source" "metabase"
+             "X-Presto-User"   "user"}}
+  (details->request {:user "user"}))
+
+(expect
+  {:headers    {"X-Presto-Source" "metabase"
+                "X-Presto-User"   "user"}
+   :basic-auth ["user" "test"]}
+  (details->request {:user "user", :password "test"}))
+
+(expect
+  {:headers {"X-Presto-Source"    "metabase"
+             "X-Presto-User"      "user"
+             "X-Presto-Catalog"   "test_data"
+             "X-Presto-Time-Zone" "America/Toronto"}}
+  (details->request {:user "user", :catalog "test_data", :report-timezone "America/Toronto"}))
+
+(expect
+  [["2017-04-03"
+    #inst "2017-04-03T14:19:17.417000000-00:00"
+    #inst "2017-04-03T10:19:17.417000000-00:00"
+    3.1416M
+    "test"]]
+  (parse-presto-results [{:type "date"} {:type "timestamp with time zone"} {:type "timestamp"} {:type "decimal(10,4)"} {:type "varchar(255)"}]
+                        [["2017-04-03", "2017-04-03 10:19:17.417 America/Toronto", "2017-04-03 10:19:17.417", "3.1416", "test"]]))
+
+;; boolean `false` cells must survive parsing; only nil cells come back as nil
+(expect
+  [[false nil]]
+  (parse-presto-results [{:type "boolean"} {:type "boolean"}]
+                        [[false nil]]))
+
+;; parameterized collection types must map to Array/Dictionary instead of falling through to :type/*
+(expect :type/Array      (presto-type->base-type "array(varchar)"))
+(expect :type/Dictionary (presto-type->base-type "map(varchar,bigint)"))
+
+(expect
+  "\"weird.table\"\" name\""
+  (quote-name "weird.table\" name"))
+
+(expect
+  "\"weird . \"\"schema\".\"weird.table\"\" name\""
+  (quote+combine-names "weird . \"schema" "weird.table\" name"))
+
+;; DESCRIBE-DATABASE
+(datasets/expect-with-engine :presto
+  {:tables #{{:name "categories" :schema "default"}
+             {:name "venues"     :schema "default"}
+             {:name "checkins"   :schema "default"}
+             {:name "users"      :schema "default"}}}
+  (driver/describe-database (PrestoDriver.) (data/db)))
+
+;; DESCRIBE-TABLE
+(datasets/expect-with-engine :presto
+  {:name   "venues"
+   :schema "default"
+   :fields #{{:name      "name",
+              :base-type :type/Text}
+             {:name      "latitude"
+              :base-type :type/Float}
+             {:name      "longitude"
+              :base-type :type/Float}
+             {:name      "price"
+              :base-type :type/Integer}
+             {:name      "category_id"
+              :base-type :type/Integer}
+             {:name      "id"
+              :base-type :type/Integer}}}
+  (driver/describe-table (PrestoDriver.) (data/db) (db/select-one 'Table :id (data/id :venues))))
+
+;;; ANALYZE-TABLE
+(datasets/expect-with-engine :presto
+  {:row_count 100
+   :fields    [{:id (data/id :venues :category_id), :values [2 3 4 5 6 7 10 11 12 13 14 15 18 19 20 29 40 43 44 46 48 49 50 58 64 67 71 74]}
+               {:id (data/id :venues :id)}
+               {:id (data/id :venues :latitude)}
+               {:id (data/id :venues :longitude)}
+               {:id (data/id :venues :name), :values (db/select-one-field :values 'FieldValues, :field_id (data/id :venues :name))}
+               {:id (data/id :venues :price), :values [1 2 3 4]}]}
+  (let [venues-table (db/select-one 'Table :id (data/id :venues))]
+    (driver/analyze-table (PrestoDriver.) venues-table (set (mapv :id (table/fields venues-table))))))
+
+;;; FIELD-VALUES-LAZY-SEQ
+(datasets/expect-with-engine :presto
+  ["Red Medicine"
+   "Stout Burgers & Beers"
+   "The Apple Pan"
+   "Wurstküche"
+   "Brite Spot Family Restaurant"]
+  (take 5 (driver/field-values-lazy-seq (PrestoDriver.) (db/select-one 'Field :id (data/id :venues :name)))))
+
+;;; TABLE-ROWS-SEQ
+(datasets/expect-with-engine :presto
+  [{:name "Red Medicine",                 :price 3, :category_id  4, :id 1}
+   {:name "Stout Burgers & Beers",        :price 2, :category_id 11, :id 2}
+   {:name "The Apple Pan",                :price 2, :category_id 11, :id 3}
+   {:name "Wurstküche",                   :price 2, :category_id 29, :id 4}
+   {:name "Brite Spot Family Restaurant", :price 2, :category_id 20, :id 5}]
+  (for [row (take 5 (sort-by :id (driver/table-rows-seq (PrestoDriver.)
+                                                        (db/select-one 'Database :id (data/id))
+                                                        (db/select-one 'RawTable :id (db/select-one-field :raw_table_id 'Table, :id (data/id :venues))))))]
+    (-> (dissoc row :latitude :longitude)
+        (update :price int)
+        (update :category_id int)
+        (update :id int))))
+
+;;; FIELD-PERCENT-URLS
+(datasets/expect-with-engine :presto
+  0.5
+  (data/dataset half-valid-urls
+    (sql/field-percent-urls (PrestoDriver.) (db/select-one 'Field :id (data/id :urls :url)))))
+
+;;; APPLY-PAGE
+(expect
+  {:select ["name" "id"]
+   :from   [{:select   [[:default.categories.name "name"] [:default.categories.id "id"] [{:s "row_number() OVER (ORDER BY \"default\".\"categories\".\"id\" ASC)"} :__rownum__]]
+             :from     [:default.categories]
+             :order-by [[:default.categories.id :asc]]}]
+   :where  [:> :__rownum__ 5]
+   :limit  5}
+  (apply-page {:select   [[:default.categories.name "name"] [:default.categories.id "id"]]
+               :from     [:default.categories]
+               :order-by [[:default.categories.id :asc]]}
+              {:page {:page  2
+                      :items 5}}))
diff --git a/test/metabase/query_processor/sql_parameters_test.clj b/test/metabase/query_processor/sql_parameters_test.clj
index 1fbf45c83e4e6c00ad3494fe62c0542ea2802f8d..dda33ecf8b2442c1e0c354b4914fd7f1595774aa 100644
--- a/test/metabase/query_processor/sql_parameters_test.clj
+++ b/test/metabase/query_processor/sql_parameters_test.clj
@@ -428,9 +428,10 @@
   (generic-sql/quote-name datasets/*driver* identifier))
 
 (defn- checkins-identifier []
-  ;; HACK ! I don't have all day to write protocol methods to make this work the "right" way so for BigQuery we will just hackily return the correct identifier here
-  (if (= datasets/*engine* :bigquery)
-    "[test_data.checkins]"
+  ;; HACK ! I don't have all day to write protocol methods to make this work the "right" way so for BigQuery and Presto we will just hackily return the correct identifier here
+  (case datasets/*engine*
+    :bigquery "[test_data.checkins]"
+    :presto   "\"default\".\"checkins\""
     (let [{table-name :name, schema :schema} (db/select-one ['Table :name :schema], :id (data/id :checkins))]
       (str (when (seq schema)
              (str (quote-name schema) \.))
diff --git a/test/metabase/query_processor_test/aggregation_test.clj b/test/metabase/query_processor_test/aggregation_test.clj
index 759d5e26e57a62c485d473a0a5534ccaf22ad3fd..d6b8267866176ce7e2bc01ae7810ebcb0366b60f 100644
--- a/test/metabase/query_processor_test/aggregation_test.clj
+++ b/test/metabase/query_processor_test/aggregation_test.clj
@@ -149,8 +149,8 @@
                   (ql/aggregation (ql/avg $price) (ql/count) (ql/sum $price))))))
 
 ;; make sure that multiple aggregations of the same type have the correct metadata (#4003)
-;; (TODO - this isn't tested against Mongo or BigQuery because those drivers don't currently work correctly with multiple columns with the same name)
-(datasets/expect-with-engines (disj non-timeseries-engines :mongo :bigquery)
+;; (TODO - this isn't tested against Mongo, BigQuery or Presto because those drivers don't currently work correctly with multiple columns with the same name)
+(datasets/expect-with-engines (disj non-timeseries-engines :mongo :bigquery :presto)
   [(aggregate-col :count)
    (assoc (aggregate-col :count)
      :display_name "count_2"
diff --git a/test/metabase/query_processor_test/date_bucketing_test.clj b/test/metabase/query_processor_test/date_bucketing_test.clj
index eeee25db76c9fc3660e461a2f403d29bcd9cb6a5..603e8d93057fb3fdafc4cd9473181f0a7b6fbef5 100644
--- a/test/metabase/query_processor_test/date_bucketing_test.clj
+++ b/test/metabase/query_processor_test/date_bucketing_test.clj
@@ -37,7 +37,7 @@
      ["2015-06-02 08:20:00" 1]
      ["2015-06-02 11:11:00" 1]]
 
-    (contains? #{:redshift :sqlserver :bigquery :mongo :postgres :vertica :h2 :oracle} *engine*)
+    (contains? #{:redshift :sqlserver :bigquery :mongo :postgres :vertica :h2 :oracle :presto} *engine*)
     [["2015-06-01T10:31:00.000Z" 1]
      ["2015-06-01T16:06:00.000Z" 1]
      ["2015-06-01T17:23:00.000Z" 1]
@@ -246,7 +246,7 @@
     (contains? #{:sqlserver :sqlite :crate :oracle} *engine*)
     [[23 54] [24 46] [25 39] [26 61]]
 
-    (contains? #{:mongo :redshift :bigquery :postgres :vertica :h2} *engine*)
+    (contains? #{:mongo :redshift :bigquery :postgres :vertica :h2 :presto} *engine*)
     [[23 46] [24 47] [25 40] [26 60] [27 7]]
 
     :else
diff --git a/test/metabase/test/data/presto.clj b/test/metabase/test/data/presto.clj
new file mode 100644
index 0000000000000000000000000000000000000000..790907b1abf8aef6f032d5bdc710f1c8ee235c7c
--- /dev/null
+++ b/test/metabase/test/data/presto.clj
@@ -0,0 +1,107 @@
+(ns metabase.test.data.presto
+  (:require [clojure.string :as s]
+            [environ.core :refer [env]]
+            (honeysql [core :as hsql]
+                      [helpers :as h])
+            [metabase.driver.generic-sql.util.unprepare :as unprepare]
+            [metabase.test.data.interface :as i]
+            [metabase.test.util :refer [resolve-private-vars]]
+            [metabase.util :as u]
+            [metabase.util.honeysql-extensions :as hx])
+  (:import java.util.Date
+           metabase.driver.presto.PrestoDriver
+           (metabase.query_processor.interface DateTimeValue Value)))
+
+(resolve-private-vars metabase.driver.presto execute-presto-query! presto-type->base-type quote-name quote+combine-names)
+
+;;; Helpers
+
+(defn- get-env-var [env-var]
+  (or (env (keyword (format "mb-presto-%s" (name env-var))))
+      (throw (Exception. (format "In order to test Presto, you must specify the env var MB_PRESTO_%s."
+                                 (s/upper-case (s/replace (name env-var) #"-" "_")))))))
+
+
+;;; IDatasetLoader implementation
+
+(defn- database->connection-details [context {:keys [database-name]}]
+  (merge {:host (get-env-var :host)
+          :port (get-env-var :port)
+          :user "metabase"
+          :ssl  false}
+         (when (= context :db)
+           {:catalog database-name})))
+
+(defn- qualify-name
+  ;; we have to use the default schema from the in-memory connector
+  ([db-name]                       [db-name])
+  ([db-name table-name]            [db-name "default" table-name])
+  ([db-name table-name field-name] [db-name "default" table-name field-name]))
+
+(defn- qualify+quote-name [& names]
+  (apply quote+combine-names (apply qualify-name names)))
+
+(defn- field-base-type->dummy-value [field-type]
+  ;; we need a dummy value for every base-type to make a properly typed SELECT statement
+  (if (keyword? field-type)
+    (case field-type
+      :type/Boolean    "TRUE"
+      :type/Integer    "1"
+      :type/BigInteger "cast(1 AS bigint)"
+      :type/Float      "1.0"
+      :type/Decimal    "DECIMAL '1.0'"
+      :type/Text       "cast('' AS varchar(255))"
+      :type/Date       "current_timestamp" ; this should probably be a date type, but the test data begs to differ
+      :type/DateTime   "current_timestamp"
+      "from_hex('00')") ; this might not be the best default ever
+    ;; we were given a native type, map it back to a base-type and try again
+    (field-base-type->dummy-value (presto-type->base-type field-type))))
+
+(defn- create-table-sql [{:keys [database-name]} {:keys [table-name], :as tabledef}]
+  (let [field-definitions (conj (:field-definitions tabledef) {:field-name "id", :base-type :type/Integer})
+        dummy-values      (map (comp field-base-type->dummy-value :base-type) field-definitions)
+        columns           (map :field-name field-definitions)]
+    ;; Presto won't let us use the `CREATE TABLE (...)` form, but we can still do it creatively if we select the right types out of thin air
+    (format "CREATE TABLE %s AS SELECT * FROM (VALUES (%s)) AS t (%s) WHERE 1 = 0"
+            (qualify+quote-name database-name table-name)
+            (s/join \, dummy-values)
+            (s/join \, (map quote-name columns)))))
+
+(defn- drop-table-if-exists-sql [{:keys [database-name]} {:keys [table-name]}]
+  (str "DROP TABLE IF EXISTS " (qualify+quote-name database-name table-name)))
+
+(defn- insert-sql [{:keys [database-name]} {:keys [table-name], :as tabledef} rows]
+  (let [field-definitions (conj (:field-definitions tabledef) {:field-name "id"})
+        columns           (map (comp keyword :field-name) field-definitions)
+        [query & params]  (-> (apply h/columns columns)
+                              (h/insert-into (apply hsql/qualify (qualify-name database-name table-name)))
+                              (h/values rows)
+                              (hsql/format :allow-dashed-names? true, :quoting :ansi))]
+    (if (nil? params)
+      query
+      (unprepare/unprepare (cons query params) :quote-escape "'", :iso-8601-fn :from_iso8601_timestamp))))
+
+(defn- create-db! [{:keys [table-definitions] :as dbdef}]
+  (let [details (database->connection-details :db dbdef)]
+    (doseq [tabledef table-definitions
+            :let [rows       (:rows tabledef)
+                  keyed-rows (map-indexed (fn [i row] (conj row (inc i))) rows) ; generate an ID for each row because we don't have auto increments
+                  batches    (partition 100 100 nil keyed-rows)]] ; make 100 rows batches since we have to inline everything
+      (execute-presto-query! details (drop-table-if-exists-sql dbdef tabledef))
+      (execute-presto-query! details (create-table-sql dbdef tabledef))
+      (doseq [batch batches]
+        (execute-presto-query! details (insert-sql dbdef tabledef batch))))))
+
+
+;;; IDatasetLoader implementation
+
+(u/strict-extend PrestoDriver
+  i/IDatasetLoader
+  (merge i/IDatasetLoaderDefaultsMixin
+         {:engine                             (constantly :presto)
+          :database->connection-details       (u/drop-first-arg database->connection-details)
+          :create-db!                         (u/drop-first-arg create-db!)
+          :default-schema                     (constantly "default")
+          :format-name                        (u/drop-first-arg s/lower-case)
+          ;; FIXME Presto actually has very good timezone support
+          :has-questionable-timezone-support? (constantly true)}))