Skip to content
Snippets Groups Projects
Unverified Commit 10037cda authored by Cam Saul's avatar Cam Saul Committed by GitHub
Browse files

BigQuery code improvements (#11024)

parent cbabdf4e
No related branches found
No related tags found
No related merge requests found
;; NOTE(review): the scraped diff merged the old and new `ns` forms into one another (a second `(:require` nested
;; inside the first, `[schema.core :as s]` required twice, the model-class import vector duplicated), which cannot
;; be read by the Clojure reader. Reconstructed below as a single well-formed ns that is the union of both require
;; sets minus exact duplicates, so every name used by both the old and new code in this file stays in scope.
(ns metabase.driver.bigquery
  (:require [clj-time
             [coerce :as tcoerce]
             [core :as time]
             [format :as tformat]]
            [clojure
             [set :as set]
             [string :as str]]
            [honeysql
             [core :as hsql]
             [helpers :as h]]
            [metabase
             [driver :as driver]
             [util :as u]]
            [metabase.driver
             [common :as driver.common]
             [google :as google]]
            [metabase.driver.bigquery
             [common :as bigquery.common]
             [query-processor :as bigquery.qp]]
            [metabase.driver.sql.query-processor :as sql.qp]
            [metabase.driver.sql.util.unprepare :as unprepare]
            [metabase.mbql.util :as mbql.u]
            [metabase.models.table :as table]
            [metabase.query-processor
             [store :as qp.store]
             [util :as qputil]]
            [metabase.util
             [date :as du]
             [honeysql-extensions :as hx]
             [schema :as su]]
            [schema.core :as s]
            [toucan.db :as db])
  (:import com.google.api.client.googleapis.auth.oauth2.GoogleCredential
           com.google.api.client.http.HttpRequestInitializer
           [com.google.api.services.bigquery Bigquery Bigquery$Builder BigqueryScopes]
           [com.google.api.services.bigquery.model QueryRequest QueryResponse Table TableCell TableFieldSchema
            TableList TableList$Tables TableReference TableRow TableSchema]
           java.sql.Time
           [java.util Collections Date]
           metabase.util.honeysql_extensions.Identifier))
;; Register the BigQuery driver; it inherits shared behavior from the :google and :sql parent drivers.
(driver/register! :bigquery, :parent #{:google :sql})
(defn- valid-bigquery-identifier?
  "Is String `s` a valid BigQuery identifier? Identifiers are only allowed to contain letters, numbers, and underscores;
  cannot start with a number; and can be at most 128 characters long."
  [s]
  (boolean
   (and (string? s)
        ;; BUGFIX: the old pattern `([a-zA-Z_][a-zA-Z_0-9]*){1,128}` repeated the *entire identifier* group 1-128
        ;; times; a single greedy repetition matches a string of any length, so the 128-character limit was never
        ;; enforced. Bound the character count directly: one leading letter/underscore + at most 127 more chars.
        (re-matches #"^[a-zA-Z_][a-zA-Z_0-9]{0,127}$" s))))
;; Schema that validates a string against the BigQuery identifier rules above.
(def ^:private BigQueryIdentifierString
  (s/pred valid-bigquery-identifier? "Valid BigQuery identifier"))

(s/defn ^:private dataset-name-for-current-query :- BigQueryIdentifierString
  "Fetch the dataset name for the database associated with this query, needed because BigQuery requires you to qualify
  identifiers with it. This is primarily called automatically for the `to-sql` implementation of the
  `BigQueryIdentifier` record type; see its definition for more details."
  []
  ;; only consult the QP store once it is initialized; outside query execution there is no current database
  (when (qp.store/initialized?)
    (some-> (qp.store/database) :details :dataset-id)))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | Client |
;;; +----------------------------------------------------------------------------------------------------------------+
......@@ -117,7 +88,6 @@
;; g2g
(boolean (list-tables {:details details-map})))
(s/defn get-table :- Table
([{{:keys [project-id dataset-id]} :details, :as database} table-id]
(get-table (database->client database) project-id dataset-id table-id))
......@@ -154,11 +124,72 @@
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | Running Queries & Parsing Results |
;;; | Running Queries |
;;; +----------------------------------------------------------------------------------------------------------------+
;; Maximum number of seconds to wait for a BigQuery job to report completion before giving up.
(def ^:private ^:const ^Integer query-timeout-seconds 60)
(defn do-with-finished-response
  "Impl for `with-finished-response`. Polls `response` until its job reports completion, then calls `f` with it;
  throws if the job is still incomplete after `query-timeout-seconds`."
  {:style/indent 1}
  [^QueryResponse response, f]
  ;; 99% of the time the job is already complete on the first check; otherwise poll every 250 ms until the
  ;; wait budget is exhausted.
  (loop [budget-ms (* 1000 query-timeout-seconds)]
    (if (.getJobComplete response)
      (f response)
      (if-not (pos? budget-ms)
        (throw (ex-info "Query timed out." (into {} response)))
        (do
          (Thread/sleep 250)
          (recur (- budget-ms 250)))))))
(defmacro with-finished-response
  "Execute `body` after waiting for `response` to complete. Throws exception if response does not complete before
  `query-timeout-seconds`.

    (with-finished-response [response (execute-bigquery ...)]
      ...)"
  [[response-binding response] & body]
  `(do-with-finished-response
    ~response
    ;; tag the binding so Java interop calls on it inside `body` don't need reflection
    (fn [~(vary-meta response-binding assoc :tag 'com.google.api.services.bigquery.model.QueryResponse)]
      ~@body)))
(defn- post-process-native
  "Parse results of a BigQuery query."
  [^QueryResponse resp]
  (with-finished-response [response resp]
    (let [^TableSchema schema
          (.getSchema response)

          ;; one parser fn per result column, chosen from the `bigquery.qp/parse-result-of-type` multimethod by the
          ;; column's BigQuery type; `doall` realizes them up front, outside the lazy row seq
          parsers
          (doall
           (for [^TableFieldSchema field (.getFields schema)
                 :let [column-type (.getType field)
                       method      (get-method bigquery.qp/parse-result-of-type column-type)]]
             (partial method column-type bigquery.common/*bigquery-timezone*)))

          ;; column metadata renamed/trimmed into the shape the rest of the QP expects
          columns
          (for [column (table-schema->metabase-field-info schema)]
            (-> column
                (set/rename-keys {:base-type :base_type})
                (dissoc :database-type)))]
      {:columns (map (comp u/qualified-name :name) columns)
       :cols    columns
       ;; pair each cell with its column's parser positionally
       :rows    (for [^TableRow row (.getRows response)]
                  (for [[^TableCell cell, parser] (partition 2 (interleave (.getF row) parsers))]
                    (when-let [v (.getV cell)]
                      ;; There is a weird error where everything that *should* be NULL comes back as an Object.
                      ;; See https://jira.talendforge.org/browse/TBD-1592
                      ;; Everything else comes back as a String luckily so we can proceed normally.
                      (when-not (= (class v) Object)
                        (parser v)))))})))
(defn- ^QueryResponse execute-bigquery
([{{:keys [project-id]} :details, :as database} query-string]
(execute-bigquery (database->client database) project-id query-string))
......@@ -172,84 +203,6 @@
(.setQuery query-string))]
(google/execute (.query (.jobs client) project-id request)))))
(def ^:private ^:dynamic *bigquery-timezone*
  "BigQuery stores all of its timestamps in UTC. That timezone can be changed via a SQL function invocation in a
  native query, but that change in timezone is not conveyed through the BigQuery API. In most situations
  `*bigquery-timezone*` will just be UTC. If the user is always changing the timezone via native SQL function
  invocation, they can set their JVM TZ to the correct timezone, mark `use-jvm-timezone` to `true` and that will bind
  this dynamic var to the JVM TZ rather than UTC"
  time/utc)

(defn- parse-timestamp-str
  "Return a fn that parses a timestamp string from a BigQuery result into a Timestamp in `timezone`."
  [timezone]
  (fn [s]
    ;; Timestamp strings either come back as ISO-8601 strings or Unix timestamps in µs, e.g. "1.3963104E9"
    (or
     (du/->Timestamp s timezone)
     ;; If parsing as ISO-8601 fails parse as a double then convert to ms. This is ms since epoch in UTC. By using
     ;; `->Timestamp`, it will convert from ms in UTC to a timestamp object in the JVM timezone
     (du/->Timestamp (* (Double/parseDouble s) 1000)))))
(defn- bigquery-time-format
  "Formatter for BigQuery TIME column values, e.g. `13:45:07`.
  BUGFIX: the original pattern was `HH:mm:SS`; in Joda-Time/clj-time patterns, `S` means *fraction of second*
  while `s` is second-of-minute, so seconds were being parsed/printed as a millisecond fraction."
  [timezone]
  (tformat/formatter "HH:mm:ss" timezone))
(defn- parse-bigquery-time
  "Return a fn that parses a BigQuery TIME string in `timezone` into a `java.sql.Time`."
  [timezone]
  (fn [time-string]
    (->> time-string
         (tformat/parse (bigquery-time-format timezone))
         tcoerce/to-long
         Time.)))

(defn- unparse-bigquery-time
  "Format something coercible to a DateTime as a BigQuery TIME string in `timezone`."
  [timezone coercible-to-dt]
  (->> coercible-to-dt
       tcoerce/to-date-time
       (tformat/unparse (bigquery-time-format timezone))))

(def ^:private type->parser
  "Functions that should be used to coerce string values in responses to the appropriate type for their column."
  ;; each value is a fn of `timezone` that returns the actual parser; types that ignore the timezone wrap
  ;; their parser in `constantly`
  {"BOOLEAN"   (constantly #(Boolean/parseBoolean %))
   "FLOAT"     (constantly #(Double/parseDouble %))
   "INTEGER"   (constantly #(Long/parseLong %))
   "NUMERIC"   (constantly #(bigdec %))
   "RECORD"    (constantly identity)
   "STRING"    (constantly identity)
   "DATE"      parse-timestamp-str
   "DATETIME"  parse-timestamp-str
   "TIMESTAMP" parse-timestamp-str
   "TIME"      parse-bigquery-time})
(defn- post-process-native
  "Parse a `QueryResponse`, retrying once per second (up to `timeout-seconds`) until the job completes."
  ([^QueryResponse response]
   (post-process-native response query-timeout-seconds))

  ([^QueryResponse response, ^Integer timeout-seconds]
   (if-not (.getJobComplete response)
     ;; 99% of the time by the time this is called `.getJobComplete` will return `true`. On the off chance it doesn't,
     ;; wait a few seconds for the job to finish.
     (do
       (when (zero? timeout-seconds)
         (throw (ex-info "Query timed out." (into {} response))))
       (Thread/sleep 1000)
       (post-process-native response (dec timeout-seconds)))
     ;; Otherwise the job *is* complete
     (let [^TableSchema schema (.getSchema response)
           ;; one parser per column, looked up by BigQuery type and bound to the current timezone
           parsers             (doall
                                (for [^TableFieldSchema field (.getFields schema)
                                      :let [parser-fn (type->parser (.getType field))]]
                                  (parser-fn *bigquery-timezone*)))
           columns             (for [column (table-schema->metabase-field-info schema)]
                                 (-> column
                                     (set/rename-keys {:base-type :base_type})
                                     (dissoc :database-type)))]
       {:columns (map (comp u/qualified-name :name) columns)
        :cols    columns
        ;; pair each cell with its column's parser positionally
        :rows    (for [^TableRow row (.getRows response)]
                   (for [[^TableCell cell, parser] (partition 2 (interleave (.getF row) parsers))]
                     (when-let [v (.getV cell)]
                       ;; There is a weird error where everything that *should* be NULL comes back as an Object.
                       ;; See https://jira.talendforge.org/browse/TBD-1592
                       ;; Everything else comes back as a String luckily so we can proceed normally.
                       (when-not (= (class v) Object)
                         (parser v)))))}))))
(defn- process-native* [database query-string]
{:pre [(map? database) (map? (:details database))]}
;; automatically retry the query if it times out or otherwise fails. This is on top of the auto-retry added by
......@@ -257,175 +210,6 @@
(u/auto-retry 1
(post-process-native (execute-bigquery database query-string))))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | SQL Driver Methods |
;;; +----------------------------------------------------------------------------------------------------------------+
(defn- trunc
  "Generate raw SQL along the lines of `timestamp_trunc(cast(<some-field> AS timestamp), day)`"
  [unit expr]
  (hsql/call :timestamp_trunc (hx/->timestamp expr) (hsql/raw (name unit))))

(defn- extract
  "Generate a BigQuery `extract(<unit> FROM <timestamp>)` expression."
  [unit expr]
  ;; implementation of extract() in `metabase.util.honeysql-extensions` handles actual conversion to raw SQL (!)
  (hsql/call :extract unit (hx/->timestamp expr)))
;; Date bucketing: `trunc` for truncation-style units, `extract` for "part of a date" units.
(defmethod sql.qp/date [:bigquery :minute] [_ _ expr] (trunc :minute expr))
(defmethod sql.qp/date [:bigquery :minute-of-hour] [_ _ expr] (extract :minute expr))
(defmethod sql.qp/date [:bigquery :hour] [_ _ expr] (trunc :hour expr))
(defmethod sql.qp/date [:bigquery :hour-of-day] [_ _ expr] (extract :hour expr))
(defmethod sql.qp/date [:bigquery :day] [_ _ expr] (trunc :day expr))
(defmethod sql.qp/date [:bigquery :day-of-week] [_ _ expr] (extract :dayofweek expr))
(defmethod sql.qp/date [:bigquery :day-of-month] [_ _ expr] (extract :day expr))
(defmethod sql.qp/date [:bigquery :day-of-year] [_ _ expr] (extract :dayofyear expr))
(defmethod sql.qp/date [:bigquery :week] [_ _ expr] (trunc :week expr))
;; BigQuery's impl of `week` uses 0 for the first week; we use 1
(defmethod sql.qp/date [:bigquery :week-of-year] [_ _ expr] (-> (extract :week expr) hx/inc))
(defmethod sql.qp/date [:bigquery :month] [_ _ expr] (trunc :month expr))
(defmethod sql.qp/date [:bigquery :month-of-year] [_ _ expr] (extract :month expr))
(defmethod sql.qp/date [:bigquery :quarter] [_ _ expr] (trunc :quarter expr))
(defmethod sql.qp/date [:bigquery :quarter-of-year] [_ _ expr] (extract :quarter expr))
(defmethod sql.qp/date [:bigquery :year] [_ _ expr] (trunc :year expr))

;; Convert Unix-timestamp columns via BigQuery's dedicated conversion functions.
(defmethod sql.qp/unix-timestamp->timestamp [:bigquery :seconds] [_ _ expr]
  (hsql/call :timestamp_seconds expr))

(defmethod sql.qp/unix-timestamp->timestamp [:bigquery :milliseconds] [_ _ expr]
  (hsql/call :timestamp_millis expr))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | Query Processor |
;;; +----------------------------------------------------------------------------------------------------------------+
(defn- should-qualify-identifier?
  "Decide whether an Identifier needs the current dataset name prepended. Table & Field identifiers (usually) must
  be qualified with the dataset, which becomes part of the table component, e.g.

    `table`.`field` -> `dataset.table`.`field`"
  [{:keys [identifier-type components]}]
  (if sql.qp/*table-alias*
    ;; when a Table alias is in play, never qualify the alias with the dataset name
    false
    (or
     ;; Table identifiers always get qualified...
     (= identifier-type :table)
     ;; ...and Field identifiers do only when they are themselves Table-qualified (>= 2 components), so stuff
     ;; inside e.g. `CREATE TABLE` DDL statements is left alone
     (when (and (= identifier-type :field)
                (>= (count components) 2))
       true))))
(defmethod sql.qp/->honeysql [:bigquery Identifier]
  [_ identifier]
  ;; prepend `<dataset>.` to the table component when the identifier needs qualification (see above)
  (cond-> identifier
    (should-qualify-identifier? identifier)
    (update :components (fn [[table & more]]
                          (cons (str (dataset-name-for-current-query) \. table)
                                more)))))

(s/defn ^:private honeysql-form->sql :- s/Str
  "Compile `honeysql-form` to a SQL string, splicing any parameters inline since BigQuery statements aren't prepared."
  [driver, honeysql-form :- su/Map]
  (let [[sql & args :as sql+args] (sql.qp/format-honeysql driver honeysql-form)]
    (if (seq args)
      (unprepare/unprepare driver sql+args)
      sql)))
;; From the dox: Fields must contain only letters, numbers, and underscores, start with a letter or underscore, and be
;; at most 128 characters long.
(defmethod driver/format-custom-field-name :bigquery [_ custom-field-name]
(let [replaced-str (-> (str/trim custom-field-name)
(str/replace #"[^\w\d_]" "_")
(str/replace #"(^\d)" "_$1"))]
(subs replaced-str 0 (min 128 (count replaced-str)))))
;; These provide implementations of `->honeysql` that prevent HoneySQL from converting forms to prepared statement
;; parameters (`?` symbols)
(defmethod sql.qp/->honeysql [:bigquery String]
  [_ s]
  ;; emit strings as inline (escaped) literals
  (hx/literal s))

(defmethod sql.qp/->honeysql [:bigquery Boolean]
  [_ bool]
  (hsql/raw (if bool "TRUE" "FALSE")))

(defmethod sql.qp/->honeysql [:bigquery Date]
  [_ date]
  (hsql/call :timestamp (hx/literal (du/date->iso-8601 date))))

(defmethod sql.qp/->honeysql [:bigquery :time]
  [driver [_ value unit]]
  ;; render the time as a string literal, apply any unit bucketing, then cast to a TIME
  (->> value
       (unparse-bigquery-time *bigquery-timezone*)
       (sql.qp/->honeysql driver)
       (sql.qp/date driver unit)
       hx/->time))
(defmethod sql.qp/field->identifier :bigquery [_ {table-id :table_id, field-name :name, :as field}]
  ;; TODO - Making a DB call for each field to fetch its Table is inefficient and makes me cry, but this method is
  ;; currently only used for SQL params so it's not a huge deal at this point
  ;;
  ;; TODO - we should make sure these are in the QP store somewhere and then could at least batch the calls
  (let [table-name (db/select-one-field :name table/Table :id (u/get-id table-id))]
    (hx/identifier :field table-name field-name)))

(defmethod sql.qp/apply-top-level-clause [:bigquery :breakout]
  [driver _ honeysql-form {breakouts :breakout, fields :fields}]
  (-> honeysql-form
      ;; Group by all the breakout fields.
      ;;
      ;; Unlike other SQL drivers, BigQuery requires that we refer to Fields using the alias we gave them in the
      ;; `SELECT` clause, rather than repeating their definitions.
      ((partial apply h/group) (map (partial sql.qp/field-clause->alias driver) breakouts))
      ;; Add fields form only for fields that weren't specified in :fields clause -- we don't want to include it
      ;; twice, or HoneySQL will barf
      ((partial apply h/merge-select) (for [field-clause breakouts
                                            :when        (not (contains? (set fields) field-clause))]
                                        (sql.qp/as driver field-clause)))))

;; as with breakouts BigQuery requires that you use the Field aliases in order by clauses, so override the methods for
;; compiling `:asc` and `:desc` and alias the Fields if applicable
(defn- alias-order-by-field [driver [direction field-clause]]
  ;; aggregation references are already rendered by alias, so leave those alone
  (let [field-clause (if (mbql.u/is-clause? :aggregation field-clause)
                       field-clause
                       (sql.qp/field-clause->alias driver field-clause))]
    ;; delegate to the default `:sql` implementation with the (possibly aliased) clause
    ((get-method sql.qp/->honeysql [:sql direction]) driver [direction field-clause])))

(defmethod sql.qp/->honeysql [:bigquery :asc] [driver clause] (alias-order-by-field driver clause))
(defmethod sql.qp/->honeysql [:bigquery :desc] [driver clause] (alias-order-by-field driver clause))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | Other Driver / SQLDriver Method Implementations |
;;; +----------------------------------------------------------------------------------------------------------------+
(defmethod driver/date-add :bigquery
  [_ dt amount unit]
  ;; BigQuery datetime arithmetic uses an `INTERVAL n unit` literal, which has to be emitted as raw SQL
  (hsql/call :datetime_add (hx/->datetime dt) (hsql/raw (format "INTERVAL %d %s" (int amount) (name unit)))))

(defmethod driver/mbql->native :bigquery
  [driver
   {database-id                                                 :database
    {source-table-id :source-table, source-query :source-query} :query
    :as                                                         outer-query}]
  (let [dataset-id         (-> (qp.store/database) :details :dataset-id)
        {table-name :name} (some-> source-table-id qp.store/table)]
    ;; a dataset-id is required to qualify identifiers; fail fast if the DB details are missing it
    (assert (seq dataset-id))
    (binding [sql.qp/*query* (assoc outer-query :dataset-id dataset-id)]
      {:query      (->> outer-query
                        (sql.qp/build-honeysql-form driver)
                        (honeysql-form->sql driver))
       ;; the table name (or the source-query alias) labels the results when they are parsed later
       :table-name (or table-name
                       (when source-query
                         sql.qp/source-query-alias))
       :mbql?      true})))
(defn- effective-query-timezone [database]
(if-let [^java.util.TimeZone jvm-tz (and (get-in database [:details :use-jvm-timezone])
@du/jvm-timezone)]
......@@ -435,15 +219,16 @@
(defmethod driver/execute-query :bigquery
  [driver {{sql :query, params :params, :keys [table-name mbql?]} :native, :as outer-query}]
  ;; NOTE(review): the scraped diff left BOTH the removed binding line (`*bigquery-timezone*`) and the added one
  ;; (`bigquery.common/*bigquery-timezone*`) in the body, leaving the form unbalanced; only the new binding is kept.
  (let [database (qp.store/database)]
    (binding [bigquery.common/*bigquery-timezone* (effective-query-timezone database)]
      ;; prepend the query remark as a SQL comment, and splice in any params inline (BigQuery isn't prepared)
      (let [sql (str "-- " (qputil/query->remark outer-query) "\n" (if (seq params)
                                                                     (unprepare/unprepare driver (cons sql params))
                                                                     sql))]
        (process-native* database sql)))))
;; BigQuery's "now" function, and MySQL-style backtick quoting for identifiers.
(defmethod sql.qp/current-datetime-fn :bigquery [_] :%current_timestamp)

(defmethod sql.qp/quote-style :bigquery [_] :mysql)

;;; +----------------------------------------------------------------------------------------------------------------+
;;; |                                            Other Driver Method Impls                                            |
;;; +----------------------------------------------------------------------------------------------------------------+

;; expressions (custom columns) are not supported by this driver implementation
(defmethod driver/supports? [:bigquery :expressions] [_ _] false)
......
;; Shared state for the BigQuery driver namespaces, split out so both the driver and its query processor can use it.
(ns metabase.driver.bigquery.common
  (:require [clj-time.core :as time]))

(def ^:dynamic *bigquery-timezone*
  "BigQuery stores all of its timestamps in UTC. That timezone can be changed via a SQL function invocation in a
  native query, but that change in timezone is not conveyed through the BigQuery API. In most situations
  `*bigquery-timezone*` will just be UTC. If the user is always changing the timezone via native SQL function
  invocation, they can set their JVM TZ to the correct timezone, mark `use-jvm-timezone` to `true` and that will bind
  this dynamic var to the JVM TZ rather than UTC"
  time/utc)
(ns metabase.driver.bigquery.query-processor
(:require [clj-time
[coerce :as tcoerce]
[format :as tformat]]
[clojure.string :as str]
[honeysql
[core :as hsql]
[helpers :as h]]
[metabase
[driver :as driver]
[util :as u]]
[metabase.driver.bigquery.common :as bigquery.common]
[metabase.driver.sql.query-processor :as sql.qp]
[metabase.driver.sql.util.unprepare :as unprepare]
[metabase.mbql.util :as mbql.u]
[metabase.models.table :as table]
[metabase.query-processor.store :as qp.store]
[metabase.util
[date :as du]
[honeysql-extensions :as hx]
[schema :as su]]
[schema.core :as s]
[toucan.db :as db])
(:import java.sql.Time
java.util.Date
metabase.util.honeysql_extensions.Identifier))
(defn- valid-bigquery-identifier?
  "Is String `s` a valid BigQuery identifier? Identifiers are only allowed to contain letters, numbers, and underscores;
  cannot start with a number; and can be at most 128 characters long."
  [s]
  (boolean
   (and (string? s)
        ;; BUGFIX: the old pattern `([a-zA-Z_][a-zA-Z_0-9]*){1,128}` repeated the *entire identifier* group 1-128
        ;; times; a single greedy repetition matches a string of any length, so the 128-character limit was never
        ;; enforced. Bound the character count directly: one leading letter/underscore + at most 127 more chars.
        (re-matches #"^[a-zA-Z_][a-zA-Z_0-9]{0,127}$" s))))
;; Schema that validates a string against the BigQuery identifier rules above.
(def ^:private BigQueryIdentifierString
  (s/pred valid-bigquery-identifier? "Valid BigQuery identifier"))

(s/defn ^:private dataset-name-for-current-query :- BigQueryIdentifierString
  "Fetch the dataset name for the database associated with this query, needed because BigQuery requires you to qualify
  identifiers with it. This is primarily called automatically for the `to-sql` implementation of the
  `BigQueryIdentifier` record type; see its definition for more details."
  []
  ;; only consult the QP store once it is initialized; outside query execution there is no current database
  (when (qp.store/initialized?)
    (some-> (qp.store/database) :details :dataset-id)))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | Running Queries & Parsing Results |
;;; +----------------------------------------------------------------------------------------------------------------+
(defmulti parse-result-of-type
  "Parse the values that come back in results of a BigQuery query based on their column type."
  {:arglists '([column-type timezone v])}
  ;; dispatch on the BigQuery column type string, e.g. "BOOLEAN" or "TIMESTAMP"
  (fn [column-type _ _] column-type))

;; by default (e.g. STRING and RECORD columns) pass the value through unchanged
(defmethod parse-result-of-type :default
  [_ _ v]
  v)

(defmethod parse-result-of-type "BOOLEAN"
  [_ _ v]
  (Boolean/parseBoolean v))

(defmethod parse-result-of-type "FLOAT"
  [_ _ v]
  (Double/parseDouble v))

(defmethod parse-result-of-type "INTEGER"
  [_ _ v]
  (Long/parseLong v))

(defmethod parse-result-of-type "NUMERIC"
  [_ _ v]
  (bigdec v))
(defn- parse-timestamp-str
  "Parse a timestamp string from a BigQuery result into a Timestamp in `timezone`."
  [timezone s]
  ;; Timestamp strings either come back as ISO-8601 strings or Unix timestamps in µs, e.g. "1.3963104E9"
  (or
   (du/->Timestamp s timezone)
   ;; If parsing as ISO-8601 fails parse as a double then convert to ms. This is ms since epoch in UTC. By using
   ;; `->Timestamp`, it will convert from ms in UTC to a timestamp object in the JVM timezone
   (du/->Timestamp (* (Double/parseDouble s) 1000))))

;; DATE, DATETIME, and TIMESTAMP columns all use the same string-or-epoch parsing above.
(defmethod parse-result-of-type "DATE"
  [_ timezone s]
  (parse-timestamp-str timezone s))

(defmethod parse-result-of-type "DATETIME"
  [_ timezone s]
  (parse-timestamp-str timezone s))

(defmethod parse-result-of-type "TIMESTAMP"
  [_ timezone s]
  (parse-timestamp-str timezone s))
(defn- bigquery-time-format
  "Formatter for BigQuery TIME column values, e.g. `13:45:07`.
  BUGFIX: the original pattern was `HH:mm:SS`; in Joda-Time/clj-time patterns, `S` means *fraction of second*
  while `s` is second-of-minute, so seconds were being parsed/printed as a millisecond fraction."
  [timezone]
  (tformat/formatter "HH:mm:ss" timezone))
(defn- unparse-bigquery-time
  "Format something coercible to a DateTime as a BigQuery TIME string in `timezone`."
  [timezone coercible-to-dt]
  (->> coercible-to-dt
       tcoerce/to-date-time
       (tformat/unparse (bigquery-time-format timezone))))

(defmethod parse-result-of-type "TIME"
  [_ timezone s]
  ;; parse the TIME string, then convert via epoch millis into a `java.sql.Time`
  (->> s
       (tformat/parse (bigquery-time-format timezone))
       tcoerce/to-long
       Time.))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | SQL Driver Methods |
;;; +----------------------------------------------------------------------------------------------------------------+
(defn- trunc
  "Generate raw SQL along the lines of `timestamp_trunc(cast(<some-field> AS timestamp), day)`"
  [unit expr]
  (hsql/call :timestamp_trunc (hx/->timestamp expr) (hsql/raw (name unit))))

(defn- extract
  "Generate a BigQuery `extract(<unit> FROM <timestamp>)` expression."
  [unit expr]
  ;; implementation of extract() in `metabase.util.honeysql-extensions` handles actual conversion to raw SQL (!)
  (hsql/call :extract unit (hx/->timestamp expr)))
;; Date bucketing: `trunc` for truncation-style units, `extract` for "part of a date" units.
(defmethod sql.qp/date [:bigquery :minute] [_ _ expr] (trunc :minute expr))
(defmethod sql.qp/date [:bigquery :minute-of-hour] [_ _ expr] (extract :minute expr))
(defmethod sql.qp/date [:bigquery :hour] [_ _ expr] (trunc :hour expr))
(defmethod sql.qp/date [:bigquery :hour-of-day] [_ _ expr] (extract :hour expr))
(defmethod sql.qp/date [:bigquery :day] [_ _ expr] (trunc :day expr))
(defmethod sql.qp/date [:bigquery :day-of-week] [_ _ expr] (extract :dayofweek expr))
(defmethod sql.qp/date [:bigquery :day-of-month] [_ _ expr] (extract :day expr))
(defmethod sql.qp/date [:bigquery :day-of-year] [_ _ expr] (extract :dayofyear expr))
(defmethod sql.qp/date [:bigquery :week] [_ _ expr] (trunc :week expr))
;; BigQuery's impl of `week` uses 0 for the first week; we use 1
(defmethod sql.qp/date [:bigquery :week-of-year] [_ _ expr] (-> (extract :week expr) hx/inc))
(defmethod sql.qp/date [:bigquery :month] [_ _ expr] (trunc :month expr))
(defmethod sql.qp/date [:bigquery :month-of-year] [_ _ expr] (extract :month expr))
(defmethod sql.qp/date [:bigquery :quarter] [_ _ expr] (trunc :quarter expr))
(defmethod sql.qp/date [:bigquery :quarter-of-year] [_ _ expr] (extract :quarter expr))
(defmethod sql.qp/date [:bigquery :year] [_ _ expr] (trunc :year expr))

;; Convert Unix-timestamp columns via BigQuery's dedicated conversion functions.
(defmethod sql.qp/unix-timestamp->timestamp [:bigquery :seconds] [_ _ expr]
  (hsql/call :timestamp_seconds expr))

(defmethod sql.qp/unix-timestamp->timestamp [:bigquery :milliseconds] [_ _ expr]
  (hsql/call :timestamp_millis expr))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | Query Processor |
;;; +----------------------------------------------------------------------------------------------------------------+
(defn- should-qualify-identifier?
  "Decide whether an Identifier needs the current dataset name prepended. Table & Field identifiers (usually) must
  be qualified with the dataset, which becomes part of the table component, e.g.

    `table`.`field` -> `dataset.table`.`field`"
  [{:keys [identifier-type components]}]
  (if sql.qp/*table-alias*
    ;; when a Table alias is in play, never qualify the alias with the dataset name
    false
    (or
     ;; Table identifiers always get qualified...
     (= identifier-type :table)
     ;; ...and Field identifiers do only when they are themselves Table-qualified (>= 2 components), so stuff
     ;; inside e.g. `CREATE TABLE` DDL statements is left alone
     (when (and (= identifier-type :field)
                (>= (count components) 2))
       true))))
(defmethod sql.qp/->honeysql [:bigquery Identifier]
  [_ identifier]
  ;; prepend `<dataset>.` to the table component when the identifier needs qualification (see above)
  (cond-> identifier
    (should-qualify-identifier? identifier)
    (update :components (fn [[table & more]]
                          (cons (str (dataset-name-for-current-query) \. table)
                                more)))))

(s/defn ^:private honeysql-form->sql :- s/Str
  "Compile `honeysql-form` to a SQL string, splicing any parameters inline since BigQuery statements aren't prepared."
  [driver, honeysql-form :- su/Map]
  (let [[sql & args :as sql+args] (sql.qp/format-honeysql driver honeysql-form)]
    (if (seq args)
      (unprepare/unprepare driver sql+args)
      sql)))
;; From the dox: Fields must contain only letters, numbers, and underscores, start with a letter or underscore, and be
;; at most 128 characters long.
(defmethod driver/format-custom-field-name :bigquery
  [_ custom-field-name]
  ;; squash disallowed characters to underscores, shield a leading digit with one, then truncate to 128 chars
  (let [replaced-str (-> (str/trim custom-field-name)
                         (str/replace #"[^\w\d_]" "_")
                         (str/replace #"(^\d)" "_$1"))]
    (subs replaced-str 0 (min 128 (count replaced-str)))))
;; These provide implementations of `->honeysql` that prevent HoneySQL from converting forms to prepared statement
;; parameters (`?` symbols)
(defmethod sql.qp/->honeysql [:bigquery String]
  [_ s]
  ;; emit strings as inline (escaped) literals
  (hx/literal s))

(defmethod sql.qp/->honeysql [:bigquery Boolean]
  [_ bool]
  (hsql/raw (if bool "TRUE" "FALSE")))

(defmethod sql.qp/->honeysql [:bigquery Date]
  [_ date]
  (hsql/call :timestamp (hx/literal (du/date->iso-8601 date))))

(defmethod sql.qp/->honeysql [:bigquery :time]
  [driver [_ value unit]]
  ;; render the time as a string literal, apply any unit bucketing, then cast to a TIME
  (->> value
       (unparse-bigquery-time bigquery.common/*bigquery-timezone*)
       (sql.qp/->honeysql driver)
       (sql.qp/date driver unit)
       hx/->time))
(defmethod sql.qp/field->identifier :bigquery
  [_ {table-id :table_id, field-name :name, :as field}]
  ;; TODO - Making a DB call for each field to fetch its Table is inefficient and makes me cry, but this method is
  ;; currently only used for SQL params so it's not a huge deal at this point
  ;;
  ;; TODO - we should make sure these are in the QP store somewhere and then could at least batch the calls
  (let [table-name (db/select-one-field :name table/Table :id (u/get-id table-id))]
    (hx/identifier :field table-name field-name)))

(defmethod sql.qp/apply-top-level-clause [:bigquery :breakout]
  [driver _ honeysql-form {breakouts :breakout, fields :fields}]
  (-> honeysql-form
      ;; Group by all the breakout fields.
      ;;
      ;; Unlike other SQL drivers, BigQuery requires that we refer to Fields using the alias we gave them in the
      ;; `SELECT` clause, rather than repeating their definitions.
      ((partial apply h/group) (map (partial sql.qp/field-clause->alias driver) breakouts))
      ;; Add fields form only for fields that weren't specified in :fields clause -- we don't want to include it
      ;; twice, or HoneySQL will barf
      ((partial apply h/merge-select) (for [field-clause breakouts
                                            :when        (not (contains? (set fields) field-clause))]
                                        (sql.qp/as driver field-clause)))))

;; as with breakouts BigQuery requires that you use the Field aliases in order by clauses, so override the methods for
;; compiling `:asc` and `:desc` and alias the Fields if applicable
(defn- alias-order-by-field [driver [direction field-clause]]
  ;; aggregation references are already rendered by alias, so leave those alone
  (let [field-clause (if (mbql.u/is-clause? :aggregation field-clause)
                       field-clause
                       (sql.qp/field-clause->alias driver field-clause))]
    ;; delegate to the default `:sql` implementation with the (possibly aliased) clause
    ((get-method sql.qp/->honeysql [:sql direction]) driver [direction field-clause])))

(defmethod sql.qp/->honeysql [:bigquery :asc] [driver clause] (alias-order-by-field driver clause))
(defmethod sql.qp/->honeysql [:bigquery :desc] [driver clause] (alias-order-by-field driver clause))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | Other Driver / SQLDriver Method Implementations |
;;; +----------------------------------------------------------------------------------------------------------------+
(defmethod driver/date-add :bigquery
  [_ dt amount unit]
  ;; BigQuery datetime arithmetic uses an `INTERVAL n unit` literal, which has to be emitted as raw SQL
  (hsql/call :datetime_add (hx/->datetime dt) (hsql/raw (format "INTERVAL %d %s" (int amount) (name unit)))))

(defmethod driver/mbql->native :bigquery
  [driver
   {database-id                                                 :database
    {source-table-id :source-table, source-query :source-query} :query
    :as                                                         outer-query}]
  (let [dataset-id         (-> (qp.store/database) :details :dataset-id)
        {table-name :name} (some-> source-table-id qp.store/table)]
    ;; a dataset-id is required to qualify identifiers; fail fast if the DB details are missing it
    (assert (seq dataset-id))
    (binding [sql.qp/*query* (assoc outer-query :dataset-id dataset-id)]
      {:query      (->> outer-query
                        (sql.qp/build-honeysql-form driver)
                        (honeysql-form->sql driver))
       ;; the table name (or the source-query alias) labels the results when they are parsed later
       :table-name (or table-name
                       (when source-query
                         sql.qp/source-query-alias))
       :mbql?      true})))
;; BigQuery's "now" function, and MySQL-style backtick quoting for identifiers.
(defmethod sql.qp/current-datetime-fn :bigquery
  [_] :%current_timestamp)

(defmethod sql.qp/quote-style :bigquery
  [_] :mysql)
(ns metabase.driver.bigquery.query-processor-test
(:require [clj-time.core :as time]
[clojure.test :refer :all]
[honeysql.core :as hsql]
[metabase
[driver :as driver]
[query-processor :as qp]
[query-processor-test :as qp.test]
[util :as u]]
[metabase.driver.bigquery :as bigquery]
[metabase.driver.sql.query-processor :as sql.qp]
[metabase.models
[database :refer [Database]]
[field :refer [Field]]]
[metabase.query-processor.test-util :as qp.test-util]
[metabase.test
[data :as data]
[util :as tu]]
[metabase.test.data.datasets :as datasets]
[metabase.test.util.timezone :as tu.tz]
[metabase.util.honeysql-extensions :as hx]
[toucan.util.test :as tt]))
;; sanity-check that raw native queries run, and that native result columns keep their SQL ordering
(deftest native-query-test
  (datasets/test-driver :bigquery
    (is (= [[100]
            [99]]
           (get-in
            (qp/process-query
             {:native   {:query (str "SELECT `test_data.venues`.`id` "
                                     "FROM `test_data.venues` "
                                     "ORDER BY `test_data.venues`.`id` DESC "
                                     "LIMIT 2;")}
              :type     :native
              :database (data/id)})
            [:data :rows])))

    (is (= [{:name         "venue_id"
             :display_name "venue_id"
             :source       :native
             :base_type    :type/Integer
             :field_ref    [:field-literal "venue_id" :type/Integer]}
            {:name         "user_id"
             :display_name "user_id"
             :source       :native
             :base_type    :type/Integer
             :field_ref    [:field-literal "user_id" :type/Integer]}
            {:name         "checkins_id"
             :display_name "checkins_id"
             :source       :native
             :base_type    :type/Integer
             :field_ref    [:field-literal "checkins_id" :type/Integer]}]
           (qp.test/cols
            (qp/process-query
             {:native   {:query (str "SELECT `test_data.checkins`.`venue_id` AS `venue_id`, "
                                     " `test_data.checkins`.`user_id` AS `user_id`, "
                                     " `test_data.checkins`.`id` AS `checkins_id` "
                                     "FROM `test_data.checkins` "
                                     "LIMIT 2")}
              :type     :native
              :database (data/id)})))
        (str "make sure that BigQuery native queries maintain the column ordering specified in the SQL -- "
             "post-processing ordering shouldn't apply (Issue #2821)"))))
;; repeated aggregations used to be deduplicated inside this driver; these tests guard the behavior now that the
;; shared QP middleware does the deduplication instead
(deftest aggregations-test
  (datasets/test-driver :bigquery
    (testing (str "make sure queries with two or more of the same aggregation type still work. Aggregations used to be "
                  "deduplicated here in the BigQuery driver; now they are deduplicated as part of the main QP "
                  "middleware, but no reason not to keep a few of these tests just to be safe")
      (let [{:keys [rows columns]} (qp.test/rows+column-names
                                    (data/run-mbql-query checkins
                                      {:aggregation [[:sum $user_id] [:sum $user_id]]}))]
        (is (= ["sum" "sum_2"]
               columns))
        (is (= [[7929 7929]]
               rows)))
      (let [{:keys [rows columns]} (qp.test/rows+column-names
                                    (data/run-mbql-query checkins
                                      {:aggregation [[:sum $user_id] [:sum $user_id] [:sum $user_id]]}))]
        (is (= ["sum" "sum_2" "sum_3"]
               columns))
        (is (= [[7929 7929 7929]]
               rows))))

    (testing "let's make sure we're generating correct HoneySQL + SQL for aggregations"
      ;; first check the compiled HoneySQL form...
      (is (= {:select   [[(hx/identifier :field "test_data.venues" "price")
                          (hx/identifier :field-alias "price")]
                         [(hsql/call :avg (hx/identifier :field "test_data.venues" "category_id"))
                          (hx/identifier :field-alias "avg")]]
              :from     [(hx/identifier :table "test_data.venues")]
              :group-by [(hx/identifier :field-alias "price")]
              :order-by [[(hx/identifier :field-alias "avg") :asc]]}
             (qp.test-util/with-everything-store
               (#'sql.qp/mbql->honeysql
                :bigquery
                (data/mbql-query venues
                  {:aggregation [[:avg $category_id]]
                   :breakout    [$price]
                   :order-by    [[:asc [:aggregation 0]]]})))))

      ;; ...then the rendered native query
      (is (= {:query      (str "SELECT `test_data.venues`.`price` AS `price`,"
                               " avg(`test_data.venues`.`category_id`) AS `avg` "
                               "FROM `test_data.venues` "
                               "GROUP BY `price` "
                               "ORDER BY `avg` ASC, `price` ASC")
              :table-name "venues"
              :mbql?      true}
             (qp/query->native
              (data/mbql-query venues
                {:aggregation [[:avg $category_id]], :breakout [$price], :order-by [[:asc [:aggregation 0]]]})))))))
(deftest join-alias-test
  (datasets/test-driver :bigquery
    (is (= (str "SELECT `categories__via__category_id`.`name` AS `name`,"
                " count(*) AS `count` "
                "FROM `test_data.venues` "
                "LEFT JOIN `test_data.categories` `categories__via__category_id`"
                " ON `test_data.venues`.`category_id` = `categories__via__category_id`.`id` "
                "GROUP BY `name` "
                "ORDER BY `name` ASC")
           ;; normally for test purposes BigQuery doesn't support foreign keys so override the function that checks
           ;; that and make it return `true` so this test proceeds as expected
           (with-redefs [driver/supports? (constantly true)]
             (tu/with-temp-vals-in-db Field (data/id :venues :category_id) {:fk_target_field_id (data/id :categories :id)
                                                                            :special_type "type/FK"}
               (let [results (data/run-mbql-query venues
                               {:aggregation [:count]
                                :breakout    [$category_id->categories.name]})]
                 ;; return just the generated SQL; fall back to the full response map if compilation failed,
                 ;; so test failures show the whole error
                 (get-in results [:data :native_form :query] results)))))
        (str "make sure that BigQuery properly aliases the names generated for Join Tables. It's important to use the "
             "right alias, e.g. something like `categories__via__category_id`, which is considerably different from "
             "what other SQL databases do. (#4218)"))))
(defn- native-timestamp-query
  "Run a native query against `db-or-db-id` that converts `timestamp-str` to a DATETIME in `timezone-str`, and return
  the single value of the single row that comes back."
  [db-or-db-id timestamp-str timezone-str]
  (let [sql     (format "select datetime(TIMESTAMP \"%s\", \"%s\")" timestamp-str timezone-str)
        results (qp/process-query {:database (u/get-id db-or-db-id)
                                   :type     :native
                                   :native   {:query sql}})]
    (-> results :data :rows ffirst)))
(deftest parsed-date-timezone-handling-test
  (datasets/test-driver :bigquery
    ;; baseline case: no JVM-timezone flag involved
    (is (= "2018-08-31T00:00:00.000Z"
           (native-timestamp-query (data/id) "2018-08-31 00:00:00" "UTC"))
        "A UTC date is returned, we should read/return it as UTC")
    ;; negative-offset case: temp Database with `:use-jvm-timezone true` + JVM TZ forced to America/Chicago
    (is (= "2018-08-31T00:00:00.000-05:00"
           (tu.tz/with-jvm-tz (time/time-zone-for-id "America/Chicago")
             (tt/with-temp* [Database [db {:engine  :bigquery
                                           :details (assoc (:details (data/db))
                                                           :use-jvm-timezone true)}]]
               (native-timestamp-query db "2018-08-31 00:00:00-05" "America/Chicago"))))
        (str "This test includes a `use-jvm-timezone` flag of true that will assume that the date coming from BigQuery "
             "is already in the JVM's timezone. The test puts the JVM's timezone into America/Chicago an ensures that "
             "the correct date is compared"))
    (is (= "2018-08-31T00:00:00.000+07:00"
           (tu.tz/with-jvm-tz (time/time-zone-for-id "Asia/Jakarta")
             (tt/with-temp* [Database [db {:engine  :bigquery
                                           :details (assoc (:details (data/db))
                                                           :use-jvm-timezone true)}]]
               (native-timestamp-query db "2018-08-31 00:00:00+07" "Asia/Jakarta"))))
        "Similar to the above test, but covers a positive offset")))
;; if I run a BigQuery query, does it get a remark added to it?
(defn- query->native
  "Return the native SQL that would be executed for `query`, without actually running it against BigQuery. Works by
  redefining `process-native*` to capture the compiled SQL and then abort execution."
  [query]
  (let [native-query (atom nil)]
    (with-redefs [bigquery/process-native* (fn [_ sql]
                                             (reset! native-query sql)
                                             ;; throwing here stops the QP before it ever hits the BigQuery API
                                             (throw (Exception. "Done.")))]
      ;; BUGFIX: previously the `query` argument was ignored and a hardcoded venues query was always compiled,
      ;; so callers passing a different query silently got the wrong SQL. Compile the query we were given.
      (qp/process-query query)
      @native-query)))
(deftest remark-test
  (datasets/test-driver :bigquery
    ;; the compiled SQL should be prefixed with a `-- Metabase::` info-remark comment containing the executing
    ;; user ID, query type, and query hash
    (is (= (str
            "-- Metabase:: userID: 1000 queryType: MBQL queryHash: 01020304\n"
            "SELECT `test_data.venues`.`id` AS `id`,"
            " `test_data.venues`.`name` AS `name`,"
            " `test_data.venues`.`category_id` AS `category_id`,"
            " `test_data.venues`.`latitude` AS `latitude`,"
            " `test_data.venues`.`longitude` AS `longitude`,"
            " `test_data.venues`.`price` AS `price` "
            "FROM `test_data.venues` "
            "LIMIT 1")
           (query->native
            {:database (data/id)
             :type     :query
             :query    {:source-table (data/id :venues)
                        :limit        1}
             :info     {:executed-by 1000
                        :query-hash  (byte-array [1 2 3 4])}}))
        "if I run a BigQuery query, does it get a remark added to it?")))
(deftest unprepare-params-test
  (datasets/test-driver :bigquery
    ;; a native query may still contain `?` placeholders with `:params`; the driver must inline ("unprepare")
    ;; them before execution since BigQuery has no prepared statements
    (let [query {:database (data/id)
                 :type     :native
                 :native   {:query  (str "SELECT `test_data.venues`.`name` AS `name` "
                                         "FROM `test_data.venues` "
                                         "WHERE `test_data.venues`.`name` = ?")
                            :params ["Red Medicine"]}}]
      (is (= [["Red Medicine"]]
             (qp.test/rows (qp/process-query query)))
          (str "Do we properly unprepare, and can we execute, queries that still have parameters for one reason or "
               "another? (EE #277)")))))
;; NOTE(review): this `ns` form was corrupted (it contained two nested `(:require ...)` forms and an orphan
;; `[sync :as sync]]` vector, apparently from a botched merge/diff), making the namespace unreadable. Reconstructed
;; as a single sorted `:require` covering every alias used in this file.
(ns metabase.driver.bigquery-test
  (:require [clj-time.core :as time]
            [clojure.test :refer :all]
            [honeysql.core :as hsql]
            [metabase
             [driver :as driver]
             [query-processor :as qp]
             [query-processor-test :as qp.test]
             [sync :as sync]
             [util :as u]]
            [metabase.db.metadata-queries :as metadata-queries]
            [metabase.driver.bigquery :as bigquery]
            [metabase.driver.sql.query-processor :as sql.qp]
            [metabase.mbql.util :as mbql.u]
            [metabase.models
             [database :refer [Database]]
             [field :refer [Field]]
             [table :refer [Table]]]
            [metabase.query-processor.test-util :as qp.test-util]
            [metabase.test
             [data :as data]
             [util :as tu]]
            [metabase.test.data.bigquery :as bigquery.tx]
            [metabase.test.data.datasets :as datasets]
            [metabase.test.util.timezone :as tu.tz]
            [metabase.util.honeysql-extensions :as hx]
            [toucan.util.test :as tt]))
;; NOTE(review): the `datasets/expect-with-driver` forms from here down are the legacy versions of the `deftest`
;; forms earlier in this file; they appear to be duplicated from an older revision.
;; Test native queries
(datasets/expect-with-driver :bigquery
  [[100]
   [99]]
  (get-in (qp/process-query
            {:native   {:query (str "SELECT `test_data.venues`.`id` "
                                    "FROM `test_data.venues` "
                                    "ORDER BY `test_data.venues`.`id` DESC "
                                    "LIMIT 2;")}
             :type     :native
             :database (data/id)})
          [:data :rows]))

;;; table-rows-sample
;; sample rows for `venues` via `metadata-queries/table-rows-sample`; sort + take because sample order isn't fixed
(datasets/expect-with-driver :bigquery
  [[1 "Red Medicine"]
   [2 "Stout Burgers & Beers"]
   [3 "The Apple Pan"]
   [4 "Wurstküche"]
   [5 "Brite Spot Family Restaurant"]]
  (->> (metadata-queries/table-rows-sample (Table (data/id :venues))
         [(Field (data/id :venues :id))
          (Field (data/id :venues :name))])
       (sort-by first)
       (take 5)))

;; make sure that BigQuery native queries maintain the column ordering specified in the SQL -- post-processing
;; ordering shouldn't apply (Issue #2821)
(datasets/expect-with-driver :bigquery
  [{:name         "venue_id"
    :display_name "venue_id"
    :source       :native
    :base_type    :type/Integer
    :field_ref    [:field-literal "venue_id" :type/Integer]}
   {:name         "user_id"
    :display_name "user_id"
    :source       :native
    :base_type    :type/Integer
    :field_ref    [:field-literal "user_id" :type/Integer]}
   {:name         "checkins_id"
    :display_name "checkins_id"
    :source       :native
    :base_type    :type/Integer
    :field_ref    [:field-literal "checkins_id" :type/Integer]}]
  (qp.test/cols
    (qp/process-query
      {:native   {:query (str "SELECT `test_data.checkins`.`venue_id` AS `venue_id`, "
                              " `test_data.checkins`.`user_id` AS `user_id`, "
                              " `test_data.checkins`.`id` AS `checkins_id` "
                              "FROM `test_data.checkins` "
                              "LIMIT 2")}
       :type     :native
       :database (data/id)})))
;; ok, make sure we actually wrap all of our ag clauses in `:aggregation-options` clauses with unique names
(defn- aggregation-names
  "Return the `:name` given to each `:aggregation-options` clause in the `:aggregation` clauses of `query`."
  [query]
  (mbql.u/match (-> query :query :aggregation)
    [:aggregation-options _ {:name ag-name}] ag-name))
;; make sure queries with two or more of the same aggregation type still work. Aggregations used to be deduplicated
;; here in the BigQuery driver; now they are deduplicated as part of the main QP middleware, but no reason not to keep
;; a few of these tests just to be safe
(datasets/expect-with-driver :bigquery
  {:rows [[7929 7929]], :columns ["sum" "sum_2"]}
  (qp.test/rows+column-names
    (qp/process-query {:database (data/id)
                       :type     "query"
                       :query    {:source-table (data/id :checkins)
                                  :aggregation  [[:sum [:field-id (data/id :checkins :user_id)]]
                                                 [:sum [:field-id (data/id :checkins :user_id)]]]}})))

;; same as above but with three identical aggregations -> columns deduplicated as `sum`, `sum_2`, `sum_3`
(datasets/expect-with-driver :bigquery
  {:rows [[7929 7929 7929]], :columns ["sum" "sum_2" "sum_3"]}
  (qp.test/rows+column-names
    (qp/process-query {:database (data/id)
                       :type     "query"
                       :query    {:source-table (data/id :checkins)
                                  :aggregation  [[:sum [:field-id (data/id :checkins :user_id)]]
                                                 [:sum [:field-id (data/id :checkins :user_id)]]
                                                 [:sum [:field-id (data/id :checkins :user_id)]]]}})))

;; the test BigQuery database should report its timezone as UTC
(datasets/expect-with-driver :bigquery
  "UTC"
  (tu/db-timezone-id))
;; make sure that BigQuery properly aliases the names generated for Join Tables. It's important to use the right
;; alias, e.g. something like `categories__via__category_id`, which is considerably different from what other SQL
;; databases do. (#4218)
(datasets/expect-with-driver :bigquery
  (str "SELECT `categories__via__category_id`.`name` AS `name`,"
       " count(*) AS `count` "
       "FROM `test_data.venues` "
       "LEFT JOIN `test_data.categories` `categories__via__category_id`"
       " ON `test_data.venues`.`category_id` = `categories__via__category_id`.`id` "
       "GROUP BY `name` "
       "ORDER BY `name` ASC")
  ;; normally for test purposes BigQuery doesn't support foreign keys so override the function that checks that and
  ;; make it return `true` so this test proceeds as expected
  (with-redefs [driver/supports? (constantly true)]
    (tu/with-temp-vals-in-db Field (data/id :venues :category_id) {:fk_target_field_id (data/id :categories :id)
                                                                   :special_type "type/FK"}
      (let [results (qp/process-query
                      {:database (data/id)
                       :type     "query"
                       :query    {:source-table (data/id :venues)
                                  :aggregation  [:count]
                                  :breakout     [[:fk-> (data/id :venues :category_id) (data/id :categories :name)]]}})]
        ;; return just the generated SQL; fall back to the full response map if compilation failed
        (get-in results [:data :native_form :query] results)))))
;; NOTE(review): duplicate of `native-timestamp-query` defined earlier in this file (the later definition wins at
;; load time); appears to come from an older revision -- consider removing one copy.
(defn- native-timestamp-query
  "Run a native query against `db-or-db-id` converting `timestamp-str` to a DATETIME in `timezone-str`; return the
  single value of the single row returned."
  [db-or-db-id timestamp-str timezone-str]
  (-> (qp/process-query
        {:database (u/get-id db-or-db-id)
         :type     :native
         :native   {:query (format "select datetime(TIMESTAMP \"%s\", \"%s\")" timestamp-str timezone-str)}})
      :data
      :rows
      ffirst))
;; This query tests out the timezone handling of parsed dates. For this test a UTC date is returned, we should
;; read/return it as UTC
(datasets/expect-with-driver :bigquery
  "2018-08-31T00:00:00.000Z"
  (native-timestamp-query (data/id) "2018-08-31 00:00:00" "UTC"))

;; This test includes a `use-jvm-timezone` flag of true that will assume that the date coming from BigQuery is already
;; in the JVM's timezone. The test puts the JVM's timezone into America/Chicago an ensures that the correct date is
;; compared
(datasets/expect-with-driver :bigquery
  "2018-08-31T00:00:00.000-05:00"
  (tu.tz/with-jvm-tz (time/time-zone-for-id "America/Chicago")
    (tt/with-temp* [Database [db {:engine  :bigquery
                                  :details (assoc (:details (data/db))
                                                  :use-jvm-timezone true)}]]
      (native-timestamp-query db "2018-08-31 00:00:00-05" "America/Chicago"))))

;; Similar to the above test, but covers a positive offset
(datasets/expect-with-driver :bigquery
  "2018-08-31T00:00:00.000+07:00"
  (tu.tz/with-jvm-tz (time/time-zone-for-id "Asia/Jakarta")
    (tt/with-temp* [Database [db {:engine  :bigquery
                                  :details (assoc (:details (data/db))
                                                  :use-jvm-timezone true)}]]
      (native-timestamp-query db "2018-08-31 00:00:00+07" "Asia/Jakarta"))))
;; if I run a BigQuery query, does it get a remark added to it?
;; NOTE(review): duplicate of `query->native` defined earlier in this file; appears to come from an older revision.
(defn- query->native
  "Return the native SQL that would be executed for `query`, without actually running it against BigQuery. Works by
  redefining `process-native*` to capture the compiled SQL and then abort execution."
  [query]
  (let [native-query (atom nil)]
    (with-redefs [bigquery/process-native* (fn [_ sql]
                                             (reset! native-query sql)
                                             ;; throwing here stops the QP before it ever hits the BigQuery API
                                             (throw (Exception. "Done.")))]
      ;; BUGFIX: previously the `query` argument was ignored and a hardcoded venues query was always compiled,
      ;; so callers passing a different query silently got the wrong SQL. Compile the query we were given.
      (qp/process-query query)
      @native-query)))
;; the compiled SQL should be prefixed with a `-- Metabase::` info-remark comment containing the executing user ID,
;; query type, and query hash
(datasets/expect-with-driver :bigquery
  (str
   "-- Metabase:: userID: 1000 queryType: MBQL queryHash: 01020304\n"
   "SELECT `test_data.venues`.`id` AS `id`,"
   " `test_data.venues`.`name` AS `name`,"
   " `test_data.venues`.`category_id` AS `category_id`,"
   " `test_data.venues`.`latitude` AS `latitude`,"
   " `test_data.venues`.`longitude` AS `longitude`,"
   " `test_data.venues`.`price` AS `price` "
   "FROM `test_data.venues` "
   "LIMIT 1")
  (query->native
    {:database (data/id)
     :type     :query
     :query    {:source-table (data/id :venues)
                :limit        1}
     :info     {:executed-by 1000
                :query-hash  (byte-array [1 2 3 4])}}))
;; let's make sure we're generating correct HoneySQL + SQL for aggregations
;; first the intermediate HoneySQL form produced by the (private) `mbql->honeysql` step ...
(datasets/expect-with-driver :bigquery
  {:select   [[(hx/identifier :field "test_data.venues" "price") (hx/identifier :field-alias "price")]
              [(hsql/call :avg (hx/identifier :field "test_data.venues" "category_id")) (hx/identifier :field-alias "avg")]]
   :from     [(hx/identifier :table "test_data.venues")]
   :group-by [(hx/identifier :field-alias "price")]
   :order-by [[(hx/identifier :field-alias "avg") :asc]]}
  (qp.test-util/with-everything-store
    (#'sql.qp/mbql->honeysql
     :bigquery
     (data/mbql-query venues
       {:aggregation [[:avg $category_id]]
        :breakout    [$price]
        :order-by    [[:asc [:aggregation 0]]]}))))

;; ... then the final compiled SQL string for the same MBQL query
(datasets/expect-with-driver :bigquery
  {:query      (str "SELECT `test_data.venues`.`price` AS `price`,"
                    " avg(`test_data.venues`.`category_id`) AS `avg` "
                    "FROM `test_data.venues` "
                    "GROUP BY `price` "
                    "ORDER BY `avg` ASC, `price` ASC")
   :table-name "venues"
   :mbql?      true}
  (qp/query->native
    (data/mbql-query venues
      {:aggregation [[:avg $category_id]], :breakout [$price], :order-by [[:asc [:aggregation 0]]]})))
;; Do we properly unprepare, and can we execute, queries that still have parameters for one reason or another? (EE #277)
;; (BigQuery has no prepared statements, so `?` placeholders must be inlined from `:params` before execution)
(datasets/expect-with-driver :bigquery
  [["Red Medicine"]]
  (qp.test/rows
    (qp/process-query
      {:database (data/id)
       :type     :native
       :native   {:query  (str "SELECT `test_data.venues`.`name` AS `name` "
                               "FROM `test_data.venues` "
                               "WHERE `test_data.venues`.`name` = ?")
                  :params ["Red Medicine"]}})))
[metabase.test.data
[bigquery :as bigquery.tx]
[datasets :as datasets]]))
(deftest table-rows-sample-test
  (datasets/test-driver :bigquery
    ;; sample the `venues` table and spot-check the first five rows by ID (sample order isn't guaranteed, so sort)
    (let [fields  [(Field (data/id :venues :id))
                   (Field (data/id :venues :name))]
          sampled (metadata-queries/table-rows-sample (Table (data/id :venues)) fields)]
      (is (= [[1 "Red Medicine"]
              [2 "Stout Burgers & Beers"]
              [3 "The Apple Pan"]
              [4 "Wurstküche"]
              [5 "Brite Spot Family Restaurant"]]
             (take 5 (sort-by first sampled)))))))
(deftest db-timezone-id-test
  (datasets/test-driver :bigquery
    ;; the test BigQuery database should report its timezone as UTC
    (is (= "UTC"
           (tu/db-timezone-id)))))
(defn- do-with-view
  "Impl for `with-view`. Creates a uniquely-named BigQuery view over a temp copy of the test dataset, calls `f` with
  the view's name, and drops the view when `f` finishes (even if it throws)."
  [f]
  (driver/with-driver :bigquery
    ;; gensym + munge gives us a name that is both unique and a valid BigQuery identifier
    (let [view-name (name (munge (gensym "view_")))]
      (data/with-temp-copy-of-db
        (try
          ;; `execute!` is (format-string & args): one %s for the view name, two for the project ID
          (bigquery.tx/execute!
            (str "CREATE VIEW `test_data.%s` "
                 "AS "
                 "SELECT v.id AS id, v.name AS venue_name, c.name AS category_name "
                 "FROM `%s.test_data.venues` v "
                 "LEFT JOIN `%s.test_data.categories` c "
                 "ON v.category_id = c.id "
                 "ORDER BY v.id ASC "
                 "LIMIT 3")
            view-name
            (bigquery.tx/project-id)
            (bigquery.tx/project-id))
          (f view-name)
          (finally
            ;; always clean the view up, even if `f` throws
            (bigquery.tx/execute! "DROP VIEW IF EXISTS `test_data.%s`" view-name)))))))
(defmacro ^:private with-view
  "Create a temporary BigQuery view for the duration of `body`, binding its generated name to `view-name-binding`
  (which may be omitted if the name isn't needed). See `do-with-view`."
  [[view-name-binding] & body]
  `(do-with-view (fn [~(or view-name-binding '_)] ~@body)))
(deftest sync-views-test
  (datasets/test-driver :bigquery
    (with-view [view-name]
      ;; the view should show up alongside the regular test-data tables
      (is (= {:tables
              #{{:schema nil, :name "categories"}
                {:schema nil, :name "checkins"}
                {:schema nil, :name "users"}
                {:schema nil, :name "venues"}
                {:schema nil, :name view-name}}}
             (driver/describe-database :bigquery (data/db)))
          "`describe-database` should see the view")
      (is (= {:schema nil
              :name   view-name
              :fields #{{:name "id", :database-type "INTEGER", :base-type :type/Integer}
                        {:name "venue_name", :database-type "STRING", :base-type :type/Text}
                        {:name "category_name", :database-type "STRING", :base-type :type/Text}}}
             (driver/describe-table :bigquery (data/db) {:name view-name}))
          "`describe-tables` should see the fields in the view")
      ;; sync so the view gets a Table row we can run an MBQL query against
      (sync/sync-database! (data/db))
      (is (= [[1 "Asian" "Red Medicine"]
              [2 "Burger" "Stout Burgers & Beers"]
              [3 "Burger" "The Apple Pan"]]
             (qp.test/rows
               (qp/process-query
                 {:database (data/id)
                  :type     :query
                  :query    {:source-table (data/id view-name)
                             :order-by     [[:asc (data/id view-name :id)]]}})))
          "We should be able to run queries against the view (#3414)"))))
......@@ -11,8 +11,9 @@
[metabase.driver
[bigquery :as bigquery]
[google :as google]]
[metabase.driver.bigquery.query-processor :as bigquery.qp]
[metabase.test.data :as data]
[metabase.test.data
[datasets :as datasets]
[interface :as tx]
[sql :as sql.tx]]
[metabase.util
......@@ -21,8 +22,9 @@
[schema.core :as s])
(:import com.google.api.client.util.DateTime
com.google.api.services.bigquery.Bigquery
[com.google.api.services.bigquery.model Dataset DatasetReference QueryRequest Table TableDataInsertAllRequest
TableDataInsertAllRequest$Rows TableFieldSchema TableReference TableRow TableSchema]
[com.google.api.services.bigquery.model Dataset DatasetReference QueryRequest QueryResponse Table
TableDataInsertAllRequest TableDataInsertAllRequest$Rows TableFieldSchema TableReference TableRow
TableSchema]
java.sql.Time))
(sql.tx/add-test-extensions! :bigquery)
......@@ -49,7 +51,10 @@
{}
[:project-id :client-id :client-secret :access-token :refresh-token])))
(def ^:private ^String project-id (:project-id @details))
(defn project-id
"BigQuery project ID that we're using for tests, from the env var `MB_BIGQUERY_TEST_PROJECT_ID`."
^String []
(:project-id @details))
(let [bigquery* (delay (#'bigquery/database->client {:details @details}))]
(defn- bigquery ^Bigquery []
......@@ -63,19 +68,34 @@
(defn- create-dataset! [^String dataset-id]
{:pre [(seq dataset-id)]}
(google/execute (.insert (.datasets (bigquery)) project-id (doto (Dataset.)
(.setLocation "US")
(.setDatasetReference (doto (DatasetReference.)
(.setDatasetId dataset-id))))))
(google/execute
(.insert
(.datasets (bigquery))
(project-id)
(doto (Dataset.)
(.setLocation "US")
(.setDatasetReference (doto (DatasetReference.)
(.setDatasetId dataset-id))))))
(println (u/format-color 'blue "Created BigQuery dataset '%s'." dataset-id)))
(defn- destroy-dataset!
  "Delete the BigQuery test dataset named `dataset-id`, along with any tables it still contains."
  [^String dataset-id]
  {:pre [(seq dataset-id)]}
  (google/execute-no-auto-retry (doto (.delete (.datasets (bigquery)) (project-id) dataset-id)
                                  ;; without this the delete fails if the dataset still has tables in it
                                  (.setDeleteContents true)))
  (println (u/format-color 'red "Deleted BigQuery dataset '%s'." dataset-id)))
(def ^:private ^:const valid-field-types
(defn execute!
  "Execute arbitrary (presumably DDL) SQL statements against the test project. Waits for statement to complete, throwing
  an Exception if it fails."
  ^QueryResponse [format-string & args]
  (driver/with-driver :bigquery
    (let [sql (apply format format-string args)]
      ;; log + flush so interleaved test output shows exactly which DDL statement ran
      (printf "[BigQuery] %s\n" sql)
      (flush)
      ;; `with-finished-response` blocks until the job completes (or fails) before returning the response
      (bigquery/with-finished-response [response (#'bigquery/execute-bigquery (data/db) sql)]
        response))))
;; BigQuery column types the test extensions know how to create (see `create-table!`)
(def ^:private valid-field-types
  #{:BOOLEAN :FLOAT :INTEGER :RECORD :STRING :TIMESTAMP :TIME})
(s/defn ^:private create-table!
......@@ -83,26 +103,27 @@
table-id :- su/NonBlankString,
field-name->type :- {su/KeywordOrString (apply s/enum valid-field-types)}]
(google/execute
(.insert (.tables (bigquery))
project-id
dataset-id
(doto (Table.)
(.setTableReference (doto (TableReference.)
(.setProjectId project-id)
(.setDatasetId dataset-id)
(.setTableId table-id)))
(.setSchema (doto (TableSchema.)
(.setFields (for [[field-name field-type] field-name->type]
(doto (TableFieldSchema.)
(.setMode "REQUIRED")
(.setName (name field-name))
(.setType (name field-type))))))))))
(.insert
(.tables (bigquery))
(project-id)
dataset-id
(doto (Table.)
(.setTableReference (doto (TableReference.)
(.setProjectId (project-id))
(.setDatasetId dataset-id)
(.setTableId table-id)))
(.setSchema (doto (TableSchema.)
(.setFields (for [[field-name field-type] field-name->type]
(doto (TableFieldSchema.)
(.setMode "REQUIRED")
(.setName (name field-name))
(.setType (name field-type))))))))))
(println (u/format-color 'blue "Created BigQuery table '%s.%s'." dataset-id table-id)))
(defn- table-row-count ^Integer [^String dataset-id, ^String table-id]
(ffirst (:rows (#'bigquery/post-process-native
(google/execute
(.query (.jobs (bigquery)) project-id
(.query (.jobs (bigquery)) (project-id)
(doto (QueryRequest.)
(.setQuery (format "SELECT COUNT(*) FROM [%s.%s]" dataset-id table-id)))))))))
......@@ -116,18 +137,20 @@
(defn- insert-data! [^String dataset-id, ^String table-id, row-maps]
{:pre [(seq dataset-id) (seq table-id) (sequential? row-maps) (seq row-maps) (every? map? row-maps)]}
(google/execute (.insertAll (.tabledata (bigquery)) project-id dataset-id table-id
(doto (TableDataInsertAllRequest.)
(.setRows (for [row-map row-maps]
(let [data (TableRow.)]
(doseq [[k v] row-map
:let [v (cond
(instance? honeysql.types.SqlCall v)
(timestamp-honeysql-form->GoogleDateTime v)
:else v)]]
(.set data (name k) v))
(doto (TableDataInsertAllRequest$Rows.)
(.setJson data))))))))
(google/execute
(.insertAll
(.tabledata (bigquery)) (project-id) dataset-id table-id
(doto (TableDataInsertAllRequest.)
(.setRows (for [row-map row-maps]
(let [data (TableRow.)]
(doseq [[k v] row-map
:let [v (cond
(instance? honeysql.types.SqlCall v)
(timestamp-honeysql-form->GoogleDateTime v)
:else v)]]
(.set data (name k) v))
(doto (TableDataInsertAllRequest$Rows.)
(.setJson data))))))))
;; Wait up to 30 seconds for all the rows to be loaded and become available by BigQuery
(let [expected-row-count (count row-maps)]
(loop [seconds-to-wait-for-load 30]
......@@ -145,7 +168,7 @@
dataset-id table-id expected-row-count actual-row-count))))))))
(def ^:private ^:const base-type->bigquery-type
(def ^:private base-type->bigquery-type
{:type/BigInteger :INTEGER
:type/Boolean :BOOLEAN
:type/Date :TIMESTAMP
......@@ -173,26 +196,27 @@
[t]
(->> t
tcoerce/to-date-time
(tformat/unparse #'bigquery/bigquery-time-format)))
(tformat/unparse #'bigquery.qp/bigquery-time-format)))
(defn- tabledef->prepared-rows
"Convert `table-definition` to a format approprate for passing to `insert-data!`."
[{:keys [field-definitions rows]}]
{:pre [(every? map? field-definitions) (sequential? rows) (seq rows)]}
(let [field-names (map :field-name field-definitions)]
(for [[i row] (m/indexed rows)]
(assoc (zipmap field-names (for [v row]
(u/prog1 (cond
(instance? Time v)
(time->string v)
(instance? java.util.Date v)
;; convert to Google version of DateTime, otherwise it doesn't work (!)
(DateTime. ^java.util.Date v)
:else v)
(assert (not (nil? <>)))))) ; make sure v is non-nil
(for [[i row] (m/indexed rows)
:let [vs (for [v row]
(u/prog1 (cond
(instance? Time v)
(time->string v)
(instance? java.util.Date v)
;; convert to Google version of DateTime, otherwise it doesn't work (!)
(DateTime. ^java.util.Date v)
:else v)
;; make sure v is non-nil
(assert (not (nil? <>)))))]]
(assoc (zipmap field-names vs)
:id (inc i)))))
(defn- load-tabledef! [dataset-name {:keys [table-name field-definitions], :as tabledef}]
......@@ -204,7 +228,7 @@
(defn- existing-dataset-names
"Fetch a list of *all* dataset names that currently exist in the BQ test project."
[]
(for [dataset (get (google/execute (doto (.list (.datasets (bigquery)) project-id)
(for [dataset (get (google/execute (doto (.list (.datasets (bigquery)) (project-id))
;; Long/MAX_VALUE barfs but it has to be a Long
(.setMaxResults (long Integer/MAX_VALUE))))
"datasets")]
......@@ -247,6 +271,7 @@
(defmethod tx/aggregate-column-info :bigquery
([driver aggregation-type]
((get-method tx/aggregate-column-info :sql-jdbc/test-extensions) driver aggregation-type))
([driver aggregation-type field]
(merge
((get-method tx/aggregate-column-info :sql-jdbc/test-extensions) driver aggregation-type field)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment