Skip to content
Snippets Groups Projects
Unverified Commit 3ede018c authored by Cam Saul's avatar Cam Saul
Browse files

Remove BigQuery driver custom implementation of field->alias [ci bigquery]

parent 5f3ad453
No related merge requests found
......@@ -351,7 +351,7 @@ jobs:
command: >
/home/circleci/metabase/metabase/.circleci/skip-driver-tests.sh redshift ||
lein with-profile +ci test
no_output_timeout: 5m
no_output_timeout: 10m
be-tests-presto:
......
......@@ -344,20 +344,6 @@
(throw (Exception. (str (tru "BigQuery statements can't be parameterized!")))))
sql))
(defn- post-process-mbql [table-name {:keys [columns rows]}]
;; Say we have an identifier like `veryNiceDataset.shakespeare`.`corpus`. We will alias it like
;; `shakespeare___corpus` (because BigQuery does not let you include symbols in identifiers); during post-processing
;; we can go ahead and strip off the table name from the alias since we don't want it to show up in the result
;; column names
(let [demangle-name #(str/replace % (re-pattern (str \^ table-name "___")) "")
columns (map demangle-name columns)
rows (for [row rows]
(zipmap columns row))
columns (vec (keys (first rows)))]
{:columns columns
:rows (for [row rows]
(mapv row columns))}))
;; From the dox: Fields must contain only letters, numbers, and underscores, start with a letter or underscore, and be
;; at most 128 characters long.
(defn- format-custom-field-name ^String [^String custom-field-name]
......@@ -428,14 +414,6 @@
[:named _ ag-name] ag-name
[ag-type & _] ag-type)))
(defn- field->alias
"Generate an appropriate alias for a `field`. This will normally be something like `tableName___fieldName` (done this
way because BigQuery will not let us include symbols in identifiers, so we can't make our alias be
`tableName.fieldName`, like we do for other drivers)."
[driver {field-name :name, table-id :table_id, :as field}]
(let [{table-name :name} (qp.store/table table-id)]
(str table-name "___" field-name)))
(defn- field->identifier
"Generate appropriate identifier for a Field for SQL parameters. (NOTE: THIS IS ONLY USED FOR SQL PARAMETERS!)"
;; TODO - Making a DB call for each field to fetch its Table is inefficient and makes me cry, but this method is
......@@ -456,7 +434,7 @@
(defn- field->breakout-identifier [driver field-clause]
(let [alias (if (mbql.u/is-clause? :aggregation field-clause)
(ag-ref->alias field-clause)
(field->alias driver (field-clause->field field-clause)))]
(sql/field->alias driver (field-clause->field field-clause)))]
(hsql/raw (str \` alias \`))))
(defn- apply-breakout [driver honeysql-form {breakout-field-clauses :breakout, fields-field-clauses :fields}]
......@@ -546,12 +524,10 @@
:as outer-query}]
(let [database (qp.store/database)]
(binding [*bigquery-timezone* (effective-query-timezone database)]
(let [sql (str "-- " (qputil/query->remark outer-query) "\n" (if (seq params)
(unprepare/unprepare (cons sql params))
sql))
results (process-native* database sql)]
(cond->> results
mbql? (post-process-mbql table-name))))))
(let [sql (str "-- " (qputil/query->remark outer-query) "\n" (if (seq params)
(unprepare/unprepare (cons sql params))
sql))]
(process-native* database sql)))))
;; BigQuery doesn't return a timezone with it's time strings as it's always UTC, JodaTime parsing also defaults to UTC
......@@ -570,7 +546,6 @@
:connection-details->spec (constantly nil)
:current-datetime-fn (constantly :%current_timestamp)
:date (u/drop-first-arg date)
:field->alias field->alias
:field->identifier (u/drop-first-arg field->identifier)
:quote-style (constantly :mysql)
:string-length-fn (u/drop-first-arg string-length-fn)
......
......@@ -243,8 +243,11 @@
(s/defn query->source-table-id :- (s/maybe su/IntGreaterThanZero)
"Return the source Table ID associated with `query`, if applicable; handles nested queries as well. If `query` is
`nil`, returns `nil`."
{:argslists '([outer-query])}
`nil`, returns `nil`.
Throws an Exception when it encounters a unresolved source query (i.e., the `:source-table \"card__id\"`
form), because it cannot return an accurate result for a query that has not yet been preprocessed."
{:arglists '([outer-query])}
[{{source-table-id :source-table, source-query :source-query} :query, query-type :type, :as query}]
(cond
;; for native queries, there's no source table to resolve
......
......@@ -31,16 +31,14 @@
[[index & more] last-row row]
(if-not index
row
(recur more last-row (update row index (partial + (nth last-row index))))))
(recur more last-row (update (vec row) index (partial + (nth last-row index))))))
(defn- sum-rows
"Sum the values in `rows` at `indexes-to-sum`.
(sum-rows #{0} [[1] [2] [3]]) ; -> [[1] [3] [6]]"
[indexes-to-sum rows]
(reductions (partial add-rows indexes-to-sum)
(first rows)
(rest rows)))
(reductions (partial add-rows indexes-to-sum) rows))
(defn handle-cumulative-aggregations
"Middleware that implements `cum-count` and `cum-sum` aggregations. These clauses are replaced with `count` and `sum`
......
......@@ -151,13 +151,12 @@
;; alias, e.g. something like `categories__via__category_id`, which is considerably different from what other SQL
;; databases do. (#4218)
(expect-with-engine :bigquery
(str "SELECT `test_data.categories__via__category_id`.`name` AS `categories___name`,"
" count(*) AS `count` "
"FROM `test_data.venues` "
(str "SELECT `test_data.categories__via__category_id`.`name` AS `name`,"
" count(*) AS `count` FROM `test_data.venues` "
"LEFT JOIN `test_data.categories` `test_data.categories__via__category_id`"
" ON `test_data.venues`.`category_id` = `test_data.categories__via__category_id`.`id` "
"GROUP BY `categories___name` "
"ORDER BY `categories___name` ASC")
"GROUP BY `name` "
"ORDER BY `name` ASC")
;; normally for test purposes BigQuery doesn't support foreign keys so override the function that checks that and
;; make it return `true` so this test proceeds as expected
(with-redefs [driver/driver-supports? (constantly true)
......@@ -218,20 +217,10 @@
(native-timestamp-query db "2018-08-31 00:00:00+07" "Asia/Jakarta"))))
;; if I run a BigQuery query, does it get a remark added to it?
(expect-with-engine :bigquery
(str
"-- Metabase:: userID: 1000 queryType: MBQL queryHash: 01020304\n"
"SELECT `test_data.venues`.`id` AS `venues___id`,"
" `test_data.venues`.`name` AS `venues___name`,"
" `test_data.venues`.`category_id` AS `venues___category_id`,"
" `test_data.venues`.`latitude` AS `venues___latitude`,"
" `test_data.venues`.`longitude` AS `venues___longitude`,"
" `test_data.venues`.`price` AS `venues___price` "
"FROM `test_data.venues` "
"LIMIT 1")
(let [native-query (atom nil)]
(defn- query->native [query]
(with-local-vars [native-query nil]
(with-redefs [bigquery/process-native* (fn [_ sql]
(reset! native-query sql)
(var-set native-query sql)
(throw (Exception. "Done.")))]
(qp/process-query {:database (data/id)
:type :query
......@@ -241,3 +230,23 @@
:query-type "MBQL"
:query-hash (byte-array [1 2 3 4])}})
@native-query)))
(expect-with-engine :bigquery
(str
"-- Metabase:: userID: 1000 queryType: MBQL queryHash: 01020304\n"
"SELECT `test_data.venues`.`id` AS `id`,"
" `test_data.venues`.`name` AS `name`,"
" `test_data.venues`.`category_id` AS `category_id`,"
" `test_data.venues`.`latitude` AS `latitude`,"
" `test_data.venues`.`longitude` AS `longitude`,"
" `test_data.venues`.`price` AS `price` "
"FROM `test_data.venues` "
"LIMIT 1")
(query->native
{:database (data/id)
:type :query
:query {:source-table (data/id :venues)
:limit 1}
:info {:executed-by 1000
:query-type "MBQL"
:query-hash (byte-array [1 2 3 4])}}))
......@@ -18,6 +18,7 @@
[1 4 6]
(#'cumulative-aggregations/add-rows #{1 2} [1 2 3] [1 2 3]))
;; should throw an Exception if index is out of bounds
(expect
IndexOutOfBoundsException
(#'cumulative-aggregations/add-rows #{4} [1 2 3] [1 2 3]))
......@@ -50,6 +51,34 @@
#{0 1}
[[0 0] [1 1] [2 2] [3 3] [4 4] [5 5] [6 6] [7 7] [8 8] [9 9]]))
;; make sure cumulative aggregations still work correctly with lists...
(expect
[[1 1 1] [2 3 2] [3 6 3]]
(#'cumulative-aggregations/sum-rows #{1} '((1 1 1) (2 2 2) (3 3 3))))
;; ...and lazy sequences
(expect
[[1 1 1] [2 3 2] [3 6 3]]
(#'cumulative-aggregations/sum-rows #{1} (lazy-cat '((1 1 1)) '((2 2 2)) '((3 3 3)))))
;; the results should be L A Z Y
(expect
{:fully-realized-after-taking-2? false
:fully-realized-after-taking-3? true}
(with-local-vars [fully-realized? false]
(let [a-lazy-seq (lazy-cat
'((1 1 1))
'((2 2 2))
(do
(var-set fully-realized? true)
'((3 3 3))))
realize-n (fn [n]
(dorun (take n (#'cumulative-aggregations/sum-rows #{1} a-lazy-seq)))
@fully-realized?)]
{:fully-realized-after-taking-2? (realize-n 2)
:fully-realized-after-taking-3? (realize-n 3)})))
;; can it go forever without a stack overflow?
(expect
[[4999850001] [4999950000]]
......
......@@ -9,7 +9,9 @@
[driver :as driver]
[util :as u]]
[metabase.test.data :as data]
[metabase.test.data.datasets :as datasets]
[metabase.test.data
[datasets :as datasets]
[interface :as di]]
[metabase.util.date :as du]))
;; make sure all the driver test extension namespaces are loaded <3 if this isn't done some things will get loaded at
......@@ -272,27 +274,9 @@
(aggregate-col :count)
(aggregate-col :avg (venues-col :id))"
{:arglists '([ag-col-kw] [ag-col-kw field])}
;; TODO - cumulative count doesn't require a FIELD !!!!!!!!!
([ag-col-kw]
(assert (= ag-col-kw) :count)
{:base_type :type/Integer
:special_type :type/Number
:name "count"
:display_name "count"
:source :aggregation})
([ag-col-kw {:keys [base_type special_type]}]
{:pre [base_type special_type]}
(merge
{:base_type base_type
:special_type special_type
:settings nil
:name (name ag-col-kw)
:display_name (name ag-col-kw)
:source :aggregation}
;; count always gets the same special type regardless
(when (= ag-col-kw :count)
(aggregate-col :count)))))
{:arglists '([ag-type] [ag-type field])}
[& args]
(apply di/aggregate-column-info datasets/*driver* args))
(defn breakout-col [col]
(assoc col :source :breakout))
......
......@@ -115,10 +115,10 @@
(datasets/expect-with-engines (non-timeseries-engines-with-feature :standard-deviation-aggregations)
{:columns [(data/format-name "price")
"stddev"]
:rows [[3 (if (contains? #{:mysql :crate} *engine*) 25 26)]
:rows [[3 (if (#{:mysql :crate} *engine*) 25 26)]
[1 24]
[2 21]
[4 (if (contains? #{:mysql :crate} *engine*) 14 15)]]
[4 (if (#{:mysql :crate} *engine*) 14 15)]]
:cols [(breakout-col (venues-col :price))
(aggregate-col :stddev (venues-col :category_id))]
:native_form true}
......@@ -129,4 +129,4 @@
booleanize-native-form
data
(format-rows-by [int (comp int math/round)])
tu/round-fingerprint-cols))
\ No newline at end of file
tu/round-fingerprint-cols))
......@@ -10,6 +10,7 @@
[metabase.test.data
[datasets :as datasets]
[interface :as i]]
[metabase.query-processor-test :as qp.test]
[metabase.util :as u]
[metabase.util
[date :as du]
......@@ -233,6 +234,17 @@
(destroy-dataset! database-name))
(throw e)))))))
(defn aggregate-column-info
([driver aggregation-type]
(i/default-aggregate-column-info driver aggregation-type))
([driver aggregation-type field]
(merge
(i/default-aggregate-column-info driver aggregation-type field)
;; BigQuery averages, standard deviations come back as Floats. This might apply to some other ag types as well;
;; add them as we come across them.
(when (#{:avg :stddev} aggregation-type)
{:base_type :type/Float}))))
;;; --------------------------------------------- IDriverTestExtensions ----------------------------------------------
......@@ -241,4 +253,5 @@
(merge i/IDriverTestExtensionsDefaultsMixin
{:engine (constantly :bigquery)
:database->connection-details (u/drop-first-arg database->connection-details)
:create-db! (u/drop-first-arg create-db!)}))
:create-db! (u/drop-first-arg create-db!)
:aggregate-column-info aggregate-column-info}))
......@@ -71,6 +71,30 @@
([session-schema _ db-name table-name] [session-schema (db-qualified-table-name db-name table-name)])
([session-schema _ db-name table-name field-name] [session-schema (db-qualified-table-name db-name table-name) field-name]))
(defn default-aggregate-column-info
"Default implementation of `aggregate-column-info` for drivers using the `IDriverTestExtensionsDefaultsMixin`."
{:arglists '([driver aggregation-type] [driver aggregation-type field])}
([_ aggregation-type]
;; TODO - cumulative count doesn't require a FIELD !!!!!!!!!
(assert (= aggregation-type) :count)
{:base_type :type/Integer
:special_type :type/Number
:name "count"
:display_name "count"
:source :aggregation})
([driver aggregation-type {:keys [base_type special_type]}]
{:pre [base_type special_type]}
(merge
{:base_type base_type
:special_type special_type
:settings nil
:name (name aggregation-type)
:display_name (name aggregation-type)
:source :aggregation}
;; count always gets the same special type regardless
(when (= aggregation-type :count)
(default-aggregate-column-info driver :count)))))
(defprotocol IMetabaseInstance
(metabase-instance [this context]
......@@ -148,7 +172,11 @@
(id-field-type ^clojure.lang.Keyword [this]
"*OPTIONAL* Return the `base_type` of the `id` `Field` (e.g. `:type/Integer` or `:type/BigInteger`). Defaults to
`:type/Integer`."))
`:type/Integer`.")
(aggregate-column-info [this aggregation-type] [this aggregation-type field]
"*OPTIONAL*. Return the expected type information that should come back for QP results as part of `:cols` for an
aggregation of a given type (and applied to a given Field, when applicable)."))
(def IDriverTestExtensionsDefaultsMixin
"Default implementations for the `IDriverTestExtensions` methods marked *OPTIONAL*."
......@@ -157,7 +185,8 @@
:format-name (u/drop-first-arg identity)
:has-questionable-timezone-support? (fn [driver]
(not (contains? (driver/features driver) :set-timezone)))
:id-field-type (constantly :type/Integer)})
:id-field-type (constantly :type/Integer)
:aggregate-column-info default-aggregate-column-info})
;; ## Helper Functions for Creating New Definitions
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment