From 3ede018c5453053cc42cff1964e5ff6e6d82cb14 Mon Sep 17 00:00:00 2001 From: Cam Saul <cammsaul@gmail.com> Date: Tue, 6 Nov 2018 13:47:49 -0800 Subject: [PATCH] Remove BigQuery driver custom implementation of field->alias [ci bigquery] --- .circleci/config.yml | 2 +- src/metabase/driver/bigquery.clj | 35 +++------------ src/metabase/mbql/util.clj | 7 ++- .../middleware/cumulative_aggregations.clj | 6 +-- test/metabase/driver/bigquery_test.clj | 45 +++++++++++-------- .../cumulative_aggregations_test.clj | 29 ++++++++++++ test/metabase/query_processor_test.clj | 28 +++--------- .../query_processor_test/order_by_test.clj | 6 +-- test/metabase/test/data/bigquery.clj | 15 ++++++- test/metabase/test/data/interface.clj | 33 +++++++++++++- 10 files changed, 123 insertions(+), 83 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 0d9a0c00ccf..76ab5a5a0fb 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -351,7 +351,7 @@ jobs: command: > /home/circleci/metabase/metabase/.circleci/skip-driver-tests.sh redshift || lein with-profile +ci test - no_output_timeout: 5m + no_output_timeout: 10m be-tests-presto: diff --git a/src/metabase/driver/bigquery.clj b/src/metabase/driver/bigquery.clj index 993331dfd02..ec288f523c6 100644 --- a/src/metabase/driver/bigquery.clj +++ b/src/metabase/driver/bigquery.clj @@ -344,20 +344,6 @@ (throw (Exception. (str (tru "BigQuery statements can't be parameterized!"))))) sql)) -(defn- post-process-mbql [table-name {:keys [columns rows]}] - ;; Say we have an identifier like `veryNiceDataset.shakespeare`.`corpus`. We will alias it like - ;; `shakespeare___corpus` (because BigQuery does not let you include symbols in identifiers); during post-processing - ;; we can go ahead and strip off the table name from the alias since we don't want it to show up in the result - ;; column names - (let [demangle-name #(str/replace % (re-pattern (str \^ table-name "___")) "") - columns (map demangle-name columns) - rows (for [row rows] - (zipmap columns row)) - columns (vec (keys (first rows)))] - {:columns columns - :rows (for [row rows] - (mapv row columns))})) - ;; From the dox: Fields must contain only letters, numbers, and underscores, start with a letter or underscore, and be ;; at most 128 characters long. (defn- format-custom-field-name ^String [^String custom-field-name] @@ -428,14 +414,6 @@ [:named _ ag-name] ag-name [ag-type & _] ag-type))) -(defn- field->alias - "Generate an appropriate alias for a `field`. This will normally be something like `tableName___fieldName` (done this - way because BigQuery will not let us include symbols in identifiers, so we can't make our alias be - `tableName.fieldName`, like we do for other drivers)." - [driver {field-name :name, table-id :table_id, :as field}] - (let [{table-name :name} (qp.store/table table-id)] - (str table-name "___" field-name))) - (defn- field->identifier "Generate appropriate identifier for a Field for SQL parameters. (NOTE: THIS IS ONLY USED FOR SQL PARAMETERS!)" ;; TODO - Making a DB call for each field to fetch its Table is inefficient and makes me cry, but this method is @@ -456,7 +434,7 @@ (defn- field->breakout-identifier [driver field-clause] (let [alias (if (mbql.u/is-clause? :aggregation field-clause) (ag-ref->alias field-clause) - (field->alias driver (field-clause->field field-clause)))] + (sql/field->alias driver (field-clause->field field-clause)))] (hsql/raw (str \` alias \`)))) (defn- apply-breakout [driver honeysql-form {breakout-field-clauses :breakout, fields-field-clauses :fields}] @@ -546,12 +524,10 @@ :as outer-query}] (let [database (qp.store/database)] (binding [*bigquery-timezone* (effective-query-timezone database)] - (let [sql (str "-- " (qputil/query->remark outer-query) "\n" (if (seq params) - (unprepare/unprepare (cons sql params)) - sql)) - results (process-native* database sql)] - (cond->> results - mbql? (post-process-mbql table-name)))))) + (let [sql (str "-- " (qputil/query->remark outer-query) "\n" (if (seq params) + (unprepare/unprepare (cons sql params)) + sql))] + (process-native* database sql))))) ;; BigQuery doesn't return a timezone with it's time strings as it's always UTC, JodaTime parsing also defaults to UTC @@ -570,7 +546,6 @@ :connection-details->spec (constantly nil) :current-datetime-fn (constantly :%current_timestamp) :date (u/drop-first-arg date) - :field->alias field->alias :field->identifier (u/drop-first-arg field->identifier) :quote-style (constantly :mysql) :string-length-fn (u/drop-first-arg string-length-fn) diff --git a/src/metabase/mbql/util.clj b/src/metabase/mbql/util.clj index 2687a3be36c..9489c0be21a 100644 --- a/src/metabase/mbql/util.clj +++ b/src/metabase/mbql/util.clj @@ -243,8 +243,11 @@ (s/defn query->source-table-id :- (s/maybe su/IntGreaterThanZero) "Return the source Table ID associated with `query`, if applicable; handles nested queries as well. If `query` is - `nil`, returns `nil`." - {:argslists '([outer-query])} + `nil`, returns `nil`. + + Throws an Exception when it encounters a unresolved source query (i.e., the `:source-table \"card__id\"` + form), because it cannot return an accurate result for a query that has not yet been preprocessed." + {:arglists '([outer-query])} [{{source-table-id :source-table, source-query :source-query} :query, query-type :type, :as query}] (cond ;; for native queries, there's no source table to resolve diff --git a/src/metabase/query_processor/middleware/cumulative_aggregations.clj b/src/metabase/query_processor/middleware/cumulative_aggregations.clj index 923325f3c37..f4eb6a94cc0 100644 --- a/src/metabase/query_processor/middleware/cumulative_aggregations.clj +++ b/src/metabase/query_processor/middleware/cumulative_aggregations.clj @@ -31,16 +31,14 @@ [[index & more] last-row row] (if-not index row - (recur more last-row (update row index (partial + (nth last-row index)))))) + (recur more last-row (update (vec row) index (partial + (nth last-row index)))))) (defn- sum-rows "Sum the values in `rows` at `indexes-to-sum`. (sum-rows #{0} [[1] [2] [3]]) ; -> [[1] [3] [6]]" [indexes-to-sum rows] - (reductions (partial add-rows indexes-to-sum) - (first rows) - (rest rows))) + (reductions (partial add-rows indexes-to-sum) rows)) (defn handle-cumulative-aggregations "Middleware that implements `cum-count` and `cum-sum` aggregations. These clauses are replaced with `count` and `sum` diff --git a/test/metabase/driver/bigquery_test.clj b/test/metabase/driver/bigquery_test.clj index 8d899f96ff7..06775218852 100644 --- a/test/metabase/driver/bigquery_test.clj +++ b/test/metabase/driver/bigquery_test.clj @@ -151,13 +151,12 @@ ;; alias, e.g. something like `categories__via__category_id`, which is considerably different from what other SQL ;; databases do. (#4218) (expect-with-engine :bigquery - (str "SELECT `test_data.categories__via__category_id`.`name` AS `categories___name`," - " count(*) AS `count` " - "FROM `test_data.venues` " + (str "SELECT `test_data.categories__via__category_id`.`name` AS `name`," + " count(*) AS `count` FROM `test_data.venues` " "LEFT JOIN `test_data.categories` `test_data.categories__via__category_id`" " ON `test_data.venues`.`category_id` = `test_data.categories__via__category_id`.`id` " - "GROUP BY `categories___name` " - "ORDER BY `categories___name` ASC") + "GROUP BY `name` " + "ORDER BY `name` ASC") ;; normally for test purposes BigQuery doesn't support foreign keys so override the function that checks that and ;; make it return `true` so this test proceeds as expected (with-redefs [driver/driver-supports? (constantly true) @@ -218,20 +217,10 @@ (native-timestamp-query db "2018-08-31 00:00:00+07" "Asia/Jakarta")))) ;; if I run a BigQuery query, does it get a remark added to it? -(expect-with-engine :bigquery - (str - "-- Metabase:: userID: 1000 queryType: MBQL queryHash: 01020304\n" - "SELECT `test_data.venues`.`id` AS `venues___id`," - " `test_data.venues`.`name` AS `venues___name`," - " `test_data.venues`.`category_id` AS `venues___category_id`," - " `test_data.venues`.`latitude` AS `venues___latitude`," - " `test_data.venues`.`longitude` AS `venues___longitude`," - " `test_data.venues`.`price` AS `venues___price` " - "FROM `test_data.venues` " - "LIMIT 1") - (let [native-query (atom nil)] +(defn- query->native [query] + (with-local-vars [native-query nil] (with-redefs [bigquery/process-native* (fn [_ sql] - (reset! native-query sql) + (var-set native-query sql) (throw (Exception. "Done.")))] (qp/process-query {:database (data/id) :type :query @@ -241,3 +230,23 @@ :query-type "MBQL" :query-hash (byte-array [1 2 3 4])}}) @native-query))) + +(expect-with-engine :bigquery + (str + "-- Metabase:: userID: 1000 queryType: MBQL queryHash: 01020304\n" + "SELECT `test_data.venues`.`id` AS `id`," + " `test_data.venues`.`name` AS `name`," + " `test_data.venues`.`category_id` AS `category_id`," + " `test_data.venues`.`latitude` AS `latitude`," + " `test_data.venues`.`longitude` AS `longitude`," + " `test_data.venues`.`price` AS `price` " + "FROM `test_data.venues` " + "LIMIT 1") + (query->native + {:database (data/id) + :type :query + :query {:source-table (data/id :venues) + :limit 1} + :info {:executed-by 1000 + :query-type "MBQL" + :query-hash (byte-array [1 2 3 4])}})) diff --git a/test/metabase/query_processor/middleware/cumulative_aggregations_test.clj b/test/metabase/query_processor/middleware/cumulative_aggregations_test.clj index 72a3e144a42..ae0921af28c 100644 --- a/test/metabase/query_processor/middleware/cumulative_aggregations_test.clj +++ b/test/metabase/query_processor/middleware/cumulative_aggregations_test.clj @@ -18,6 +18,7 @@ [1 4 6] (#'cumulative-aggregations/add-rows #{1 2} [1 2 3] [1 2 3])) +;; should throw an Exception if index is out of bounds (expect IndexOutOfBoundsException (#'cumulative-aggregations/add-rows #{4} [1 2 3] [1 2 3])) @@ -50,6 +51,34 @@ #{0 1} [[0 0] [1 1] [2 2] [3 3] [4 4] [5 5] [6 6] [7 7] [8 8] [9 9]])) +;; make sure cumulative aggregations still work correctly with lists... +(expect + [[1 1 1] [2 3 2] [3 6 3]] + (#'cumulative-aggregations/sum-rows #{1} '((1 1 1) (2 2 2) (3 3 3)))) + +;; ...and lazy sequences +(expect + [[1 1 1] [2 3 2] [3 6 3]] + (#'cumulative-aggregations/sum-rows #{1} (lazy-cat '((1 1 1)) '((2 2 2)) '((3 3 3))))) + +;; the results should be L A Z Y +(expect + {:fully-realized-after-taking-2? false + :fully-realized-after-taking-3? true} + (with-local-vars [fully-realized? false] + (let [a-lazy-seq (lazy-cat + '((1 1 1)) + '((2 2 2)) + (do + (var-set fully-realized? true) + '((3 3 3)))) + realize-n (fn [n] + (dorun (take n (#'cumulative-aggregations/sum-rows #{1} a-lazy-seq))) + @fully-realized?)] + {:fully-realized-after-taking-2? (realize-n 2) + :fully-realized-after-taking-3? (realize-n 3)}))) + + ;; can it go forever without a stack overflow? (expect [[4999850001] [4999950000]] diff --git a/test/metabase/query_processor_test.clj b/test/metabase/query_processor_test.clj index 469eb3ddd34..778f86b3e16 100644 --- a/test/metabase/query_processor_test.clj +++ b/test/metabase/query_processor_test.clj @@ -9,7 +9,9 @@ [driver :as driver] [util :as u]] [metabase.test.data :as data] - [metabase.test.data.datasets :as datasets] + [metabase.test.data + [datasets :as datasets] + [interface :as di]] [metabase.util.date :as du])) ;; make sure all the driver test extension namespaces are loaded <3 if this isn't done some things will get loaded at @@ -272,27 +274,9 @@ (aggregate-col :count) (aggregate-col :avg (venues-col :id))" - {:arglists '([ag-col-kw] [ag-col-kw field])} - ;; TODO - cumulative count doesn't require a FIELD !!!!!!!!! - ([ag-col-kw] - (assert (= ag-col-kw) :count) - {:base_type :type/Integer - :special_type :type/Number - :name "count" - :display_name "count" - :source :aggregation}) - ([ag-col-kw {:keys [base_type special_type]}] - {:pre [base_type special_type]} - (merge - {:base_type base_type - :special_type special_type - :settings nil - :name (name ag-col-kw) - :display_name (name ag-col-kw) - :source :aggregation} - ;; count always gets the same special type regardless - (when (= ag-col-kw :count) - (aggregate-col :count))))) + {:arglists '([ag-type] [ag-type field])} + [& args] + (apply di/aggregate-column-info datasets/*driver* args)) (defn breakout-col [col] (assoc col :source :breakout)) diff --git a/test/metabase/query_processor_test/order_by_test.clj b/test/metabase/query_processor_test/order_by_test.clj index d3edcadb6a1..2ef8d3f7c2c 100644 --- a/test/metabase/query_processor_test/order_by_test.clj +++ b/test/metabase/query_processor_test/order_by_test.clj @@ -115,10 +115,10 @@ (datasets/expect-with-engines (non-timeseries-engines-with-feature :standard-deviation-aggregations) {:columns [(data/format-name "price") "stddev"] - :rows [[3 (if (contains? #{:mysql :crate} *engine*) 25 26)] + :rows [[3 (if (#{:mysql :crate} *engine*) 25 26)] [1 24] [2 21] - [4 (if (contains? #{:mysql :crate} *engine*) 14 15)]] + [4 (if (#{:mysql :crate} *engine*) 14 15)]] :cols [(breakout-col (venues-col :price)) (aggregate-col :stddev (venues-col :category_id))] :native_form true} @@ -129,4 +129,4 @@ booleanize-native-form data (format-rows-by [int (comp int math/round)]) - tu/round-fingerprint-cols)) \ No newline at end of file + tu/round-fingerprint-cols)) diff --git a/test/metabase/test/data/bigquery.clj b/test/metabase/test/data/bigquery.clj index 23e0e38d253..9f9fb8a2ea7 100644 --- a/test/metabase/test/data/bigquery.clj +++ b/test/metabase/test/data/bigquery.clj @@ -10,6 +10,7 @@ [metabase.test.data [datasets :as datasets] [interface :as i]] + [metabase.query-processor-test :as qp.test] [metabase.util :as u] [metabase.util [date :as du] @@ -233,6 +234,17 @@ (destroy-dataset! database-name)) (throw e))))))) +(defn aggregate-column-info + ([driver aggregation-type] + (i/default-aggregate-column-info driver aggregation-type)) + ([driver aggregation-type field] + (merge + (i/default-aggregate-column-info driver aggregation-type field) + ;; BigQuery averages, standard deviations come back as Floats. This might apply to some other ag types as well; + ;; add them as we come across them. + (when (#{:avg :stddev} aggregation-type) + {:base_type :type/Float})))) + ;;; --------------------------------------------- IDriverTestExtensions ---------------------------------------------- @@ -241,4 +253,5 @@ (merge i/IDriverTestExtensionsDefaultsMixin {:engine (constantly :bigquery) :database->connection-details (u/drop-first-arg database->connection-details) - :create-db! (u/drop-first-arg create-db!)})) + :create-db! (u/drop-first-arg create-db!) + :aggregate-column-info aggregate-column-info})) diff --git a/test/metabase/test/data/interface.clj b/test/metabase/test/data/interface.clj index 7d538d13269..d8a3465dd79 100644 --- a/test/metabase/test/data/interface.clj +++ b/test/metabase/test/data/interface.clj @@ -71,6 +71,30 @@ ([session-schema _ db-name table-name] [session-schema (db-qualified-table-name db-name table-name)]) ([session-schema _ db-name table-name field-name] [session-schema (db-qualified-table-name db-name table-name) field-name])) +(defn default-aggregate-column-info + "Default implementation of `aggregate-column-info` for drivers using the `IDriverTestExtensionsDefaultsMixin`." + {:arglists '([driver aggregation-type] [driver aggregation-type field])} + ([_ aggregation-type] + ;; TODO - cumulative count doesn't require a FIELD !!!!!!!!! + (assert (= aggregation-type) :count) + {:base_type :type/Integer + :special_type :type/Number + :name "count" + :display_name "count" + :source :aggregation}) + ([driver aggregation-type {:keys [base_type special_type]}] + {:pre [base_type special_type]} + (merge + {:base_type base_type + :special_type special_type + :settings nil + :name (name aggregation-type) + :display_name (name aggregation-type) + :source :aggregation} + ;; count always gets the same special type regardless + (when (= aggregation-type :count) + (default-aggregate-column-info driver :count))))) + (defprotocol IMetabaseInstance (metabase-instance [this context] @@ -148,7 +172,11 @@ (id-field-type ^clojure.lang.Keyword [this] "*OPTIONAL* Return the `base_type` of the `id` `Field` (e.g. `:type/Integer` or `:type/BigInteger`). Defaults to - `:type/Integer`.")) + `:type/Integer`.") + + (aggregate-column-info [this aggregation-type] [this aggregation-type field] + "*OPTIONAL*. Return the expected type information that should come back for QP results as part of `:cols` for an + aggregation of a given type (and applied to a given Field, when applicable).")) (def IDriverTestExtensionsDefaultsMixin "Default implementations for the `IDriverTestExtensions` methods marked *OPTIONAL*." @@ -157,7 +185,8 @@ :format-name (u/drop-first-arg identity) :has-questionable-timezone-support? (fn [driver] (not (contains? (driver/features driver) :set-timezone))) - :id-field-type (constantly :type/Integer)}) + :id-field-type (constantly :type/Integer) + :aggregate-column-info default-aggregate-column-info}) ;; ## Helper Functions for Creating New Definitions -- GitLab