From 3ede018c5453053cc42cff1964e5ff6e6d82cb14 Mon Sep 17 00:00:00 2001
From: Cam Saul <cammsaul@gmail.com>
Date: Tue, 6 Nov 2018 13:47:49 -0800
Subject: [PATCH] Remove BigQuery driver custom implementation of field->alias
 [ci bigquery]

---
 .circleci/config.yml                          |  2 +-
 src/metabase/driver/bigquery.clj              | 35 +++------------
 src/metabase/mbql/util.clj                    |  7 ++-
 .../middleware/cumulative_aggregations.clj    |  6 +--
 test/metabase/driver/bigquery_test.clj        | 45 +++++++++++--------
 .../cumulative_aggregations_test.clj          | 29 ++++++++++++
 test/metabase/query_processor_test.clj        | 28 +++---------
 .../query_processor_test/order_by_test.clj    |  6 +--
 test/metabase/test/data/bigquery.clj          | 15 ++++++-
 test/metabase/test/data/interface.clj         | 33 +++++++++++++-
 10 files changed, 123 insertions(+), 83 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 0d9a0c00ccf..76ab5a5a0fb 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -351,7 +351,7 @@ jobs:
           command: >
             /home/circleci/metabase/metabase/.circleci/skip-driver-tests.sh redshift ||
             lein with-profile +ci test
-          no_output_timeout: 5m
+          no_output_timeout: 10m
 
 
   be-tests-presto:
diff --git a/src/metabase/driver/bigquery.clj b/src/metabase/driver/bigquery.clj
index 993331dfd02..ec288f523c6 100644
--- a/src/metabase/driver/bigquery.clj
+++ b/src/metabase/driver/bigquery.clj
@@ -344,20 +344,6 @@
       (throw (Exception. (str (tru "BigQuery statements can't be parameterized!")))))
     sql))
 
-(defn- post-process-mbql [table-name {:keys [columns rows]}]
-  ;; Say we have an identifier like `veryNiceDataset.shakespeare`.`corpus`. We will alias it like
-  ;; `shakespeare___corpus` (because BigQuery does not let you include symbols in identifiers); during post-processing
-  ;; we can go ahead and strip off the table name from the alias since we don't want it to show up in the result
-  ;; column names
-  (let [demangle-name #(str/replace % (re-pattern (str \^ table-name "___")) "")
-        columns       (map demangle-name columns)
-        rows          (for [row rows]
-                        (zipmap columns row))
-        columns       (vec (keys (first rows)))]
-    {:columns columns
-     :rows    (for [row rows]
-                (mapv row columns))}))
-
 ;; From the dox: Fields must contain only letters, numbers, and underscores, start with a letter or underscore, and be
 ;; at most 128 characters long.
 (defn- format-custom-field-name ^String [^String custom-field-name]
@@ -428,14 +414,6 @@
       [:named _ ag-name]         ag-name
       [ag-type & _]              ag-type)))
 
-(defn- field->alias
-  "Generate an appropriate alias for a `field`. This will normally be something like `tableName___fieldName` (done this
-  way because BigQuery will not let us include symbols in identifiers, so we can't make our alias be
-  `tableName.fieldName`, like we do for other drivers)."
-  [driver {field-name :name, table-id :table_id, :as field}]
-  (let [{table-name :name} (qp.store/table table-id)]
-    (str table-name "___" field-name)))
-
 (defn- field->identifier
   "Generate appropriate identifier for a Field for SQL parameters. (NOTE: THIS IS ONLY USED FOR SQL PARAMETERS!)"
   ;; TODO - Making a DB call for each field to fetch its Table is inefficient and makes me cry, but this method is
@@ -456,7 +434,7 @@
 (defn- field->breakout-identifier [driver field-clause]
   (let [alias (if (mbql.u/is-clause? :aggregation field-clause)
                 (ag-ref->alias field-clause)
-                (field->alias driver (field-clause->field field-clause)))]
+                (sql/field->alias driver (field-clause->field field-clause)))]
     (hsql/raw (str \` alias \`))))
 
 (defn- apply-breakout [driver honeysql-form {breakout-field-clauses :breakout, fields-field-clauses :fields}]
@@ -546,12 +524,10 @@
                        :as                                                    outer-query}]
   (let [database (qp.store/database)]
     (binding [*bigquery-timezone* (effective-query-timezone database)]
-      (let [sql     (str "-- " (qputil/query->remark outer-query) "\n" (if (seq params)
-                                                                         (unprepare/unprepare (cons sql params))
-                                                                         sql))
-            results (process-native* database sql)]
-        (cond->> results
-          mbql? (post-process-mbql table-name))))))
+      (let [sql (str "-- " (qputil/query->remark outer-query) "\n" (if (seq params)
+                                                                     (unprepare/unprepare (cons sql params))
+                                                                     sql))]
+        (process-native* database sql)))))
 
 
 ;; BigQuery doesn't return a timezone with it's time strings as it's always UTC, JodaTime parsing also defaults to UTC
@@ -570,7 +546,6 @@
           :connection-details->spec  (constantly nil)
           :current-datetime-fn       (constantly :%current_timestamp)
           :date                      (u/drop-first-arg date)
-          :field->alias              field->alias
           :field->identifier         (u/drop-first-arg field->identifier)
           :quote-style               (constantly :mysql)
           :string-length-fn          (u/drop-first-arg string-length-fn)
diff --git a/src/metabase/mbql/util.clj b/src/metabase/mbql/util.clj
index 2687a3be36c..9489c0be21a 100644
--- a/src/metabase/mbql/util.clj
+++ b/src/metabase/mbql/util.clj
@@ -243,8 +243,11 @@
 
 (s/defn query->source-table-id :- (s/maybe su/IntGreaterThanZero)
   "Return the source Table ID associated with `query`, if applicable; handles nested queries as well. If `query` is
-  `nil`, returns `nil`."
-  {:argslists '([outer-query])}
+  `nil`, returns `nil`.
+
+  Throws an Exception when it encounters a unresolved source query (i.e., the `:source-table \"card__id\"`
+  form), because it cannot return an accurate result for a query that has not yet been preprocessed."
+  {:arglists '([outer-query])}
   [{{source-table-id :source-table, source-query :source-query} :query, query-type :type, :as query}]
   (cond
     ;; for native queries, there's no source table to resolve
diff --git a/src/metabase/query_processor/middleware/cumulative_aggregations.clj b/src/metabase/query_processor/middleware/cumulative_aggregations.clj
index 923325f3c37..f4eb6a94cc0 100644
--- a/src/metabase/query_processor/middleware/cumulative_aggregations.clj
+++ b/src/metabase/query_processor/middleware/cumulative_aggregations.clj
@@ -31,16 +31,14 @@
   [[index & more] last-row row]
   (if-not index
     row
-    (recur more last-row (update row index (partial + (nth last-row index))))))
+    (recur more last-row (update (vec row) index (partial + (nth last-row index))))))
 
 (defn- sum-rows
   "Sum the values in `rows` at `indexes-to-sum`.
 
     (sum-rows #{0} [[1] [2] [3]]) ; -> [[1] [3] [6]]"
   [indexes-to-sum rows]
-  (reductions (partial add-rows indexes-to-sum)
-              (first rows)
-              (rest rows)))
+  (reductions (partial add-rows indexes-to-sum) rows))
 
 (defn handle-cumulative-aggregations
   "Middleware that implements `cum-count` and `cum-sum` aggregations. These clauses are replaced with `count` and `sum`
diff --git a/test/metabase/driver/bigquery_test.clj b/test/metabase/driver/bigquery_test.clj
index 8d899f96ff7..06775218852 100644
--- a/test/metabase/driver/bigquery_test.clj
+++ b/test/metabase/driver/bigquery_test.clj
@@ -151,13 +151,12 @@
 ;; alias, e.g. something like `categories__via__category_id`, which is considerably different from what other SQL
 ;; databases do. (#4218)
 (expect-with-engine :bigquery
-  (str "SELECT `test_data.categories__via__category_id`.`name` AS `categories___name`,"
-       " count(*) AS `count` "
-       "FROM `test_data.venues` "
+  (str "SELECT `test_data.categories__via__category_id`.`name` AS `name`,"
+       " count(*) AS `count` FROM `test_data.venues` "
        "LEFT JOIN `test_data.categories` `test_data.categories__via__category_id`"
        " ON `test_data.venues`.`category_id` = `test_data.categories__via__category_id`.`id` "
-       "GROUP BY `categories___name` "
-       "ORDER BY `categories___name` ASC")
+       "GROUP BY `name` "
+       "ORDER BY `name` ASC")
   ;; normally for test purposes BigQuery doesn't support foreign keys so override the function that checks that and
   ;; make it return `true` so this test proceeds as expected
   (with-redefs [driver/driver-supports?         (constantly true)
@@ -218,20 +217,10 @@
       (native-timestamp-query db "2018-08-31 00:00:00+07" "Asia/Jakarta"))))
 
 ;; if I run a BigQuery query, does it get a remark added to it?
-(expect-with-engine :bigquery
-  (str
-   "-- Metabase:: userID: 1000 queryType: MBQL queryHash: 01020304\n"
-   "SELECT `test_data.venues`.`id` AS `venues___id`,"
-   " `test_data.venues`.`name` AS `venues___name`,"
-   " `test_data.venues`.`category_id` AS `venues___category_id`,"
-   " `test_data.venues`.`latitude` AS `venues___latitude`,"
-   " `test_data.venues`.`longitude` AS `venues___longitude`,"
-   " `test_data.venues`.`price` AS `venues___price` "
-   "FROM `test_data.venues` "
-   "LIMIT 1")
-  (let [native-query (atom nil)]
+(defn- query->native [query]
+  (with-local-vars [native-query nil]
     (with-redefs [bigquery/process-native* (fn [_ sql]
-                                             (reset! native-query sql)
+                                             (var-set native-query sql)
                                              (throw (Exception. "Done.")))]
       (qp/process-query {:database (data/id)
                          :type     :query
@@ -241,3 +230,23 @@
                                     :query-type  "MBQL"
                                     :query-hash  (byte-array [1 2 3 4])}})
       @native-query)))
+
+(expect-with-engine :bigquery
+  (str
+   "-- Metabase:: userID: 1000 queryType: MBQL queryHash: 01020304\n"
+   "SELECT `test_data.venues`.`id` AS `id`,"
+   " `test_data.venues`.`name` AS `name`,"
+   " `test_data.venues`.`category_id` AS `category_id`,"
+   " `test_data.venues`.`latitude` AS `latitude`,"
+   " `test_data.venues`.`longitude` AS `longitude`,"
+   " `test_data.venues`.`price` AS `price` "
+   "FROM `test_data.venues` "
+   "LIMIT 1")
+  (query->native
+   {:database (data/id)
+    :type     :query
+    :query    {:source-table (data/id :venues)
+               :limit        1}
+    :info     {:executed-by 1000
+               :query-type  "MBQL"
+               :query-hash  (byte-array [1 2 3 4])}}))
diff --git a/test/metabase/query_processor/middleware/cumulative_aggregations_test.clj b/test/metabase/query_processor/middleware/cumulative_aggregations_test.clj
index 72a3e144a42..ae0921af28c 100644
--- a/test/metabase/query_processor/middleware/cumulative_aggregations_test.clj
+++ b/test/metabase/query_processor/middleware/cumulative_aggregations_test.clj
@@ -18,6 +18,7 @@
   [1 4 6]
   (#'cumulative-aggregations/add-rows #{1 2} [1 2 3] [1 2 3]))
 
+;; should throw an Exception if index is out of bounds
 (expect
   IndexOutOfBoundsException
   (#'cumulative-aggregations/add-rows #{4} [1 2 3] [1 2 3]))
@@ -50,6 +51,34 @@
    #{0 1}
    [[0 0] [1 1] [2 2] [3 3] [4 4] [5 5] [6 6] [7 7] [8 8] [9 9]]))
 
+;; make sure cumulative aggregations still work correctly with lists...
+(expect
+  [[1 1 1] [2 3 2] [3 6 3]]
+  (#'cumulative-aggregations/sum-rows #{1} '((1 1 1) (2 2 2) (3 3 3))))
+
+;; ...and lazy sequences
+(expect
+  [[1 1 1] [2 3 2] [3 6 3]]
+  (#'cumulative-aggregations/sum-rows #{1} (lazy-cat '((1 1 1)) '((2 2 2)) '((3 3 3)))))
+
+;; the results should be L A Z Y
+(expect
+  {:fully-realized-after-taking-2? false
+   :fully-realized-after-taking-3? true}
+  (with-local-vars [fully-realized? false]
+    (let [a-lazy-seq (lazy-cat
+                      '((1 1 1))
+                      '((2 2 2))
+                      (do
+                        (var-set fully-realized? true)
+                        '((3 3 3))))
+          realize-n  (fn [n]
+                       (dorun (take n (#'cumulative-aggregations/sum-rows #{1} a-lazy-seq)))
+                       @fully-realized?)]
+      {:fully-realized-after-taking-2? (realize-n 2)
+       :fully-realized-after-taking-3? (realize-n 3)})))
+
+
 ;; can it go forever without a stack overflow?
 (expect
   [[4999850001] [4999950000]]
diff --git a/test/metabase/query_processor_test.clj b/test/metabase/query_processor_test.clj
index 469eb3ddd34..778f86b3e16 100644
--- a/test/metabase/query_processor_test.clj
+++ b/test/metabase/query_processor_test.clj
@@ -9,7 +9,9 @@
              [driver :as driver]
              [util :as u]]
             [metabase.test.data :as data]
-            [metabase.test.data.datasets :as datasets]
+            [metabase.test.data
+             [datasets :as datasets]
+             [interface :as di]]
             [metabase.util.date :as du]))
 
 ;; make sure all the driver test extension namespaces are loaded <3 if this isn't done some things will get loaded at
@@ -272,27 +274,9 @@
 
     (aggregate-col :count)
     (aggregate-col :avg (venues-col :id))"
-  {:arglists '([ag-col-kw] [ag-col-kw field])}
-  ;; TODO - cumulative count doesn't require a FIELD !!!!!!!!!
-  ([ag-col-kw]
-   (assert (= ag-col-kw) :count)
-   {:base_type    :type/Integer
-    :special_type :type/Number
-    :name         "count"
-    :display_name "count"
-    :source       :aggregation})
-  ([ag-col-kw {:keys [base_type special_type]}]
-   {:pre [base_type special_type]}
-   (merge
-    {:base_type    base_type
-     :special_type special_type
-     :settings     nil
-     :name         (name ag-col-kw)
-     :display_name (name ag-col-kw)
-     :source       :aggregation}
-    ;; count always gets the same special type regardless
-    (when (= ag-col-kw :count)
-      (aggregate-col :count)))))
+  {:arglists '([ag-type] [ag-type field])}
+  [& args]
+  (apply di/aggregate-column-info datasets/*driver* args))
 
 (defn breakout-col [col]
   (assoc col :source :breakout))
diff --git a/test/metabase/query_processor_test/order_by_test.clj b/test/metabase/query_processor_test/order_by_test.clj
index d3edcadb6a1..2ef8d3f7c2c 100644
--- a/test/metabase/query_processor_test/order_by_test.clj
+++ b/test/metabase/query_processor_test/order_by_test.clj
@@ -115,10 +115,10 @@
 (datasets/expect-with-engines (non-timeseries-engines-with-feature :standard-deviation-aggregations)
   {:columns     [(data/format-name "price")
                  "stddev"]
-   :rows        [[3 (if (contains? #{:mysql :crate} *engine*) 25 26)]
+   :rows        [[3 (if (#{:mysql :crate} *engine*) 25 26)]
                  [1 24]
                  [2 21]
-                 [4 (if (contains? #{:mysql :crate} *engine*) 14 15)]]
+                 [4 (if (#{:mysql :crate} *engine*) 14 15)]]
    :cols        [(breakout-col (venues-col :price))
                  (aggregate-col :stddev (venues-col :category_id))]
    :native_form true}
@@ -129,4 +129,4 @@
        booleanize-native-form
        data
        (format-rows-by [int (comp int math/round)])
-       tu/round-fingerprint-cols))
\ No newline at end of file
+       tu/round-fingerprint-cols))
diff --git a/test/metabase/test/data/bigquery.clj b/test/metabase/test/data/bigquery.clj
index 23e0e38d253..9f9fb8a2ea7 100644
--- a/test/metabase/test/data/bigquery.clj
+++ b/test/metabase/test/data/bigquery.clj
@@ -10,6 +10,7 @@
             [metabase.test.data
              [datasets :as datasets]
              [interface :as i]]
+            [metabase.query-processor-test :as qp.test]
             [metabase.util :as u]
             [metabase.util
              [date :as du]
@@ -233,6 +234,17 @@
              (destroy-dataset! database-name))
            (throw e)))))))
 
+(defn aggregate-column-info
+  ([driver aggregation-type]
+   (i/default-aggregate-column-info driver aggregation-type))
+  ([driver aggregation-type field]
+   (merge
+    (i/default-aggregate-column-info driver aggregation-type field)
+    ;; BigQuery averages, standard deviations come back as Floats. This might apply to some other ag types as well;
+    ;; add them as we come across them.
+    (when (#{:avg :stddev} aggregation-type)
+      {:base_type :type/Float}))))
+
 
 ;;; --------------------------------------------- IDriverTestExtensions ----------------------------------------------
 
@@ -241,4 +253,5 @@
   (merge i/IDriverTestExtensionsDefaultsMixin
          {:engine                       (constantly :bigquery)
           :database->connection-details (u/drop-first-arg database->connection-details)
-          :create-db!                   (u/drop-first-arg create-db!)}))
+          :create-db!                   (u/drop-first-arg create-db!)
+          :aggregate-column-info        aggregate-column-info}))
diff --git a/test/metabase/test/data/interface.clj b/test/metabase/test/data/interface.clj
index 7d538d13269..d8a3465dd79 100644
--- a/test/metabase/test/data/interface.clj
+++ b/test/metabase/test/data/interface.clj
@@ -71,6 +71,30 @@
   ([session-schema _ db-name table-name]            [session-schema (db-qualified-table-name db-name table-name)])
   ([session-schema _ db-name table-name field-name] [session-schema (db-qualified-table-name db-name table-name) field-name]))
 
+(defn default-aggregate-column-info
+  "Default implementation of `aggregate-column-info` for drivers using the `IDriverTestExtensionsDefaultsMixin`."
+  {:arglists '([driver aggregation-type] [driver aggregation-type field])}
+  ([_ aggregation-type]
+   ;; TODO - cumulative count doesn't require a FIELD !!!!!!!!!
+   (assert (= aggregation-type) :count)
+   {:base_type    :type/Integer
+    :special_type :type/Number
+    :name         "count"
+    :display_name "count"
+    :source       :aggregation})
+  ([driver aggregation-type {:keys [base_type special_type]}]
+   {:pre [base_type special_type]}
+   (merge
+    {:base_type    base_type
+     :special_type special_type
+     :settings     nil
+     :name         (name aggregation-type)
+     :display_name (name aggregation-type)
+     :source       :aggregation}
+    ;; count always gets the same special type regardless
+    (when (= aggregation-type :count)
+      (default-aggregate-column-info driver :count)))))
+
 
 (defprotocol IMetabaseInstance
   (metabase-instance [this context]
@@ -148,7 +172,11 @@
 
   (id-field-type ^clojure.lang.Keyword [this]
     "*OPTIONAL* Return the `base_type` of the `id` `Field` (e.g. `:type/Integer` or `:type/BigInteger`). Defaults to
-    `:type/Integer`."))
+    `:type/Integer`.")
+
+  (aggregate-column-info [this aggregation-type] [this aggregation-type field]
+    "*OPTIONAL*. Return the expected type information that should come back for QP results as part of `:cols` for an
+     aggregation of a given type (and applied to a given Field, when applicable)."))
 
 (def IDriverTestExtensionsDefaultsMixin
   "Default implementations for the `IDriverTestExtensions` methods marked *OPTIONAL*."
@@ -157,7 +185,8 @@
    :format-name                        (u/drop-first-arg identity)
    :has-questionable-timezone-support? (fn [driver]
                                          (not (contains? (driver/features driver) :set-timezone)))
-   :id-field-type                      (constantly :type/Integer)})
+   :id-field-type                      (constantly :type/Integer)
+   :aggregate-column-info              default-aggregate-column-info})
 
 
 ;; ## Helper Functions for Creating New Definitions
-- 
GitLab