From f0b4f2f7a1e0ec20f0662633ab7dd1871c28ae0a Mon Sep 17 00:00:00 2001
From: Jeff Evans <jeff303@users.noreply.github.com>
Date: Thu, 16 Sep 2021 13:14:32 -0500
Subject: [PATCH] Handle REPEATED mode fields in BigQuery sync (#17907)

Recognize mode=REPEATED and return :type/Array for it

Add test for temp table using an array type column to ensure base type is recongized properly
---
 .../metabase/driver/bigquery_cloud_sdk.clj    | 51 ++++++++++++-------
 .../driver/bigquery_cloud_sdk_test.clj        | 15 ++++++
 2 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/modules/drivers/bigquery-cloud-sdk/src/metabase/driver/bigquery_cloud_sdk.clj b/modules/drivers/bigquery-cloud-sdk/src/metabase/driver/bigquery_cloud_sdk.clj
index b4713fde16a..58e0c66f55e 100644
--- a/modules/drivers/bigquery-cloud-sdk/src/metabase/driver/bigquery_cloud_sdk.clj
+++ b/modules/drivers/bigquery-cloud-sdk/src/metabase/driver/bigquery_cloud_sdk.clj
@@ -17,8 +17,8 @@
             [schema.core :as s])
   (:import com.google.auth.oauth2.ServiceAccountCredentials
            [com.google.cloud.bigquery BigQuery BigQuery$DatasetOption BigQuery$JobOption BigQuery$TableListOption
-                                      BigQuery$TableOption BigQueryOptions DatasetId EmptyTableResult Field FieldValue
-                                      FieldValueList QueryJobConfiguration Schema Table TableId TableResult]
+                                      BigQuery$TableOption BigQueryOptions DatasetId EmptyTableResult Field Field$Mode
+                                      FieldValue FieldValueList QueryJobConfiguration Schema Table TableId TableResult]
            java.io.ByteArrayInputStream
            java.util.Collections))
 
@@ -90,28 +90,38 @@
      (.getTable client (TableId/of project-id dataset-id table-id) empty-table-options)
      (.getTable client dataset-id table-id empty-table-options))))
 
-(defn- bigquery-type->base-type [field-type]
-  (case field-type
-    "BOOLEAN"    :type/Boolean
-    "FLOAT"      :type/Float
-    "INTEGER"    :type/Integer
-    "RECORD"     :type/Dictionary ; RECORD -> field has a nested schema
-    "STRING"     :type/Text
-    "DATE"       :type/Date
-    "DATETIME"   :type/DateTime
-    "TIMESTAMP"  :type/DateTimeWithLocalTZ
-    "TIME"       :type/Time
-    "NUMERIC"    :type/Decimal
-    "BIGNUMERIC" :type/Decimal
-    :type/*))
+(defn- bigquery-type->base-type
+  "Returns the base type for the given BigQuery field's `field-mode` and `field-type`. In BQ, an ARRAY of INTEGER has
+  \"REPEATED\" as the mode, and \"INTEGER\" as the type name.
+
+  If/when we are able to represent complex types more precisely, we may want to capture that information separately.
+  For now, though, we will check if the `field-mode` is \"REPEATED\" and return our :type/Array for that case, then
+  proceed to check the `field-type` otherwise."
+  [field-mode field-type]
+  (if (= Field$Mode/REPEATED field-mode)
+    :type/Array
+    (case field-type
+      "BOOLEAN"    :type/Boolean
+      "FLOAT"      :type/Float
+      "INTEGER"    :type/Integer
+      "RECORD"     :type/Dictionary ; RECORD -> field has a nested schema
+      "STRING"     :type/Text
+      "DATE"       :type/Date
+      "DATETIME"   :type/DateTime
+      "TIMESTAMP"  :type/DateTimeWithLocalTZ
+      "TIME"       :type/Time
+      "NUMERIC"    :type/Decimal
+      "BIGNUMERIC" :type/Decimal
+      :type/*)))
 
 (s/defn ^:private table-schema->metabase-field-info
   [schema :- Schema]
   (for [[idx ^Field field] (m/indexed (.getFields schema))]
-    (let [type-name (.. field getType name)]
+    (let [type-name (.. field getType name)
+          f-mode    (.getMode field)]
       {:name              (.getName field)
        :database-type     type-name
-       :base-type         (bigquery-type->base-type type-name)
+       :base-type         (bigquery-type->base-type f-mode type-name)
        :database-position idx})))
 
 (defmethod driver/describe-table :bigquery-cloud-sdk
@@ -146,8 +156,11 @@
   com.google.api.services.bigquery.model.GetQueryResultsResponse
   (job-complete? [this] (.getJobComplete ^com.google.api.services.bigquery.model.GetQueryResultsResponse this))
 
+  TableResult
+  (job-complete? [_] true)
+
   EmptyTableResult
-  (job-complete? [this] true))
+  (job-complete? [_] true))
 
 (defn do-with-finished-response
   "Impl for `with-finished-response`."
diff --git a/modules/drivers/bigquery-cloud-sdk/test/metabase/driver/bigquery_cloud_sdk_test.clj b/modules/drivers/bigquery-cloud-sdk/test/metabase/driver/bigquery_cloud_sdk_test.clj
index 0a4a22904eb..f42bcb8a3ad 100644
--- a/modules/drivers/bigquery-cloud-sdk/test/metabase/driver/bigquery_cloud_sdk_test.clj
+++ b/modules/drivers/bigquery-cloud-sdk/test/metabase/driver/bigquery_cloud_sdk_test.clj
@@ -315,3 +315,18 @@
                                          :target [:field (mt/id tbl-nm col-nm)]
                                          :value  [param-v]}]}))
                     first))))))))
+
+(deftest sync-table-with-array-test
+  (testing "Tables with ARRAY (REPEATED) columns can be synced successfully"
+    (do-with-temp-obj "table_array_type_%s"
+      (fn [tbl-nm] ["CREATE TABLE `v3_test_data.%s` AS SELECT 1 AS int_col, GENERATE_ARRAY(1,10) AS array_col"
+                    tbl-nm])
+      (fn [tbl-nm] ["DROP TABLE IF EXISTS `v3_test_data.%s`" tbl-nm])
+      (fn [tbl-nm]
+        (is (= {:schema nil
+                :name   tbl-nm
+                :fields #{{:name "int_col", :database-type "INTEGER", :base-type :type/Integer, :database-position 0}
+                          {:name "array_col", :database-type "INTEGER", :base-type :type/Array, :database-position 1}}}
+              (driver/describe-table :bigquery-cloud-sdk (mt/db) {:name tbl-nm}))
+          "`describe-table` should detect the correct base-type for array type columns")))))
+
-- 
GitLab