Skip to content
Snippets Groups Projects
Unverified Commit 1186871d authored by Case Nelson's avatar Case Nelson Committed by GitHub
Browse files

feat: BigQuery Faster Sync (#48027)


* feat: BigQuery Faster Sync

Use describe-fields for much faster big-query sync.

---------

Co-authored-by: default avatarmetamben <103100869+metamben@users.noreply.github.com>
parent f8a5a30d
Branches
Tags
No related merge requests found
......@@ -9,6 +9,7 @@
[metabase.driver.bigquery-cloud-sdk.common :as bigquery.common]
[metabase.driver.bigquery-cloud-sdk.params :as bigquery.params]
[metabase.driver.bigquery-cloud-sdk.query-processor :as bigquery.qp]
[metabase.driver.sql.query-processor :as sql.qp]
[metabase.driver.sql.util :as sql.u]
[metabase.driver.sync :as driver.s]
[metabase.lib.metadata :as lib.metadata]
......@@ -29,10 +30,9 @@
(:import
(clojure.lang PersistentList)
(com.google.cloud.bigquery BigQuery BigQuery$DatasetListOption BigQuery$JobOption BigQuery$TableDataListOption
BigQuery$TableListOption BigQuery$TableOption BigQueryException BigQueryOptions Dataset
DatasetId Field Field$Mode FieldValue FieldValueList MaterializedViewDefinition QueryJobConfiguration Schema
RangePartitioning TimePartitioning
StandardTableDefinition Table TableDefinition TableDefinition$Type TableId TableResult)
BigQuery$TableOption BigQueryException BigQueryOptions Dataset
Field Field$Mode FieldValue FieldValueList QueryJobConfiguration Schema
Table TableDefinition$Type TableId TableResult)
(java.util Iterator)))
(set! *warn-on-reflection* true)
......@@ -121,32 +121,25 @@
;;; | Sync |
;;; +----------------------------------------------------------------------------------------------------------------+
(defn- get-project-id
[{:keys [project-id] :as details}]
(or project-id (bigquery.common/database-details->credential-project-id details)))
(defn- list-datasets
"Fetch all datasets given database `details`, applying dataset filters if specified."
[{:keys [project-id dataset-filters-type dataset-filters-patterns] :as details}]
[{:keys [dataset-filters-type dataset-filters-patterns] :as details}]
(let [client (database-details->client details)
project-id (or project-id (bigquery.common/database-details->credential-project-id details))
project-id (get-project-id details)
datasets (.listDatasets client project-id (u/varargs BigQuery$DatasetListOption))
inclusion-patterns (when (= "inclusion" dataset-filters-type) dataset-filters-patterns)
exclusion-patterns (when (= "exclusion" dataset-filters-type) dataset-filters-patterns)]
(for [^Dataset dataset (.iterateAll datasets)
:let [^DatasetId dataset-id (.. dataset getDatasetId)]
:let [dataset-id (.. dataset getDatasetId getDataset)]
:when (driver.s/include-schema? inclusion-patterns
exclusion-patterns
(.getDataset dataset-id))]
dataset-id)]
dataset-id)))
(defn- list-tables
"Fetch all tables (new pages are loaded automatically by the API)."
(^Iterable [details]
(let [client (database-details->client details)
dataset-iter (list-datasets details)]
(apply concat (for [^DatasetId dataset-id dataset-iter]
(-> (.listTables client dataset-id (u/varargs BigQuery$TableListOption))
.iterateAll
.iterator
iterator-seq))))))
(defmethod driver/can-connect? :bigquery-cloud-sdk
[_ details]
;; check whether we can connect by seeing whether listing datasets succeeds
......@@ -180,80 +173,100 @@
(.getTable client (TableId/of project-id dataset-id table-id) empty-table-options)
(.getTable client dataset-id table-id empty-table-options))))
(defn- tabledef->range-partition
[^TableDefinition tabledef]
(condp = (.getType tabledef)
TableDefinition$Type/TABLE
(.getRangePartitioning ^StandardTableDefinition tabledef)
TableDefinition$Type/MATERIALIZED_VIEW
(.getRangePartitioning ^MaterializedViewDefinition tabledef)
nil))
(defn- tabledef->time-partition
[^TableDefinition tabledef]
(condp = (.getType tabledef)
TableDefinition$Type/TABLE
(.getTimePartitioning ^StandardTableDefinition tabledef)
TableDefinition$Type/MATERIALIZED_VIEW
(.getTimePartitioning ^MaterializedViewDefinition tabledef)
nil))
(defn- table-is-partitioned?
[^TableDefinition tabledef]
(when (#{TableDefinition$Type/TABLE TableDefinition$Type/MATERIALIZED_VIEW} (.getType tabledef))
(or (tabledef->range-partition tabledef)
(tabledef->time-partition tabledef))))
(declare *process-native*)
(defn- information-schema-table [project-id dataset-id table]
(keyword (format "%s.%s.INFORMATION_SCHEMA.%s" project-id dataset-id table)))
(defn- query-honeysql
"Query database with honeysql. Returns rows as maps with column names"
[driver database honeysql-form]
(let [[sql & params] (sql.qp/format-honeysql
driver
honeysql-form)]
(*process-native*
(fn [cols results]
(let [col-names (map (comp keyword :name) (:cols cols))]
(into [] (map #(zipmap col-names %)) results)))
database
sql
params
nil)))
(defn- describe-database-tables
[driver database]
(set
(for [dataset-id (list-datasets (:details database))
:let [project-id (get-project-id (:details database))
results (query-honeysql
driver
database
{:select [:table_name :table_type
[{:select [[[:= :option_value "true"]]]
:from [[(information-schema-table project-id dataset-id "TABLE_OPTIONS") :o]]
:where [:and
[:= :o.table_name :t.table_name]
[:= :o.option_name "require_partition_filter"]]}
:require_partition_filter]]
:from [[(information-schema-table project-id dataset-id "TABLES") :t]]})]
{table-name :table_name table-type :table_type require-partition-filter :require_partition_filter} results]
{:schema dataset-id
:name table-name
:database_require_filter
(boolean (and
;; Materialized views can be partitioned, and whether the view requires a filter or not is based
;; on the base table it selects from; without parsing the view query we can't find out the base table,
;; thus we can't know whether the view requires a filter or not.
;; Maybe this is something we can do once we can parse SQL.
(= "BASE TABLE" table-type)
require-partition-filter))})))
(defmethod driver/describe-database :bigquery-cloud-sdk
[_ database]
(let [tables (list-tables (:details database))]
{:tables (set (for [^Table table tables
:let [^TableId table-id (.getTableId table)
^String dataset-id (.getDataset table-id)
^TableDefinition tabledef (.getDefinition table)
table-name (str (.getTable table-id))]]
{:schema dataset-id
:name table-name
:database_require_filter
(boolean
(and
;; Materialized views can be partitioned, and whether the view requires a filter or not is based
;; on the base table it selects from; without parsing the view query we can't find out the base table,
;; thus we can't know whether the view requires a filter or not.
;; Maybe this is something we can do once we can parse SQL.
(= TableDefinition$Type/TABLE (. tabledef getType))
(when (table-is-partitioned? tabledef)
;; having to use `get-table` here is inefficient, but calling `(.getRequirePartitionFilter)`
;; on the `table` object from `list-tables` will return `nil` even though the table requires
;; a partition filter.
;; This is an upstream bug where the v2 API is incomplete when setting object values see
;; https://github.com/googleapis/java-bigquery/blob/main/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/spi/v2/HttpBigQueryRpc.java#L343C23-L343C23
;; Anyway, we only call it when the table is partitioned, so I don't think it's a big deal
(.getRequirePartitionFilter (get-table database dataset-id table-name)))))}))}))
(defn- bigquery-type->base-type
"Returns the base type for the given BigQuery field's `field-mode` and `field-type`. In BQ, an ARRAY of INTEGER has
\"REPEATED\" as the mode, and \"INTEGER\" as the type name.
If/when we are able to represent complex types more precisely, we may want to capture that information separately.
For now, though, we will check if the `field-mode` is \"REPEATED\" and return our :type/Array for that case, then
proceed to check the `field-type` otherwise."
[field-mode field-type]
(if (= Field$Mode/REPEATED field-mode)
:type/Array
(case field-type
"BOOLEAN" :type/Boolean
"FLOAT" :type/Float
"INTEGER" :type/Integer
"RECORD" :type/Dictionary ; RECORD -> field has a nested schema
"STRING" :type/Text
"DATE" :type/Date
"DATETIME" :type/DateTime
"TIMESTAMP" :type/DateTimeWithLocalTZ
"TIME" :type/Time
"NUMERIC" :type/Decimal
"BIGNUMERIC" :type/Decimal
:type/*)))
[driver database]
{:tables (describe-database-tables driver database)})
(defn- database-type->base-type
[database-type]
(case database-type
"ARRAY" :type/Array
"BOOLEAN" :type/Boolean
"FLOAT" :type/Float
"INTEGER" :type/Integer
"RECORD" :type/Dictionary ; RECORD -> field has a nested schema
"STRING" :type/Text
"DATE" :type/Date
"DATETIME" :type/DateTime
"TIMESTAMP" :type/DateTimeWithLocalTZ
"TIME" :type/Time
"JSON" :type/JSON
"NUMERIC" :type/Decimal
"BIGNUMERIC" :type/Decimal
:type/*))
(defn- field->database+base-type
"Returns a normalized `database-type` and its `base-type` for a type from BigQuery Field type.
In BQ, an ARRAY of INTEGER has \"REPEATED\" as the mode, and \"INTEGER\" as the type name."
[^Field field]
(let [field-type (.. field getType name)
field-mode (.getMode field)
database-type (if (= Field$Mode/REPEATED field-mode)
"ARRAY"
field-type)]
[database-type (database-type->base-type database-type)]))
(defn- raw-type->database+base-type
"Returns a normalized `database-type` and its `base-type` for a type from `INFORMATION_SCHEMA.COLUMNS.data_type`."
[raw-data-type]
(let [database-type (cond
(str/starts-with? raw-data-type "ARRAY") "ARRAY" ;; ARRAY<INT64>
(str/starts-with? raw-data-type "STRUCT") "RECORD" ;; STRUCT<INT64, FLOAT64>
(str/starts-with? raw-data-type "INT") "INTEGER" ;; INT64
(str/starts-with? raw-data-type "FLOAT") "FLOAT" ;; FLOAT 64
(= raw-data-type "BOOL") "BOOLEAN"
:else raw-data-type)]
[database-type (database-type->base-type database-type)]))
(mu/defn- fields->metabase-field-info
([fields]
......@@ -263,14 +276,12 @@
[]
(map
(fn [[idx ^Field field]]
(let [type-name (.. field getType name)
f-mode (.getMode field)
database-position (or database-position idx)
(let [database-position (or database-position idx)
field-name (.getName field)
base-type (bigquery-type->base-type f-mode type-name)]
[database-type base-type] (field->database+base-type field)]
(into
(cond-> {:name field-name
:database-type type-name
:database-type database-type
:base-type base-type
:database-position database-position}
nfc-path (assoc :nfc-path nfc-path)
......@@ -291,39 +302,106 @@
See https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table"
"_PARTITIONDATE")
(defmethod driver/describe-table :bigquery-cloud-sdk
[_ database {table-name :name, dataset-id :schema}]
(let [table (get-table database dataset-id table-name)
^TableDefinition tabledef (.getDefinition table)
is-partitioned? (table-is-partitioned? tabledef)
;; a table can only have one partitioned field
partitioned-field-name (when is-partitioned?
(or (some-> ^RangePartitioning (tabledef->range-partition tabledef) .getField)
(some-> ^TimePartitioning (tabledef->time-partition tabledef) .getField)))
fields (set
(map
#(assoc % :database-partitioned (= (:name %) partitioned-field-name))
(fields->metabase-field-info (.. tabledef getSchema getFields))))]
{:schema dataset-id
:name table-name
:fields (cond-> fields
;; if table has time partition but no field is specified as partitioned
;; meaning this table is partitioned by ingestion time
;; so we manually sync the 2 pseudo-columns _PARTITIONTIME AND _PARTITIONDATE
(and is-partitioned?
(some? (tabledef->time-partition tabledef))
(nil? partitioned-field-name))
(conj
{:name partitioned-time-field-name
:database-type "TIMESTAMP"
:base-type (bigquery-type->base-type nil "TIMESTAMP")
:database-position (count fields)
:database-partitioned true}
{:name partitioned-date-field-name
:database-type "DATE"
:base-type (bigquery-type->base-type nil "DATE")
:database-position (inc (count fields))
:database-partitioned true}))}))
(defn- build-nested-column-lookup
"Returns a map of table-name->parent-path->nested-columns"
[driver database project-id dataset-id table-names]
(let [results (query-honeysql
driver
database
(cond->
{:select [:table_name :column_name :data_type :field_path]
:from [[(information-schema-table project-id dataset-id "COLUMN_FIELD_PATHS") :c]]}
(not-empty table-names)
(assoc :where [:in :table_name table-names])))
nested-columns (map (fn [{data-type :data_type field-path-str :field_path table-name :table_name}]
(let [field-path (str/split field-path-str #"\.")
nfc-path (not-empty (pop field-path))
[database-type base-type] (raw-type->database+base-type data-type)]
{:name (peek field-path)
:table-name table-name
:table-schema dataset-id
:database-type database-type
:base-type base-type
:nfc-path nfc-path}))
results)]
(reduce
(fn [accum col]
(let [parent (:nfc-path col)]
(cond-> accum
parent
(update-in [(:table-name col) parent] (fnil conj []) col))))
{}
(sort-by (comp count :nfc-path) nested-columns))))
(defn- describe-dataset-fields
[driver database project-id dataset-id table-names]
(let [named-rows (query-honeysql
driver
database
(cond->
{:select [:table_name :column_name :data_type :ordinal_position
[[:= :is_partitioning_column "YES"] :partitioned]]
:from [[(information-schema-table project-id dataset-id "COLUMNS") :c]]}
(not-empty table-names)
(assoc :where [:in :table_name table-names])))
nested-column-lookup (build-nested-column-lookup driver database project-id dataset-id table-names)
maybe-add-nested-fields (fn maybe-add-nested-fields [col nfc-path root-database-position]
(let [new-path ((fnil conj []) nfc-path (:name col))
nested-fields (get-in nested-column-lookup [(:table-name col) new-path])]
(cond-> (assoc col :database-position root-database-position)
nested-fields
(assoc :nested-fields (into #{}
(map #(maybe-add-nested-fields % new-path root-database-position))
nested-fields)))))
max-position-per-table (reduce
(fn [accum {table-name :table_name pos :ordinal_position}]
(if (> (or pos 0) (get accum table-name -1))
(assoc accum table-name (or pos 0))
accum))
{}
named-rows)]
(mapcat (fn [{column-name :column_name
data-type :data_type
database-position :ordinal_position
partitioned? :partitioned
table-name :table_name}]
(let [database-position (or (some-> database-position dec)
(get max-position-per-table table-name 0))
[database-type base-type] (raw-type->database+base-type data-type)]
(cond-> [(maybe-add-nested-fields
{:name column-name
:table-name table-name
:table-schema dataset-id
:database-type database-type
:base-type base-type
:database-partitioned partitioned?
:database-position database-position}
nil
database-position)]
;; _PARTITIONDATE does not appear so add it in if we see _PARTITIONTIME
(= column-name partitioned-time-field-name)
(conj {:name partitioned-date-field-name
:table-name table-name
:table-schema dataset-id
:database-type "DATE"
:base-type :type/Date
:database-position (inc database-position)
:database-partitioned true}))))
named-rows)))
(defmethod driver/describe-fields :bigquery-cloud-sdk
[driver database & {:keys [schema-names table-names]}]
(let [project-id (get-project-id (:details database))
dataset-ids (or schema-names
(list-datasets (:details database)))]
(sort-by
(juxt :table-schema :table-name :database-position :name)
(into
[]
(mapcat
(fn [dataset-id]
(describe-dataset-fields driver database project-id dataset-id table-names)))
dataset-ids))))
(defn- get-field-parsers [^Schema schema]
(let [default-parser (get-method bigquery.qp/parse-result-of-type :default)]
......@@ -567,6 +645,7 @@
;;; +----------------------------------------------------------------------------------------------------------------+
(doseq [[feature supported?] {:convert-timezone true
:describe-fields true
:nested-fields true
:datetime-diff true
:expressions true
......
......@@ -4,6 +4,7 @@
[clojure.core.async :as a]
[clojure.string :as str]
[clojure.test :refer :all]
[clojure.walk :as walk]
[metabase.db.metadata-queries :as metadata-queries]
[metabase.driver :as driver]
[metabase.driver.bigquery-cloud-sdk :as bigquery]
......@@ -12,6 +13,7 @@
[metabase.query-processor :as qp]
[metabase.query-processor.compile :as qp.compile]
[metabase.query-processor.pipeline :as qp.pipeline]
[metabase.query-processor.store :as qp.store]
[metabase.sync :as sync]
[metabase.test :as mt]
[metabase.test.data.bigquery-cloud-sdk :as bigquery.tx]
......@@ -22,7 +24,7 @@
[toucan2.core :as t2]
[toucan2.tools.with-temp :as t2.with-temp])
(:import
(com.google.cloud.bigquery BigQuery DatasetId TableResult)))
(com.google.cloud.bigquery BigQuery TableResult)))
(set! *warn-on-reflection* true)
......@@ -50,8 +52,8 @@
(testing "can-connect? returns false for bogus credentials"
(is (false? (driver/can-connect? :bigquery-cloud-sdk (assoc db-details :project-id fake-proj-id)))))
(testing "can-connect? returns true for a valid dataset-id even with no tables"
(with-redefs [bigquery/list-tables (fn [& _]
[])]
(with-redefs [bigquery/describe-database-tables (fn [& _]
[])]
(is (true? (driver/can-connect? :bigquery-cloud-sdk db-details)))))
(testing "can-connect? returns an appropriate exception message if no datasets are found"
(is (thrown-with-msg? Exception
......@@ -240,14 +242,22 @@
(is (contains? (:tables (driver/describe-database :bigquery-cloud-sdk (mt/db)))
{:schema test-db-name :name view-name :database_require_filter false})
"`describe-database` should see the view")
(is (= {:schema test-db-name
:name view-name
:fields #{{:name "id", :database-type "INTEGER" :base-type :type/Integer :database-position 0 :database-partitioned false}
{:name "venue_name", :database-type "STRING" :base-type :type/Text :database-position 1 :database-partitioned false}
{:name "category_name", :database-type "STRING" :base-type :type/Text :database-position 2 :database-partitioned false}}}
(driver/describe-table :bigquery-cloud-sdk (mt/db) {:name view-name, :schema test-db-name}))
"`describe-tables` should see the fields in the view")
(is (= [{:name "id", :database-type "INTEGER" :base-type :type/Integer :database-position 0 :database-partitioned false :table-name view-name :table-schema test-db-name}
{:name "venue_name", :database-type "STRING" :base-type :type/Text :database-position 1 :database-partitioned false :table-name view-name :table-schema test-db-name}
{:name "category_name", :database-type "STRING" :base-type :type/Text :database-position 2 :database-partitioned false :table-name view-name :table-schema test-db-name}]
(driver/describe-fields :bigquery-cloud-sdk (mt/db) {:table-names [view-name], :schema-names [test-db-name]}))
"`describe-fields` should see the fields in the view")
(sync/sync-database! (mt/db) {:scan :schema})
(testing "describe-database"
(qp.store/with-metadata-provider (mt/id)
(is (= #{{:schema test-db-name
:name view-name
:database_require_filter false}}
(into #{}
(filter (comp #{view-name} :name))
(:tables (driver/describe-database :bigquery-cloud-sdk (mt/db))))))))
(testing "We should be able to run queries against the view (#3414)"
(is (= [[1 "Red Medicine" "Asian"]
[2 "Stout Burgers & Beers" "Burger"]
......@@ -268,6 +278,15 @@
(fmt-table-name "orders"))]]
(bigquery.tx/execute! sql))
(sync/sync-database! (mt/db) {:scan :schema})
(testing "describe-database"
(qp.store/with-metadata-provider (mt/id)
(is (= #{{:schema test-db-name
:name view-name
:database_require_filter false}}
(into #{}
(filter (comp #{view-name} :name))
(:tables (driver/describe-database :bigquery-cloud-sdk (mt/db))))))))
(testing "We should be able to run queries against the view (#3414)"
(is (= [[1 93] [2 98] [3 77]]
(mt/rows
......@@ -302,13 +321,42 @@
:bigquery-cloud-sdk
(mt/dataset
nested-records
(is (= {:columns ["r.a" "r.b" "r.rr.aa"]
:rows [[1 "a" 10] [2 "b" nil] [3 "c" nil]]}
(mt/rows+column-names
(mt/run-mbql-query records
{:fields [(mt/id :records :r :a)
(mt/id :records :r :b)
(mt/id :records :r :rr :aa)]})))))))
(let [database (driver/describe-database :bigquery-cloud-sdk (mt/db))
table (first (:tables database))]
(is (=? {:name "records"} table))
(is (=? [{:name "id"}
{:name "name"}
{:name "r"
:database-type "RECORD",
:base-type :type/Dictionary,
:database-position 2
:nested-fields [{:name "a",
:database-type "INTEGER",
:base-type :type/Integer,
:database-position 2,
:nfc-path ["r"]}
{:name "b",
:database-type "STRING",
:base-type :type/Text,
:database-position 2,
:nfc-path ["r"]}
{:name "rr",
:database-type "RECORD",
:base-type :type/Dictionary,
:database-position 2,
:nfc-path ["r"],
:nested-fields
[{:name "aa",
:database-type "INTEGER",
:base-type :type/Integer,
:database-position 2,
:nfc-path ["r" "rr"]}]}]}]
(walk/postwalk
(fn [n]
(if (set? n)
(sort-by :name n)
n))
(driver/describe-fields :bigquery-cloud-sdk (mt/db) {:table-names [(:name table)]}))))))))
(deftest query-nested-fields-test
(mt/test-driver
......@@ -355,6 +403,24 @@
(bigquery.tx/execute! sql))
(sync/sync-database! (mt/db) {:scan :schema})
(testing "describe-database"
(qp.store/with-metadata-provider (mt/id)
(is (= #{{:schema test-db-name
:name "partition_by_ingestion_time",
:database_require_filter true}
{:schema test-db-name, :name "partition_by_time", :database_require_filter true}
{:schema test-db-name, :name "partition_by_range", :database_require_filter true}
{:schema test-db-name,
:name "partition_by_range_not_required",
:database_require_filter false}}
(into #{}
(filter (comp #{"partition_by_range"
"partition_by_time"
"partition_by_ingestion_time"
"partition_by_range_not_required"
"partition_by_ingestion_time_not_required"} :name))
(:tables (driver/describe-database :bigquery-cloud-sdk (mt/db))))))))
(testing "tables that require a filter are correctly identified"
(is (= table-name->is-filter-required?
(t2/select-fn->fn :name :database_require_filter :model/Table
......@@ -655,30 +721,36 @@
(is (contains? (:tables (driver/describe-database :bigquery-cloud-sdk (mt/db)))
{:schema test-db-name :name tbl-nm :database_require_filter false})
"`describe-database` should see the table")
(is (= {:schema test-db-name
:name tbl-nm
:fields #{{:base-type :type/Decimal
:database-partitioned false
:database-position 0
:database-type "NUMERIC"
:name "numeric_col"}
{:base-type :type/Decimal
:database-partitioned false
:database-position 1
:database-type "NUMERIC"
:name "decimal_col"}
{:base-type :type/Decimal
:database-partitioned false
:database-position 2
:database-type "BIGNUMERIC"
:name "bignumeric_col"}
{:base-type :type/Decimal
:database-partitioned false
:database-position 3
:database-type "BIGNUMERIC"
:name "bigdecimal_col"}}}
(driver/describe-table :bigquery-cloud-sdk (mt/db) {:name tbl-nm :schema test-db-name}))
"`describe-table` should see the fields in the table")
(is (= [{:base-type :type/Decimal
:table-name tbl-nm
:table-schema test-db-name
:database-partitioned false
:database-position 0
:database-type "NUMERIC"
:name "numeric_col"}
{:base-type :type/Decimal
:table-name tbl-nm
:table-schema test-db-name
:database-partitioned false
:database-position 1
:database-type "NUMERIC"
:name "decimal_col"}
{:base-type :type/Decimal
:table-name tbl-nm
:table-schema test-db-name
:database-partitioned false
:database-position 2
:database-type "BIGNUMERIC"
:name "bignumeric_col"}
{:base-type :type/Decimal
:table-name tbl-nm
:table-schema test-db-name
:database-partitioned false
:database-position 3
:database-type "BIGNUMERIC"
:name "bigdecimal_col"}]
(driver/describe-fields :bigquery-cloud-sdk (mt/db) {:table-names [tbl-nm] :schema-names [test-db-name]}))
"`describe-fields` should see the fields in the table")
(sync/sync-database! (mt/db) {:scan :schema})
(testing "We should be able to run queries against the table"
(doseq [[col-nm param-v] [[:numeric_col (bigdec numeric-val)]
......@@ -705,12 +777,10 @@
tbl-nm])
(fn [tbl-nm] ["DROP TABLE IF EXISTS `%s.%s`" test-db-name tbl-nm])
(fn [tbl-nm]
(is (= {:schema test-db-name
:name tbl-nm
:fields #{{:name "int_col" :database-type "INTEGER" :base-type :type/Integer :database-position 0 :database-partitioned false}
{:name "array_col" :database-type "INTEGER" :base-type :type/Array :database-position 1 :database-partitioned false}}}
(driver/describe-table :bigquery-cloud-sdk (mt/db) {:name tbl-nm :schema test-db-name}))
"`describe-table` should detect the correct base-type for array type columns")))))
(is (= [{:name "int_col" :database-type "INTEGER" :base-type :type/Integer :database-position 0 :database-partitioned false :table-name tbl-nm :table-schema test-db-name}
{:name "array_col" :database-type "ARRAY" :base-type :type/Array :database-position 1 :database-partitioned false :table-name tbl-nm :table-schema test-db-name}]
(driver/describe-fields :bigquery-cloud-sdk (mt/db) {:table-names [tbl-nm] :schema-names [test-db-name]}))
"`describe-fields` should detect the correct base-type for array type columns")))))
(deftest sync-inactivates-old-duplicate-tables
(testing "If on the new driver, then downgrade, then upgrade again (#21981)"
......@@ -893,11 +963,10 @@
(mt/db) ;; force the creation of another test dataset
(let [;; This test is implemented in this way to avoid having to create new datasets, and to avoid
;; syncing most of the tables in the test DB.
datasets (#'bigquery/list-datasets (-> (mt/db)
:details
(dissoc :dataset-filters-type
:dataset-filters-patterns)))
dataset-ids (map #(.getDataset ^DatasetId %) datasets)
dataset-ids (#'bigquery/list-datasets (-> (mt/db)
:details
(dissoc :dataset-filters-type
:dataset-filters-patterns)))
;; get the first 4 characters of each dataset-id. The first 4 characters are used because the first 3 are
;; often used for bigquery dataset names e.g. `v4_test_data`
prefixes (->> dataset-ids
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment