From 6176fe4d8f3f84bc94e53b1280ea99c7f8363fd3 Mon Sep 17 00:00:00 2001
From: Ngoc Khuat <qn.khuat@gmail.com>
Date: Mon, 25 Mar 2024 11:36:05 +0700
Subject: [PATCH] Homebrew getTables for postgres (#39670)

---
 .../redshift/src/metabase/driver/redshift.clj |  6 ++
 src/metabase/driver/postgres.clj              | 67 ++++++++++++++++++-
 src/metabase/driver/sql_jdbc/execute.clj      |  4 +-
 .../sql_jdbc/sync/describe_database.clj       | 61 +++++++++--------
 .../driver/sql_jdbc/sync/describe_table.clj   |  8 +--
 src/metabase/sync/sync_metadata/tables.clj    |  4 +-
 test/metabase/driver/postgres_test.clj        | 44 +++++++++++-
 7 files changed, 152 insertions(+), 42 deletions(-)

diff --git a/modules/drivers/redshift/src/metabase/driver/redshift.clj b/modules/drivers/redshift/src/metabase/driver/redshift.clj
index 8127d106697..4dbcbfa9e4b 100644
--- a/modules/drivers/redshift/src/metabase/driver/redshift.clj
+++ b/modules/drivers/redshift/src/metabase/driver/redshift.clj
@@ -51,6 +51,12 @@
   [& args]
   (apply (get-method driver/describe-table :sql-jdbc) args))
 
+;; don't use the Postgres implementation for `describe-database` as it uses a custom SQL to get the tables.
+;; we should do the same for Redshift tho
+(defmethod driver/describe-database :redshift
+ [& args]
+ (apply (get-method driver/describe-database :sql-jdbc) args))
+
 (defmethod sql-jdbc.sync/describe-fks-sql :redshift
   [driver & {:keys [schema-names table-names]}]
   (sql/format {:select (vec
diff --git a/src/metabase/driver/postgres.clj b/src/metabase/driver/postgres.clj
index 23effc9ce06..fc673670f24 100644
--- a/src/metabase/driver/postgres.clj
+++ b/src/metabase/driver/postgres.clj
@@ -7,6 +7,8 @@
    [clojure.string :as str]
    [clojure.walk :as walk]
    [honey.sql :as sql]
+   [honey.sql.helpers :as sql.helpers]
+   [honey.sql.pg-ops :as sql.pg-ops]
    [java-time.api :as t]
    [metabase.db :as mdb]
    [metabase.driver :as driver]
@@ -18,6 +20,7 @@
    [metabase.driver.sql-jdbc.connection :as sql-jdbc.conn]
    [metabase.driver.sql-jdbc.execute :as sql-jdbc.execute]
    [metabase.driver.sql-jdbc.sync :as sql-jdbc.sync]
+   [metabase.driver.sql-jdbc.sync.describe-database :as sql-jdbc.describe-database]
    [metabase.driver.sql.query-processor :as sql.qp]
    [metabase.driver.sql.query-processor.util :as sql.qp.u]
    [metabase.driver.sql.util :as sql.u]
@@ -50,7 +53,8 @@
 (comment
   ;; method impls live in these namespaces.
   postgres.actions/keep-me
-  postgres.ddl/keep-me)
+  postgres.ddl/keep-me
+  sql.pg-ops/keep-me)
 
 (driver/register! :postgres, :parent :sql-jdbc)
 
@@ -209,6 +213,67 @@
 
 (def ^:private ^:dynamic *enum-types* nil)
 
+(defn- get-tables-sql
+  [schemas table-names]
+  ;; Ref: https://github.com/davecramer/pgjdbc/blob/a714bfd/pgjdbc/src/main/java/org/postgresql/jdbc/PgDatabaseMetaData.java#L1272
+  (sql/format
+   (cond->  {:select    [[:n.nspname :schema]
+                         [:c.relname :name]
+                         [[:case-expr :c.relkind
+                           [:inline "r"] [:inline "TABLE"]
+                           [:inline "p"] [:inline "PARTITIONED TABLE"]
+                           [:inline "v"] [:inline "VIEW"]
+                           [:inline "f"] [:inline "FOREIGN TABLE"]
+                           [:inline "m"] [:inline "MATERIALIZED VIEW"]
+                           :else nil]
+                          :type]
+                         [:d.description :description]]
+             :from      [[:pg_catalog.pg_namespace :n]
+                         [:pg_catalog.pg_class :c]]
+             :left-join [[:pg_catalog.pg_description :d] [:and [:= :c.oid :d.objoid] [:= :d.objsubid 0] [:= :d.classoid [:raw "'pg_class'::regclass"]]]]
+             :where     [:and [:= :c.relnamespace :n.oid]
+                         ;; filter out system tables
+                         [(keyword "!~") :n.nspname "^pg_"] [:<> :n.nspname "information_schema"]
+                         ;; only get tables of type: TABLE, PARTITIONED TABLE, VIEW, FOREIGN TABLE, MATERIALIZED VIEW
+                         [:raw "c.relkind in ('r', 'p', 'v', 'f', 'm')"]]
+             :order-by  [:type :schema :name]}
+     (seq schemas)
+     (sql.helpers/where [:in :n.nspname schemas])
+
+     (seq table-names)
+     (sql.helpers/where [:in :c.relname table-names]))
+   {:dialect :ansi}))
+
+(defn- get-tables
+  ;; have it as its own method for ease of testing
+  [database schemas tables]
+  (sql-jdbc.execute/reducible-query database (get-tables-sql schemas tables)))
+
+(defn- describe-syncable-tables
+  [{driver :engine :as database}]
+  (reify clojure.lang.IReduceInit
+    (reduce [_ rf init]
+      (sql-jdbc.execute/do-with-connection-with-options
+       driver
+       database
+       nil
+       (fn [^Connection conn]
+         (reduce
+          rf
+          init
+          (when-let [syncable-schemas (seq (driver/syncable-schemas driver database))]
+            (let [have-select-privilege? (sql-jdbc.describe-database/have-select-privilege-fn driver conn)]
+              (eduction
+               (comp (filter have-select-privilege?)
+                     (map #(dissoc % :type)))
+               (get-tables database syncable-schemas nil))))))))))
+
+(defmethod driver/describe-database :postgres
+  [_driver database]
+  ;; TODO: we should figure out how to sync tables using transducer, this way we don't have to hold 100k tables in
+  ;; memrory in a set like this
+  {:tables (into #{} (describe-syncable-tables database))})
+
 ;; Describe the Fields present in a `table`. This just hands off to the normal SQL driver implementation of the same
 ;; name, but first fetches database enum types so we have access to them. These are simply binded to the dynamic var
 ;; and used later in `database-type->base-type`, which you will find below.
diff --git a/src/metabase/driver/sql_jdbc/execute.clj b/src/metabase/driver/sql_jdbc/execute.clj
index 4b09f238374..9061888aef6 100644
--- a/src/metabase/driver/sql_jdbc/execute.clj
+++ b/src/metabase/driver/sql_jdbc/execute.clj
@@ -717,6 +717,7 @@
   "Returns a reducible collection of rows as maps from `db` and a given SQL query. This is similar to [[jdbc/reducible-query]] but reuses the
   driver-specific configuration for the Connection and Statement/PreparedStatement. This is slightly different from [[execute-reducible-query]]
   in that it is not intended to be used as part of middleware. Keywordizes column names. "
+  {:added "0.49.0", :arglists '([db [sql & params]])}
   [db [sql & params]]
   (let [driver (:engine db)]
     (reify clojure.lang.IReduceInit
@@ -729,7 +730,7 @@
            (with-open [stmt          (statement-or-prepared-statement driver conn sql params nil)
                        ^ResultSet rs (try
                                        (let [max-rows 0] ; 0 means no limit
-                                         (execute-statement-or-prepared-statement! driver stmt max-rows params sql))
+                                          (execute-statement-or-prepared-statement! driver stmt max-rows params sql))
                                        (catch Throwable e
                                          (throw (ex-info (tru "Error executing query: {0}" (ex-message e))
                                                          {:driver driver
@@ -739,7 +740,6 @@
              ;; TODO - we should probably be using [[reducible-rows]] instead to convert to the correct types
              (reduce rf init (jdbc/reducible-result-set rs {})))))))))
 
-
 ;;; +----------------------------------------------------------------------------------------------------------------+
 ;;; |                                                 Actions Stuff                                                  |
 ;;; +----------------------------------------------------------------------------------------------------------------+
diff --git a/src/metabase/driver/sql_jdbc/sync/describe_database.clj b/src/metabase/driver/sql_jdbc/sync/describe_database.clj
index 7a26f7f72a2..30a4225f60d 100644
--- a/src/metabase/driver/sql_jdbc/sync/describe_database.clj
+++ b/src/metabase/driver/sql_jdbc/sync/describe_database.clj
@@ -92,6 +92,21 @@
          (.rollback conn))
        false))))
 
+(defn- jdbc-get-tables
+  [driver ^DatabaseMetaData metadata catalog schema-pattern tablename-pattern types]
+  (sql-jdbc.sync.common/reducible-results
+   #(.getTables metadata catalog
+                (some->> schema-pattern (driver/escape-entity-name-for-metadata driver))
+                (some->> tablename-pattern (driver/escape-entity-name-for-metadata driver))
+                (when (seq types) (into-array String types)))
+   (fn [^ResultSet rset]
+     (fn [] {:name        (.getString rset "TABLE_NAME")
+             :schema      (.getString rset "TABLE_SCHEM")
+             :description (when-let [remarks (.getString rset "REMARKS")]
+                            (when-not (str/blank? remarks)
+                              remarks))
+             :type        (.getString rset "TABLE_TYPE")}))))
+
 (defn db-tables
   "Fetch a JDBC Metadata ResultSet of tables in the DB, optionally limited to ones belonging to a given
   schema. Returns a reducible sequence of results."
@@ -101,18 +116,9 @@
   ;; this by passing in `"%"` instead -- consider making this the default behavior. See this Slack thread
   ;; https://metaboat.slack.com/archives/C04DN5VRQM6/p1706220295862639?thread_ts=1706156558.940489&cid=C04DN5VRQM6 for
   ;; more info.
-  (with-open [rset (.getTables metadata db-name-or-nil (some->> schema-or-nil (driver/escape-entity-name-for-metadata driver)) "%"
-                               (into-array String ["TABLE" "PARTITIONED TABLE" "VIEW" "FOREIGN TABLE" "MATERIALIZED VIEW"
-                                                   "EXTERNAL TABLE" "DYNAMIC_TABLE"]))]
-    (loop [acc []]
-      (if-not (.next rset)
-        acc
-        (recur (conj acc {:name        (.getString rset "TABLE_NAME")
-                          :schema      (.getString rset "TABLE_SCHEM")
-                          :description (when-let [remarks (.getString rset "REMARKS")]
-                                         (when-not (str/blank? remarks)
-                                           remarks))
-                          :type        (.getString rset "TABLE_TYPE")}))))))
+  (jdbc-get-tables driver metadata db-name-or-nil schema-or-nil "%"
+                   ["TABLE" "PARTITIONED TABLE" "VIEW" "FOREIGN TABLE" "MATERIALIZED VIEW"
+                    "EXTERNAL TABLE" "DYNAMIC_TABLE"]))
 
 (defn- schema+table-with-select-privileges
   [driver conn]
@@ -122,7 +128,7 @@
               [schema table]))
        set))
 
-(defn- have-select-privilege-fn
+(defn have-select-privilege-fn
   "Returns a function that take a map with 3 keys [:schema, :name, :type], return true if we can do a select query on the table.
 
   This function shouldn't be called a `map` or anything alike, instead use it as a cache function like so:
@@ -158,9 +164,11 @@
                                                                                      schema-inclusion-filters schema-exclusion-filters)
         have-select-privilege-fn? (have-select-privilege-fn driver conn)]
     (eduction (mapcat (fn [schema]
-                        (->> (db-tables driver metadata schema db-name-or-nil)
-                             (filter have-select-privilege-fn?)
-                             (map #(dissoc % :type))))) syncable-schemas)))
+                        (eduction
+                         (comp (filter have-select-privilege-fn?)
+                               (map #(dissoc % :type)))
+                         (db-tables driver metadata schema db-name-or-nil))))
+              syncable-schemas)))
 
 (defmethod sql-jdbc.sync.interface/active-tables :sql-jdbc
   [driver connection schema-inclusion-filters schema-exclusion-filters]
@@ -203,18 +211,9 @@
     db-or-id-or-spec
     nil
     (fn [^Connection conn]
-      (let [schema-filter-prop      (driver.u/find-schema-filters-prop driver)
-            has-schema-filter-prop? (some? schema-filter-prop)
-            database                (db-or-id-or-spec->database db-or-id-or-spec)
-            default-active-tbl-fn   #(into #{} (sql-jdbc.sync.interface/active-tables driver conn nil nil))]
-        (if has-schema-filter-prop?
-          ;; TODO: the else of this branch seems uncessary, why do you want to call describe-database on a database that
-          ;; does not exists?
-          (if (some? database)
-            (let [prop-nm                                 (:name schema-filter-prop)
-                  [inclusion-patterns exclusion-patterns] (driver.s/db-details->schema-filter-patterns
-                                                           prop-nm
-                                                           database)]
-              (into #{} (sql-jdbc.sync.interface/active-tables driver conn inclusion-patterns exclusion-patterns)))
-            (default-active-tbl-fn))
-          (default-active-tbl-fn)))))})
+      (let [schema-filter-prop   (driver.u/find-schema-filters-prop driver)
+            database             (db-or-id-or-spec->database db-or-id-or-spec)
+            [inclusion-patterns
+             exclusion-patterns] (when (some? schema-filter-prop)
+                                   (driver.s/db-details->schema-filter-patterns (:name schema-filter-prop) database))]
+        (into #{} (sql-jdbc.sync.interface/active-tables driver conn inclusion-patterns exclusion-patterns)))))})
diff --git a/src/metabase/driver/sql_jdbc/sync/describe_table.clj b/src/metabase/driver/sql_jdbc/sync/describe_table.clj
index 8ee116fc884..249c2aea43e 100644
--- a/src/metabase/driver/sql_jdbc/sync/describe_table.clj
+++ b/src/metabase/driver/sql_jdbc/sync/describe_table.clj
@@ -314,10 +314,10 @@
 (defn describe-fks
   "Default implementation of [[metabase.driver/describe-fks]] for JDBC drivers. Uses JDBC DatabaseMetaData."
   [driver db & {:keys [schema-names table-names] :as args}]
-    (if (or (and schema-names (empty? schema-names))
-            (and table-names (empty? table-names)))
-      []
-      (sql-jdbc.execute/reducible-query db (describe-fks-sql driver args))))
+  (if (or (and schema-names (empty? schema-names))
+          (and table-names (empty? table-names)))
+    []
+    (sql-jdbc.execute/reducible-query db (describe-fks-sql driver args))))
 
 (defn describe-table-indexes
   "Default implementation of [[metabase.driver/describe-table-indexes]] for SQL JDBC drivers. Uses JDBC DatabaseMetaData."
diff --git a/src/metabase/sync/sync_metadata/tables.clj b/src/metabase/sync/sync_metadata/tables.clj
index b1c6e8d1b16..dc71e71d6bb 100644
--- a/src/metabase/sync/sync_metadata/tables.clj
+++ b/src/metabase/sync/sync_metadata/tables.clj
@@ -129,8 +129,8 @@
   "Create `new-tables` for database, or if they already exist, mark them as active."
   [database :- i/DatabaseInstance
    new-tables :- [:set i/DatabaseMetadataTable]]
-  (log/info "Found new tables:"
-            (for [table new-tables]
+  (doseq [table new-tables]
+    (log/info "Found new table:"
               (sync-util/name-for-logging (mi/instance Table table))))
   (doseq [table new-tables]
     (create-or-reactivate-table! database table)))
diff --git a/test/metabase/driver/postgres_test.clj b/test/metabase/driver/postgres_test.clj
index 89007ac35d8..18d94eba10a 100644
--- a/test/metabase/driver/postgres_test.clj
+++ b/test/metabase/driver/postgres_test.clj
@@ -18,6 +18,7 @@
    [metabase.driver.sql-jdbc.connection :as sql-jdbc.conn]
    [metabase.driver.sql-jdbc.execute :as sql-jdbc.execute]
    [metabase.driver.sql-jdbc.sync :as sql-jdbc.sync]
+   [metabase.driver.sql-jdbc.sync.describe-database :as sql-jdbc.describe-database]
    [metabase.driver.sql.query-processor :as sql.qp]
    [metabase.driver.sql.query-processor-test-util :as sql.qp-test-util]
    [metabase.lib.test-metadata :as meta]
@@ -41,7 +42,9 @@
    [metabase.util.log :as log]
    [next.jdbc :as next.jdbc]
    [toucan2.core :as t2]
-   [toucan2.tools.with-temp :as t2.with-temp]))
+   [toucan2.tools.with-temp :as t2.with-temp])
+  (:import
+   (java.sql Connection)))
 
 (set! *warn-on-reflection* true)
 
@@ -332,7 +335,7 @@
 (deftest partitioned-table-test
   (mt/test-driver :postgres
     (testing (str "Make sure that partitioned tables (in addition to the individual partitions themselves) are
-                   synced properly (#15049")
+                   synced properly (#15049)")
       (let [db-name "partitioned_table_test"
             details (mt/dbdef->connection-details :postgres :db {:database-name db-name})
             spec    (sql-jdbc.conn/connection-details->spec :postgres details)]
@@ -1356,3 +1359,40 @@
     ;; Special characters
     (is (= "SET ROLE \"Role.123\";"   (driver.sql/set-role-statement :postgres "Role.123")))
     (is (= "SET ROLE \"$role\";"      (driver.sql/set-role-statement :postgres "$role")))))
+
+(deftest get-tables-parity-with-jdbc-test
+  (testing "make sure our get-tables return result consistent with jdbc getTables"
+    (mt/test-driver :postgres
+      (mt/with-empty-db
+        (sql-jdbc.execute/do-with-connection-with-options
+         :postgres
+         (mt/db)
+         nil
+         (fn [^Connection conn]
+           (let [do-test (fn [& {:keys [schema-pattern table-pattern schemas tables]
+                                 :or   {schema-pattern "public%" ;; postgres get-tables exclude system tables by default
+                                        schemas        nil
+                                        table-pattern  "%"
+                                        tables         nil}
+                                 :as _opts}]
+                           (is (= (into #{} (#'sql-jdbc.describe-database/jdbc-get-tables
+                                             :postgres (.getMetaData conn) nil schema-pattern table-pattern
+                                             ["TABLE" "PARTITIONED TABLE" "VIEW" "FOREIGN TABLE" "MATERIALIZED VIEW"]))
+                                  (into #{} (#'postgres/get-tables (mt/db) schemas tables)))))]
+             (doseq [stmt ["CREATE TABLE public.table (id INTEGER, type TEXT);"
+                           "CREATE UNIQUE INDEX idx_table_type ON public.table(type);"
+                           "CREATE TABLE public.partition_table (id INTEGER) PARTITION BY RANGE (id);"
+                           "CREATE UNIQUE INDEX idx_partition_table_id ON public.partition_table(id);"
+                           "CREATE SEQUENCE public.table_id_seq;"
+                           "CREATE VIEW public.view AS SELECT * FROM public.table;"
+                           "CREATE TYPE public.enum_type AS ENUM ('a', 'b', 'c');"
+                           "CREATE MATERIALIZED VIEW public.materialized_view AS SELECT * FROM public.table;"
+                           "CREATE SCHEMA public_2;"
+                           "CREATE TABLE public_2.table (id INTEGER);"]]
+               (next.jdbc/execute! conn [stmt]))
+             (testing "without any filters"
+               (do-test))
+             (testing "filter by schema"
+               (do-test :schema-pattern "private" :schemas ["private"]))
+             (testing "filter by table name"
+               (do-test :table-pattern "table" :tables ["table"])))))))))
-- 
GitLab