From 6176fe4d8f3f84bc94e53b1280ea99c7f8363fd3 Mon Sep 17 00:00:00 2001 From: Ngoc Khuat <qn.khuat@gmail.com> Date: Mon, 25 Mar 2024 11:36:05 +0700 Subject: [PATCH] Homebrew getTables for postgres (#39670) --- .../redshift/src/metabase/driver/redshift.clj | 6 ++ src/metabase/driver/postgres.clj | 67 ++++++++++++++++++- src/metabase/driver/sql_jdbc/execute.clj | 4 +- .../sql_jdbc/sync/describe_database.clj | 61 +++++++++-------- .../driver/sql_jdbc/sync/describe_table.clj | 8 +-- src/metabase/sync/sync_metadata/tables.clj | 4 +- test/metabase/driver/postgres_test.clj | 44 +++++++++++- 7 files changed, 152 insertions(+), 42 deletions(-) diff --git a/modules/drivers/redshift/src/metabase/driver/redshift.clj b/modules/drivers/redshift/src/metabase/driver/redshift.clj index 8127d106697..4dbcbfa9e4b 100644 --- a/modules/drivers/redshift/src/metabase/driver/redshift.clj +++ b/modules/drivers/redshift/src/metabase/driver/redshift.clj @@ -51,6 +51,12 @@ [& args] (apply (get-method driver/describe-table :sql-jdbc) args)) +;; don't use the Postgres implementation for `describe-database` as it uses a custom SQL to get the tables. +;; we should do the same for Redshift tho +(defmethod driver/describe-database :redshift + [& args] + (apply (get-method driver/describe-database :sql-jdbc) args)) + (defmethod sql-jdbc.sync/describe-fks-sql :redshift [driver & {:keys [schema-names table-names]}] (sql/format {:select (vec diff --git a/src/metabase/driver/postgres.clj b/src/metabase/driver/postgres.clj index 23effc9ce06..fc673670f24 100644 --- a/src/metabase/driver/postgres.clj +++ b/src/metabase/driver/postgres.clj @@ -7,6 +7,8 @@ [clojure.string :as str] [clojure.walk :as walk] [honey.sql :as sql] + [honey.sql.helpers :as sql.helpers] + [honey.sql.pg-ops :as sql.pg-ops] [java-time.api :as t] [metabase.db :as mdb] [metabase.driver :as driver] @@ -18,6 +20,7 @@ [metabase.driver.sql-jdbc.connection :as sql-jdbc.conn] [metabase.driver.sql-jdbc.execute :as sql-jdbc.execute] [metabase.driver.sql-jdbc.sync :as sql-jdbc.sync] + [metabase.driver.sql-jdbc.sync.describe-database :as sql-jdbc.describe-database] [metabase.driver.sql.query-processor :as sql.qp] [metabase.driver.sql.query-processor.util :as sql.qp.u] [metabase.driver.sql.util :as sql.u] @@ -50,7 +53,8 @@ (comment ;; method impls live in these namespaces. postgres.actions/keep-me - postgres.ddl/keep-me) + postgres.ddl/keep-me + sql.pg-ops/keep-me) (driver/register! :postgres, :parent :sql-jdbc) @@ -209,6 +213,67 @@ (def ^:private ^:dynamic *enum-types* nil) +(defn- get-tables-sql + [schemas table-names] + ;; Ref: https://github.com/davecramer/pgjdbc/blob/a714bfd/pgjdbc/src/main/java/org/postgresql/jdbc/PgDatabaseMetaData.java#L1272 + (sql/format + (cond-> {:select [[:n.nspname :schema] + [:c.relname :name] + [[:case-expr :c.relkind + [:inline "r"] [:inline "TABLE"] + [:inline "p"] [:inline "PARTITIONED TABLE"] + [:inline "v"] [:inline "VIEW"] + [:inline "f"] [:inline "FOREIGN TABLE"] + [:inline "m"] [:inline "MATERIALIZED VIEW"] + :else nil] + :type] + [:d.description :description]] + :from [[:pg_catalog.pg_namespace :n] + [:pg_catalog.pg_class :c]] + :left-join [[:pg_catalog.pg_description :d] [:and [:= :c.oid :d.objoid] [:= :d.objsubid 0] [:= :d.classoid [:raw "'pg_class'::regclass"]]]] + :where [:and [:= :c.relnamespace :n.oid] + ;; filter out system tables + [(keyword "!~") :n.nspname "^pg_"] [:<> :n.nspname "information_schema"] + ;; only get tables of type: TABLE, PARTITIONED TABLE, VIEW, FOREIGN TABLE, MATERIALIZED VIEW + [:raw "c.relkind in ('r', 'p', 'v', 'f', 'm')"]] + :order-by [:type :schema :name]} + (seq schemas) + (sql.helpers/where [:in :n.nspname schemas]) + + (seq table-names) + (sql.helpers/where [:in :c.relname table-names])) + {:dialect :ansi})) + +(defn- get-tables + ;; have it as its own method for ease of testing + [database schemas tables] + (sql-jdbc.execute/reducible-query database (get-tables-sql schemas tables))) + +(defn- describe-syncable-tables + [{driver :engine :as database}] + (reify clojure.lang.IReduceInit + (reduce [_ rf init] + (sql-jdbc.execute/do-with-connection-with-options + driver + database + nil + (fn [^Connection conn] + (reduce + rf + init + (when-let [syncable-schemas (seq (driver/syncable-schemas driver database))] + (let [have-select-privilege? (sql-jdbc.describe-database/have-select-privilege-fn driver conn)] + (eduction + (comp (filter have-select-privilege?) + (map #(dissoc % :type))) + (get-tables database syncable-schemas nil)))))))))) + +(defmethod driver/describe-database :postgres + [_driver database] + ;; TODO: we should figure out how to sync tables using transducer, this way we don't have to hold 100k tables in + ;; memrory in a set like this + {:tables (into #{} (describe-syncable-tables database))}) + ;; Describe the Fields present in a `table`. This just hands off to the normal SQL driver implementation of the same ;; name, but first fetches database enum types so we have access to them. These are simply binded to the dynamic var ;; and used later in `database-type->base-type`, which you will find below. diff --git a/src/metabase/driver/sql_jdbc/execute.clj b/src/metabase/driver/sql_jdbc/execute.clj index 4b09f238374..9061888aef6 100644 --- a/src/metabase/driver/sql_jdbc/execute.clj +++ b/src/metabase/driver/sql_jdbc/execute.clj @@ -717,6 +717,7 @@ "Returns a reducible collection of rows as maps from `db` and a given SQL query. This is similar to [[jdbc/reducible-query]] but reuses the driver-specific configuration for the Connection and Statement/PreparedStatement. This is slightly different from [[execute-reducible-query]] in that it is not intended to be used as part of middleware. Keywordizes column names. " + {:added "0.49.0", :arglists '([db [sql & params]])} [db [sql & params]] (let [driver (:engine db)] (reify clojure.lang.IReduceInit @@ -729,7 +730,7 @@ (with-open [stmt (statement-or-prepared-statement driver conn sql params nil) ^ResultSet rs (try (let [max-rows 0] ; 0 means no limit - (execute-statement-or-prepared-statement! driver stmt max-rows params sql)) + (execute-statement-or-prepared-statement! driver stmt max-rows params sql)) (catch Throwable e (throw (ex-info (tru "Error executing query: {0}" (ex-message e)) {:driver driver @@ -739,7 +740,6 @@ ;; TODO - we should probably be using [[reducible-rows]] instead to convert to the correct types (reduce rf init (jdbc/reducible-result-set rs {}))))))))) - ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | Actions Stuff | ;;; +----------------------------------------------------------------------------------------------------------------+ diff --git a/src/metabase/driver/sql_jdbc/sync/describe_database.clj b/src/metabase/driver/sql_jdbc/sync/describe_database.clj index 7a26f7f72a2..30a4225f60d 100644 --- a/src/metabase/driver/sql_jdbc/sync/describe_database.clj +++ b/src/metabase/driver/sql_jdbc/sync/describe_database.clj @@ -92,6 +92,21 @@ (.rollback conn)) false)))) +(defn- jdbc-get-tables + [driver ^DatabaseMetaData metadata catalog schema-pattern tablename-pattern types] + (sql-jdbc.sync.common/reducible-results + #(.getTables metadata catalog + (some->> schema-pattern (driver/escape-entity-name-for-metadata driver)) + (some->> tablename-pattern (driver/escape-entity-name-for-metadata driver)) + (when (seq types) (into-array String types))) + (fn [^ResultSet rset] + (fn [] {:name (.getString rset "TABLE_NAME") + :schema (.getString rset "TABLE_SCHEM") + :description (when-let [remarks (.getString rset "REMARKS")] + (when-not (str/blank? remarks) + remarks)) + :type (.getString rset "TABLE_TYPE")})))) + (defn db-tables "Fetch a JDBC Metadata ResultSet of tables in the DB, optionally limited to ones belonging to a given schema. Returns a reducible sequence of results." @@ -101,18 +116,9 @@ ;; this by passing in `"%"` instead -- consider making this the default behavior. See this Slack thread ;; https://metaboat.slack.com/archives/C04DN5VRQM6/p1706220295862639?thread_ts=1706156558.940489&cid=C04DN5VRQM6 for ;; more info. - (with-open [rset (.getTables metadata db-name-or-nil (some->> schema-or-nil (driver/escape-entity-name-for-metadata driver)) "%" - (into-array String ["TABLE" "PARTITIONED TABLE" "VIEW" "FOREIGN TABLE" "MATERIALIZED VIEW" - "EXTERNAL TABLE" "DYNAMIC_TABLE"]))] - (loop [acc []] - (if-not (.next rset) - acc - (recur (conj acc {:name (.getString rset "TABLE_NAME") - :schema (.getString rset "TABLE_SCHEM") - :description (when-let [remarks (.getString rset "REMARKS")] - (when-not (str/blank? remarks) - remarks)) - :type (.getString rset "TABLE_TYPE")})))))) + (jdbc-get-tables driver metadata db-name-or-nil schema-or-nil "%" + ["TABLE" "PARTITIONED TABLE" "VIEW" "FOREIGN TABLE" "MATERIALIZED VIEW" + "EXTERNAL TABLE" "DYNAMIC_TABLE"])) (defn- schema+table-with-select-privileges [driver conn] @@ -122,7 +128,7 @@ [schema table])) set)) -(defn- have-select-privilege-fn +(defn have-select-privilege-fn "Returns a function that take a map with 3 keys [:schema, :name, :type], return true if we can do a select query on the table. This function shouldn't be called a `map` or anything alike, instead use it as a cache function like so: @@ -158,9 +164,11 @@ schema-inclusion-filters schema-exclusion-filters) have-select-privilege-fn? (have-select-privilege-fn driver conn)] (eduction (mapcat (fn [schema] - (->> (db-tables driver metadata schema db-name-or-nil) - (filter have-select-privilege-fn?) - (map #(dissoc % :type))))) syncable-schemas))) + (eduction + (comp (filter have-select-privilege-fn?) + (map #(dissoc % :type))) + (db-tables driver metadata schema db-name-or-nil)))) + syncable-schemas))) (defmethod sql-jdbc.sync.interface/active-tables :sql-jdbc [driver connection schema-inclusion-filters schema-exclusion-filters] @@ -203,18 +211,9 @@ db-or-id-or-spec nil (fn [^Connection conn] - (let [schema-filter-prop (driver.u/find-schema-filters-prop driver) - has-schema-filter-prop? (some? schema-filter-prop) - database (db-or-id-or-spec->database db-or-id-or-spec) - default-active-tbl-fn #(into #{} (sql-jdbc.sync.interface/active-tables driver conn nil nil))] - (if has-schema-filter-prop? - ;; TODO: the else of this branch seems uncessary, why do you want to call describe-database on a database that - ;; does not exists? - (if (some? database) - (let [prop-nm (:name schema-filter-prop) - [inclusion-patterns exclusion-patterns] (driver.s/db-details->schema-filter-patterns - prop-nm - database)] - (into #{} (sql-jdbc.sync.interface/active-tables driver conn inclusion-patterns exclusion-patterns))) - (default-active-tbl-fn)) - (default-active-tbl-fn)))))}) + (let [schema-filter-prop (driver.u/find-schema-filters-prop driver) + database (db-or-id-or-spec->database db-or-id-or-spec) + [inclusion-patterns + exclusion-patterns] (when (some? schema-filter-prop) + (driver.s/db-details->schema-filter-patterns (:name schema-filter-prop) database))] + (into #{} (sql-jdbc.sync.interface/active-tables driver conn inclusion-patterns exclusion-patterns)))))}) diff --git a/src/metabase/driver/sql_jdbc/sync/describe_table.clj b/src/metabase/driver/sql_jdbc/sync/describe_table.clj index 8ee116fc884..249c2aea43e 100644 --- a/src/metabase/driver/sql_jdbc/sync/describe_table.clj +++ b/src/metabase/driver/sql_jdbc/sync/describe_table.clj @@ -314,10 +314,10 @@ (defn describe-fks "Default implementation of [[metabase.driver/describe-fks]] for JDBC drivers. Uses JDBC DatabaseMetaData." [driver db & {:keys [schema-names table-names] :as args}] - (if (or (and schema-names (empty? schema-names)) - (and table-names (empty? table-names))) - [] - (sql-jdbc.execute/reducible-query db (describe-fks-sql driver args)))) + (if (or (and schema-names (empty? schema-names)) + (and table-names (empty? table-names))) + [] + (sql-jdbc.execute/reducible-query db (describe-fks-sql driver args)))) (defn describe-table-indexes "Default implementation of [[metabase.driver/describe-table-indexes]] for SQL JDBC drivers. Uses JDBC DatabaseMetaData." diff --git a/src/metabase/sync/sync_metadata/tables.clj b/src/metabase/sync/sync_metadata/tables.clj index b1c6e8d1b16..dc71e71d6bb 100644 --- a/src/metabase/sync/sync_metadata/tables.clj +++ b/src/metabase/sync/sync_metadata/tables.clj @@ -129,8 +129,8 @@ "Create `new-tables` for database, or if they already exist, mark them as active." [database :- i/DatabaseInstance new-tables :- [:set i/DatabaseMetadataTable]] - (log/info "Found new tables:" - (for [table new-tables] + (doseq [table new-tables] + (log/info "Found new table:" (sync-util/name-for-logging (mi/instance Table table)))) (doseq [table new-tables] (create-or-reactivate-table! database table))) diff --git a/test/metabase/driver/postgres_test.clj b/test/metabase/driver/postgres_test.clj index 89007ac35d8..18d94eba10a 100644 --- a/test/metabase/driver/postgres_test.clj +++ b/test/metabase/driver/postgres_test.clj @@ -18,6 +18,7 @@ [metabase.driver.sql-jdbc.connection :as sql-jdbc.conn] [metabase.driver.sql-jdbc.execute :as sql-jdbc.execute] [metabase.driver.sql-jdbc.sync :as sql-jdbc.sync] + [metabase.driver.sql-jdbc.sync.describe-database :as sql-jdbc.describe-database] [metabase.driver.sql.query-processor :as sql.qp] [metabase.driver.sql.query-processor-test-util :as sql.qp-test-util] [metabase.lib.test-metadata :as meta] @@ -41,7 +42,9 @@ [metabase.util.log :as log] [next.jdbc :as next.jdbc] [toucan2.core :as t2] - [toucan2.tools.with-temp :as t2.with-temp])) + [toucan2.tools.with-temp :as t2.with-temp]) + (:import + (java.sql Connection))) (set! *warn-on-reflection* true) @@ -332,7 +335,7 @@ (deftest partitioned-table-test (mt/test-driver :postgres (testing (str "Make sure that partitioned tables (in addition to the individual partitions themselves) are - synced properly (#15049") + synced properly (#15049)") (let [db-name "partitioned_table_test" details (mt/dbdef->connection-details :postgres :db {:database-name db-name}) spec (sql-jdbc.conn/connection-details->spec :postgres details)] @@ -1356,3 +1359,40 @@ ;; Special characters (is (= "SET ROLE \"Role.123\";" (driver.sql/set-role-statement :postgres "Role.123"))) (is (= "SET ROLE \"$role\";" (driver.sql/set-role-statement :postgres "$role"))))) + +(deftest get-tables-parity-with-jdbc-test + (testing "make sure our get-tables return result consistent with jdbc getTables" + (mt/test-driver :postgres + (mt/with-empty-db + (sql-jdbc.execute/do-with-connection-with-options + :postgres + (mt/db) + nil + (fn [^Connection conn] + (let [do-test (fn [& {:keys [schema-pattern table-pattern schemas tables] + :or {schema-pattern "public%" ;; postgres get-tables exclude system tables by default + schemas nil + table-pattern "%" + tables nil} + :as _opts}] + (is (= (into #{} (#'sql-jdbc.describe-database/jdbc-get-tables + :postgres (.getMetaData conn) nil schema-pattern table-pattern + ["TABLE" "PARTITIONED TABLE" "VIEW" "FOREIGN TABLE" "MATERIALIZED VIEW"])) + (into #{} (#'postgres/get-tables (mt/db) schemas tables)))))] + (doseq [stmt ["CREATE TABLE public.table (id INTEGER, type TEXT);" + "CREATE UNIQUE INDEX idx_table_type ON public.table(type);" + "CREATE TABLE public.partition_table (id INTEGER) PARTITION BY RANGE (id);" + "CREATE UNIQUE INDEX idx_partition_table_id ON public.partition_table(id);" + "CREATE SEQUENCE public.table_id_seq;" + "CREATE VIEW public.view AS SELECT * FROM public.table;" + "CREATE TYPE public.enum_type AS ENUM ('a', 'b', 'c');" + "CREATE MATERIALIZED VIEW public.materialized_view AS SELECT * FROM public.table;" + "CREATE SCHEMA public_2;" + "CREATE TABLE public_2.table (id INTEGER);"]] + (next.jdbc/execute! conn [stmt])) + (testing "without any filters" + (do-test)) + (testing "filter by schema" + (do-test :schema-pattern "private" :schemas ["private"])) + (testing "filter by table name" + (do-test :table-pattern "table" :tables ["table"]))))))))) -- GitLab