From bad129cda2ffbbd8212b4125985e15c9b96dbdc2 Mon Sep 17 00:00:00 2001 From: Howon Lee <hlee.howon@gmail.com> Date: Fri, 22 Apr 2022 06:16:44 -0700 Subject: [PATCH] Group-by fix for JSON columns. (#21741) Group-bys didn't work because you need two instances of the field and you couldn't have two instances of the field be considered by the postgres backend as the same. Ported over the BigQuery fix to this to apply to JSON columns as well. --- .../bigquery_cloud_sdk/query_processor.clj | 25 ++------- src/metabase/driver/postgres.clj | 53 +++++++++++++++++-- src/metabase/driver/sql/query_processor.clj | 21 ++++++++ src/metabase/models/field.clj | 5 ++ test/metabase/driver/postgres_test.clj | 30 +++++++++++ 5 files changed, 109 insertions(+), 25 deletions(-) diff --git a/modules/drivers/bigquery-cloud-sdk/src/metabase/driver/bigquery_cloud_sdk/query_processor.clj b/modules/drivers/bigquery-cloud-sdk/src/metabase/driver/bigquery_cloud_sdk/query_processor.clj index f3807fdfe64..46a9ac4cb8c 100644 --- a/modules/drivers/bigquery-cloud-sdk/src/metabase/driver/bigquery_cloud_sdk/query_processor.clj +++ b/modules/drivers/bigquery-cloud-sdk/src/metabase/driver/bigquery_cloud_sdk/query_processor.clj @@ -17,7 +17,6 @@ [metabase.query-processor.error-type :as qp.error-type] [metabase.query-processor.store :as qp.store] [metabase.query-processor.util.add-alias-info :as add] - [metabase.query-processor.util.nest-query :as nest-query] [metabase.util :as u] [metabase.util.date-2 :as u.date] [metabase.util.honeysql-extensions :as hx] @@ -566,24 +565,6 @@ [_ t] (format "timestamp \"%s %s\"" (u.date/format-sql (t/local-date-time t)) (.getId (t/zone-id t)))) -;; In `ORDER BY` and `GROUP BY`, unlike other SQL drivers, BigQuery requires that we refer to Fields using the alias we -;; gave them in the `SELECT` clause, rather than repeating their definitions. -;; -;; See #17536 and #18742 -(defn- rewrite-fields-to-force-using-column-aliases - "Rewrite `:field` clauses to force them to use the column alias regardless of where they appear." - [form] - (mbql.u/replace form - [:field id-or-name opts] - [:field id-or-name (-> opts - (assoc ::add/source-alias (::add/desired-alias opts) - ::add/source-table ::add/none - ;; sort of a HACK but this key will tell the SQL QP not to apply casting here either. - ::nest-query/outer-select true) - ;; don't want to do temporal bucketing or binning inside the order by or breakout either. - ;; That happens inside the `SELECT` - (dissoc :temporal-unit :binning))])) - (defmethod sql.qp/apply-top-level-clause [:bigquery-cloud-sdk :breakout] [driver top-level-clause honeysql-form query] ;; If stuff in `:fields` still needs to be qualified like `dataset.table.field`, just the stuff in `:group-by` should @@ -592,7 +573,7 @@ (let [parent-method (partial (get-method sql.qp/apply-top-level-clause [:sql :breakout]) driver top-level-clause honeysql-form) qualified (parent-method query) - unqualified (parent-method (update query :breakout rewrite-fields-to-force-using-column-aliases))] + unqualified (parent-method (update query :breakout sql.qp/rewrite-fields-to-force-using-column-aliases))] (merge qualified (select-keys unqualified #{:group-by})))) @@ -600,13 +581,13 @@ [driver clause] ((get-method sql.qp/->honeysql [:sql :asc]) driver - (rewrite-fields-to-force-using-column-aliases clause))) + (sql.qp/rewrite-fields-to-force-using-column-aliases clause))) (defmethod sql.qp/->honeysql [:bigquery-cloud-sdk :desc] [driver clause] ((get-method sql.qp/->honeysql [:sql :desc]) driver - (rewrite-fields-to-force-using-column-aliases clause))) + (sql.qp/rewrite-fields-to-force-using-column-aliases clause))) (defn- reconcile-temporal-types "Make sure the temporal types of fields and values in filter clauses line up." diff --git a/src/metabase/driver/postgres.clj b/src/metabase/driver/postgres.clj index db8152353b4..cbcd3d09e9d 100644 --- a/src/metabase/driver/postgres.clj +++ b/src/metabase/driver/postgres.clj @@ -21,6 +21,7 @@ [metabase.models.field :as field] [metabase.models.secret :as secret] [metabase.query-processor.store :as qp.store] + [metabase.query-processor.util.add-alias-info :as add] [metabase.util :as u] [metabase.util.date-2 :as u.date] [metabase.util.honeysql-extensions :as hx] @@ -295,7 +296,7 @@ (format "(%s#>> ?::text[])::%s " (hformat/to-sql parent-identifier) field-type)))))) (defmethod sql.qp/->honeysql [:postgres :field] - [driver [_ id-or-name _opts :as clause]] + [driver [_ id-or-name opts :as clause]] (let [stored-field (when (integer? id-or-name) (qp.store/field id-or-name)) parent-method (get-method sql.qp/->honeysql [:sql :field]) @@ -305,12 +306,58 @@ (= (:database_type stored-field) "money") (pg-conversion identifier :numeric) - (some? nfc-path) - (json-query identifier stored-field) + (field/json-field? stored-field) + (if (::sql.qp/forced-alias opts) + (keyword (::add/source-alias opts)) + (json-query identifier stored-field)) :else identifier))) +;; Postgres is not happy with JSON fields which are in group-bys or order-bys +;; being described twice instead of using the alias. +;; Therefore, force the alias, but only for JSON fields to avoid ambiguity. +;; The alias names in JSON fields are unique wrt nfc path" +(defmethod sql.qp/apply-top-level-clause + [:postgres :breakout] + [driver clause honeysql-form {breakout-fields :breakout, fields-fields :fields :as query}] + (let [stored-field-ids (map second breakout-fields) + stored-fields (map #(when (integer? %) (qp.store/field %)) stored-field-ids) + parent-method (partial (get-method sql.qp/apply-top-level-clause [:sql :breakout]) + driver clause honeysql-form) + qualified (parent-method query) + unqualified (parent-method (update query + :breakout + sql.qp/rewrite-fields-to-force-using-column-aliases))] + (if (some field/json-field? stored-fields) + (merge qualified + (select-keys unqualified #{:group-by})) + qualified))) + +(defn- order-by-is-json-field? + [clause] + (let [is-aggregation? (= (-> clause (second) (first)) :aggregation) + stored-field-id (-> clause (second) (second)) + stored-field (when (and (not is-aggregation?) (integer? stored-field-id)) + (qp.store/field stored-field-id))] + (and + (some? stored-field) + (field/json-field? stored-field)))) + +(defmethod sql.qp/->honeysql [:postgres :desc] + [driver clause] + (let [new-clause (if (order-by-is-json-field? clause) + (sql.qp/rewrite-fields-to-force-using-column-aliases clause) + clause)] + ((get-method sql.qp/->honeysql [:sql :desc]) driver new-clause))) + +(defmethod sql.qp/->honeysql [:postgres :asc] + [driver clause] + (let [new-clause (if (order-by-is-json-field? clause) + (sql.qp/rewrite-fields-to-force-using-column-aliases clause) + clause)] + ((get-method sql.qp/->honeysql [:sql :asc]) driver new-clause))) + (defmethod unprepare/unprepare-value [:postgres Date] [_ value] (format "'%s'::timestamp" (u.date/format value))) diff --git a/src/metabase/driver/sql/query_processor.clj b/src/metabase/driver/sql/query_processor.clj index 3884f917ed4..67d61fe6f18 100644 --- a/src/metabase/driver/sql/query_processor.clj +++ b/src/metabase/driver/sql/query_processor.clj @@ -589,6 +589,27 @@ [honeysql-form field-alias] honeysql-form))) +;; Certain SQL drivers require that we refer to Fields using the alias we give in the `SELECT` clause in +;; `ORDER BY` and `GROUP BY` rather than repeating definitions. +;; BigQuery does this generally, other DB's require this in JSON columns. +;; +;; See #17536 and #18742 + +(defn rewrite-fields-to-force-using-column-aliases + "Rewrite `:field` clauses to force them to use the column alias regardless of where they appear." + [form] + (mbql.u/replace form + [:field id-or-name opts] + [:field id-or-name (-> opts + (assoc ::add/source-alias (::add/desired-alias opts) + ::add/source-table ::add/none + ;; sort of a HACK but this key will tell the SQL QP not to apply casting here either. + ::nest-query/outer-select true + ;; used to indicate that this is a forced alias + ::forced-alias true) + ;; don't want to do temporal bucketing or binning inside the order by or breakout either. + ;; That happens inside the `SELECT` + (dissoc :temporal-unit :binning))])) ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | Clause Handlers | diff --git a/src/metabase/models/field.clj b/src/metabase/models/field.clj index d30c5e1d2da..f99b0ff7499 100644 --- a/src/metabase/models/field.clj +++ b/src/metabase/models/field.clj @@ -332,6 +332,11 @@ table-name)))) field-name)) +(defn json-field? + "Return true if field is a JSON field, false if not." + [field] + (some? (:nfc_path field))) + (defn qualified-name "Return a combined qualified name for `field`, e.g. `table_name.parent_field_name.field_name`." [field] diff --git a/test/metabase/driver/postgres_test.clj b/test/metabase/driver/postgres_test.clj index 67625cca824..2483de7c2b8 100644 --- a/test/metabase/driver/postgres_test.clj +++ b/test/metabase/driver/postgres_test.clj @@ -291,6 +291,36 @@ (is (= ["(boop.bleh#>> ?::text[])::boolean " "{boop,foobar,1234}"] (hsql/format (#'postgres/json-query boop-identifier boolean-boop-field)))))))) +(deftest json-alias-test + (mt/test-driver :postgres + (testing "json breakouts and order bys have alias coercion" + (drop-if-exists-and-create-db! "json-alias-test") + (let [details (mt/dbdef->connection-details :postgres :db {:database-name "json-alias-test"}) + spec (sql-jdbc.conn/connection-details->spec :postgres details) + json-part (json/generate-string {:bob :dobbs}) + insert (str "CREATE TABLE json_alias_test (json_part JSON NOT NULL);" + (format "INSERT INTO json_alias_test (json_part) VALUES ('%s');" json-part))] + (jdbc/with-db-connection [conn (sql-jdbc.conn/connection-details->spec :postgres details)] + (jdbc/execute! spec [insert])) + (mt/with-temp* [Database [database {:engine :postgres, :details details}] + Table [table {:db_id (u/the-id database) :name "json_alias_test"}] + Field [field {:table_id (u/the-id table) + :nfc_path [:bob + "injection' OR 1=1--' AND released = 1" + (keyword "injection' OR 1=1--' AND released = 1")], + :name "json_alias_test"}]] + (let [compile-res (qp/compile + {:database (u/the-id database) + :type :query + :query {:source-table (u/the-id table) + :aggregation [[:count]] + :breakout [[:field (u/the-id field) nil]]}})] + (is (= (str "SELECT (\"json_alias_test\".\"bob\"#>> ?::text[])::VARCHAR " + "AS \"json_alias_test\", count(*) AS \"count\" FROM \"json_alias_test\" " + "GROUP BY \"json_alias_test\" ORDER BY \"json_alias_test\" ASC") + (:query compile-res))) + (is (= '("{injection' OR 1=1--' AND released = 1,injection' OR 1=1--' AND released = 1}") (:params compile-res))))))))) + (deftest describe-nested-field-columns-test (mt/test-driver :postgres (testing "describes json columns and gives types for ones with coherent schemas only" -- GitLab