Skip to content
Snippets Groups Projects
Unverified Commit bad129cd authored by Howon Lee's avatar Howon Lee Committed by GitHub
Browse files

Group-by fix for JSON columns. (#21741)

Group-bys didn't work because you need two instances of the field and you couldn't have two instances of the field be considered by the postgres backend as the same. Ported over the BigQuery fix to this to apply to JSON columns as well.
parent 824c3a07
No related branches found
No related tags found
No related merge requests found
......@@ -17,7 +17,6 @@
[metabase.query-processor.error-type :as qp.error-type]
[metabase.query-processor.store :as qp.store]
[metabase.query-processor.util.add-alias-info :as add]
[metabase.query-processor.util.nest-query :as nest-query]
[metabase.util :as u]
[metabase.util.date-2 :as u.date]
[metabase.util.honeysql-extensions :as hx]
......@@ -566,24 +565,6 @@
[_ t]
(format "timestamp \"%s %s\"" (u.date/format-sql (t/local-date-time t)) (.getId (t/zone-id t))))
;; In `ORDER BY` and `GROUP BY`, unlike other SQL drivers, BigQuery requires that we refer to Fields using the alias we
;; gave them in the `SELECT` clause, rather than repeating their definitions.
;;
;; See #17536 and #18742
(defn- rewrite-fields-to-force-using-column-aliases
"Rewrite `:field` clauses to force them to use the column alias regardless of where they appear."
[form]
(mbql.u/replace form
[:field id-or-name opts]
[:field id-or-name (-> opts
(assoc ::add/source-alias (::add/desired-alias opts)
::add/source-table ::add/none
;; sort of a HACK but this key will tell the SQL QP not to apply casting here either.
::nest-query/outer-select true)
;; don't want to do temporal bucketing or binning inside the order by or breakout either.
;; That happens inside the `SELECT`
(dissoc :temporal-unit :binning))]))
(defmethod sql.qp/apply-top-level-clause [:bigquery-cloud-sdk :breakout]
[driver top-level-clause honeysql-form query]
;; If stuff in `:fields` still needs to be qualified like `dataset.table.field`, just the stuff in `:group-by` should
......@@ -592,7 +573,7 @@
(let [parent-method (partial (get-method sql.qp/apply-top-level-clause [:sql :breakout])
driver top-level-clause honeysql-form)
qualified (parent-method query)
unqualified (parent-method (update query :breakout rewrite-fields-to-force-using-column-aliases))]
unqualified (parent-method (update query :breakout sql.qp/rewrite-fields-to-force-using-column-aliases))]
(merge qualified
(select-keys unqualified #{:group-by}))))
......@@ -600,13 +581,13 @@
[driver clause]
((get-method sql.qp/->honeysql [:sql :asc])
driver
(rewrite-fields-to-force-using-column-aliases clause)))
(sql.qp/rewrite-fields-to-force-using-column-aliases clause)))
(defmethod sql.qp/->honeysql [:bigquery-cloud-sdk :desc]
[driver clause]
((get-method sql.qp/->honeysql [:sql :desc])
driver
(rewrite-fields-to-force-using-column-aliases clause)))
(sql.qp/rewrite-fields-to-force-using-column-aliases clause)))
(defn- reconcile-temporal-types
"Make sure the temporal types of fields and values in filter clauses line up."
......
......@@ -21,6 +21,7 @@
[metabase.models.field :as field]
[metabase.models.secret :as secret]
[metabase.query-processor.store :as qp.store]
[metabase.query-processor.util.add-alias-info :as add]
[metabase.util :as u]
[metabase.util.date-2 :as u.date]
[metabase.util.honeysql-extensions :as hx]
......@@ -295,7 +296,7 @@
(format "(%s#>> ?::text[])::%s " (hformat/to-sql parent-identifier) field-type))))))
(defmethod sql.qp/->honeysql [:postgres :field]
[driver [_ id-or-name _opts :as clause]]
[driver [_ id-or-name opts :as clause]]
(let [stored-field (when (integer? id-or-name)
(qp.store/field id-or-name))
parent-method (get-method sql.qp/->honeysql [:sql :field])
......@@ -305,12 +306,58 @@
(= (:database_type stored-field) "money")
(pg-conversion identifier :numeric)
(some? nfc-path)
(json-query identifier stored-field)
(field/json-field? stored-field)
(if (::sql.qp/forced-alias opts)
(keyword (::add/source-alias opts))
(json-query identifier stored-field))
:else
identifier)))
;; Postgres is not happy with JSON fields which are in group-bys or order-bys
;; being described twice instead of using the alias.
;; Therefore, force the alias, but only for JSON fields to avoid ambiguity.
;; The alias names in JSON fields are unique wrt nfc path"
(defmethod sql.qp/apply-top-level-clause
[:postgres :breakout]
[driver clause honeysql-form {breakout-fields :breakout, fields-fields :fields :as query}]
(let [stored-field-ids (map second breakout-fields)
stored-fields (map #(when (integer? %) (qp.store/field %)) stored-field-ids)
parent-method (partial (get-method sql.qp/apply-top-level-clause [:sql :breakout])
driver clause honeysql-form)
qualified (parent-method query)
unqualified (parent-method (update query
:breakout
sql.qp/rewrite-fields-to-force-using-column-aliases))]
(if (some field/json-field? stored-fields)
(merge qualified
(select-keys unqualified #{:group-by}))
qualified)))
(defn- order-by-is-json-field?
[clause]
(let [is-aggregation? (= (-> clause (second) (first)) :aggregation)
stored-field-id (-> clause (second) (second))
stored-field (when (and (not is-aggregation?) (integer? stored-field-id))
(qp.store/field stored-field-id))]
(and
(some? stored-field)
(field/json-field? stored-field))))
(defmethod sql.qp/->honeysql [:postgres :desc]
[driver clause]
(let [new-clause (if (order-by-is-json-field? clause)
(sql.qp/rewrite-fields-to-force-using-column-aliases clause)
clause)]
((get-method sql.qp/->honeysql [:sql :desc]) driver new-clause)))
(defmethod sql.qp/->honeysql [:postgres :asc]
[driver clause]
(let [new-clause (if (order-by-is-json-field? clause)
(sql.qp/rewrite-fields-to-force-using-column-aliases clause)
clause)]
((get-method sql.qp/->honeysql [:sql :asc]) driver new-clause)))
(defmethod unprepare/unprepare-value [:postgres Date]
[_ value]
(format "'%s'::timestamp" (u.date/format value)))
......
......@@ -589,6 +589,27 @@
[honeysql-form field-alias]
honeysql-form)))
;; Certain SQL drivers require that we refer to Fields using the alias we give in the `SELECT` clause in
;; `ORDER BY` and `GROUP BY` rather than repeating definitions.
;; BigQuery does this generally, other DB's require this in JSON columns.
;;
;; See #17536 and #18742
(defn rewrite-fields-to-force-using-column-aliases
"Rewrite `:field` clauses to force them to use the column alias regardless of where they appear."
[form]
(mbql.u/replace form
[:field id-or-name opts]
[:field id-or-name (-> opts
(assoc ::add/source-alias (::add/desired-alias opts)
::add/source-table ::add/none
;; sort of a HACK but this key will tell the SQL QP not to apply casting here either.
::nest-query/outer-select true
;; used to indicate that this is a forced alias
::forced-alias true)
;; don't want to do temporal bucketing or binning inside the order by or breakout either.
;; That happens inside the `SELECT`
(dissoc :temporal-unit :binning))]))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | Clause Handlers |
......
......@@ -332,6 +332,11 @@
table-name))))
field-name))
(defn json-field?
"Return true if field is a JSON field, false if not."
[field]
(some? (:nfc_path field)))
(defn qualified-name
"Return a combined qualified name for `field`, e.g. `table_name.parent_field_name.field_name`."
[field]
......
......@@ -291,6 +291,36 @@
(is (= ["(boop.bleh#>> ?::text[])::boolean " "{boop,foobar,1234}"]
(hsql/format (#'postgres/json-query boop-identifier boolean-boop-field))))))))
(deftest json-alias-test
(mt/test-driver :postgres
(testing "json breakouts and order bys have alias coercion"
(drop-if-exists-and-create-db! "json-alias-test")
(let [details (mt/dbdef->connection-details :postgres :db {:database-name "json-alias-test"})
spec (sql-jdbc.conn/connection-details->spec :postgres details)
json-part (json/generate-string {:bob :dobbs})
insert (str "CREATE TABLE json_alias_test (json_part JSON NOT NULL);"
(format "INSERT INTO json_alias_test (json_part) VALUES ('%s');" json-part))]
(jdbc/with-db-connection [conn (sql-jdbc.conn/connection-details->spec :postgres details)]
(jdbc/execute! spec [insert]))
(mt/with-temp* [Database [database {:engine :postgres, :details details}]
Table [table {:db_id (u/the-id database) :name "json_alias_test"}]
Field [field {:table_id (u/the-id table)
:nfc_path [:bob
"injection' OR 1=1--' AND released = 1"
(keyword "injection' OR 1=1--' AND released = 1")],
:name "json_alias_test"}]]
(let [compile-res (qp/compile
{:database (u/the-id database)
:type :query
:query {:source-table (u/the-id table)
:aggregation [[:count]]
:breakout [[:field (u/the-id field) nil]]}})]
(is (= (str "SELECT (\"json_alias_test\".\"bob\"#>> ?::text[])::VARCHAR "
"AS \"json_alias_test\", count(*) AS \"count\" FROM \"json_alias_test\" "
"GROUP BY \"json_alias_test\" ORDER BY \"json_alias_test\" ASC")
(:query compile-res)))
(is (= '("{injection' OR 1=1--' AND released = 1,injection' OR 1=1--' AND released = 1}") (:params compile-res)))))))))
(deftest describe-nested-field-columns-test
(mt/test-driver :postgres
(testing "describes json columns and gives types for ones with coherent schemas only"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment