From 236665163e45d404c16af907a5876e6f550c2c5d Mon Sep 17 00:00:00 2001 From: metamben <103100869+metamben@users.noreply.github.com> Date: Mon, 17 Oct 2022 23:16:55 +0300 Subject: [PATCH] Prevent repeated coercion of fields (#24718) Fixes #22519. --- .../row_level_restrictions_test.clj | 9 ++- .../22519-casting-fails-query.cy.spec.js | 3 +- src/metabase/driver/sql/query_processor.clj | 12 +-- src/metabase/query_processor.clj | 2 + .../middleware/mark_outer_fields.clj | 32 ++++++++ .../query_processor/util/nest_query.clj | 4 +- .../middleware/add_source_metadata_test.clj | 2 +- .../middleware/fetch_source_query_test.clj | 5 +- .../util/add_alias_info_test.clj | 20 +++-- .../query_processor/util/nest_query_test.clj | 48 ++++++------ .../nested_queries_test.clj | 73 +++++++++++++++++-- .../query_processor_test/remapping_test.clj | 4 +- 12 files changed, 162 insertions(+), 52 deletions(-) create mode 100644 src/metabase/query_processor/middleware/mark_outer_fields.clj diff --git a/enterprise/backend/test/metabase_enterprise/sandbox/query_processor/middleware/row_level_restrictions_test.clj b/enterprise/backend/test/metabase_enterprise/sandbox/query_processor/middleware/row_level_restrictions_test.clj index 1255afc7d34..c4b2b3bbc6e 100644 --- a/enterprise/backend/test/metabase_enterprise/sandbox/query_processor/middleware/row_level_restrictions_test.clj +++ b/enterprise/backend/test/metabase_enterprise/sandbox/query_processor/middleware/row_level_restrictions_test.clj @@ -476,17 +476,20 @@ (assoc col :id id :table_id (mt/id :venues) - :field_ref [:field id nil])))] + :field_ref [:field id nil]))) + expected-outer-cols (fn [] + (map #(assoc-in % [:options :nested/outer] true) + (expected-cols)))] (testing "A query with a simple attributes-based sandbox should have the same metadata" (mt/with-gtaps {:gtaps {:venues (dissoc (venues-category-mbql-gtap-def) :query)} :attributes {"cat" 50}} - (is (= (expected-cols) + (is (= (expected-outer-cols) (cols))))) (testing "A query with an equivalent MBQL query sandbox should have the same metadata" (mt/with-gtaps {:gtaps {:venues (venues-category-mbql-gtap-def)} :attributes {"cat" 50}} - (is (= (expected-cols) + (is (= (expected-outer-cols) (cols))))) (testing "A query with an equivalent native query sandbox should have the same metadata" diff --git a/frontend/test/metabase/scenarios/models/reproductions/22519-casting-fails-query.cy.spec.js b/frontend/test/metabase/scenarios/models/reproductions/22519-casting-fails-query.cy.spec.js index 89b88bb1911..4533f9ccf05 100644 --- a/frontend/test/metabase/scenarios/models/reproductions/22519-casting-fails-query.cy.spec.js +++ b/frontend/test/metabase/scenarios/models/reproductions/22519-casting-fails-query.cy.spec.js @@ -14,7 +14,7 @@ const questionDetails = { }, }; -describe.skip("issue 22519", () => { +describe("issue 22519", () => { beforeEach(() => { cy.intercept("PUT", "/api/field/*").as("updateField"); cy.intercept("POST", "/api/dataset").as("dataset"); @@ -36,7 +36,6 @@ describe.skip("issue 22519", () => { turnIntoModel(); - cy.wait("@dataset"); cy.findByText("xavier"); }); }); diff --git a/src/metabase/driver/sql/query_processor.clj b/src/metabase/driver/sql/query_processor.clj index 43d9a3aba87..6635b4d9b99 100644 --- a/src/metabase/driver/sql/query_processor.clj +++ b/src/metabase/driver/sql/query_processor.clj @@ -370,7 +370,6 @@ (defmethod ->honeysql [:sql :field] [driver [_ id-or-name {:keys [database-type] - ::nest-query/keys [outer-select] :as options} :as field-clause]] (try @@ -378,6 +377,7 @@ source-alias (field-source-alias field-clause) field (when (integer? id-or-name) (qp.store/field id-or-name)) + outer-select (:nested/outer options) allow-casting? (and field (not outer-select)) database-type (or database-type @@ -392,7 +392,7 @@ (u/prog1 (cond->> identifier allow-casting? (cast-field-if-needed driver field) - database-type maybe-add-db-type ; only add type info if it wasn't added by [[cast-field-if-needed]] + database-type maybe-add-db-type ; only add type info if it wasn't added by [[cast-field-if-needed]] (:temporal-unit options) (apply-temporal-bucketing driver options) (:binning options) (apply-binning options)) (log/trace (binding [*print-meta* true] @@ -656,12 +656,12 @@ [:field id-or-name opts] [:field id-or-name (cond-> opts true - (assoc ::add/source-alias (::add/desired-alias opts) - ::add/source-table ::add/none + (assoc ::add/source-alias (::add/desired-alias opts) + ::add/source-table ::add/none ;; sort of a HACK but this key will tell the SQL QP not to apply casting here either. - ::nest-query/outer-select true + :nested/outer true ;; used to indicate that this is a forced alias - ::forced-alias true) + ::forced-alias true) ;; don't want to do temporal bucketing or binning inside the order by only. ;; That happens inside the `SELECT` ;; (#22831) however, we do want it in breakout diff --git a/src/metabase/query_processor.clj b/src/metabase/query_processor.clj index bfff92ebfe7..480c6adf77d 100644 --- a/src/metabase/query_processor.clj +++ b/src/metabase/query_processor.clj @@ -36,6 +36,7 @@ [metabase.query-processor.middleware.format-rows :as format-rows] [metabase.query-processor.middleware.large-int-id :as large-int-id] [metabase.query-processor.middleware.limit :as limit] + [metabase.query-processor.middleware.mark-outer-fields :as qp.mark-outer-fields] [metabase.query-processor.middleware.mbql-to-native :as mbql-to-native] [metabase.query-processor.middleware.normalize-query :as normalize] [metabase.query-processor.middleware.optimize-temporal-filters :as optimize-temporal-filters] @@ -104,6 +105,7 @@ #'resolve-joins/resolve-joins #'resolve-joined-fields/resolve-joined-fields #'fix-bad-refs/fix-bad-references + #'qp.mark-outer-fields/mark-outer-fields #'escape-join-aliases/escape-join-aliases ;; yes, this is called a second time, because we need to handle any joins that got added (resolve 'ee.sandbox.rows/apply-sandboxing) diff --git a/src/metabase/query_processor/middleware/mark_outer_fields.clj b/src/metabase/query_processor/middleware/mark_outer_fields.clj new file mode 100644 index 00000000000..3697a63024f --- /dev/null +++ b/src/metabase/query_processor/middleware/mark_outer_fields.clj @@ -0,0 +1,32 @@ +(ns metabase.query-processor.middleware.mark-outer-fields + (:require [clojure.walk :as walk] + [medley.core :as m] + [metabase.mbql.util :as mbql.u])) + +(defn- annotate-outer-fields [form source-fields-names-or-ids] + (mbql.u/replace form + [:field (_ :guard source-fields-names-or-ids) _] (mbql.u/assoc-field-options &match :nested/outer true))) + +(defn- mark-mbql-outer-fields + [mbql-query] + (walk/prewalk + (fn [form] + (let [source-fields (get-in form [:source-query :fields])] + (if (and (map? form) + (seqable? source-fields) + (seq source-fields)) + (let [id-set (into #{} (map second) source-fields)] + (-> form + (m/update-existing :fields annotate-outer-fields id-set) + (m/update-existing :breakout annotate-outer-fields id-set) + (m/update-existing :order-by annotate-outer-fields id-set))) + form))) + mbql-query)) + +(defn mark-outer-fields + "Mark all Fields in the MBQL query `query` coming from a source-query as `:nested/outer` so QP implementations know + not to apply coercion or whatever to them a second time" + [query] + (cond-> query + (not= (:type query) :native) + (update :query mark-mbql-outer-fields))) diff --git a/src/metabase/query_processor/util/nest_query.clj b/src/metabase/query_processor/util/nest_query.clj index 2f91124162c..a270a77af19 100644 --- a/src/metabase/query_processor/util/nest_query.clj +++ b/src/metabase/query_processor/util/nest_query.clj @@ -74,8 +74,8 @@ ;; mark all Fields at the new top level as `::outer-select` so QP implementations know not to apply coercion or ;; whatever to them a second time. - [:field _id-or-name (_opts :guard (every-pred :temporal-unit (complement ::outer-select)))] - (recur (mbql.u/update-field-options &match assoc ::outer-select true)) + [:field _id-or-name (_opts :guard (every-pred :temporal-unit (complement :nested/outer)))] + (recur (mbql.u/assoc-field-options &match :nested/outer true)) [:field id-or-name (opts :guard :join-alias)] (let [{::add/keys [desired-alias]} (mbql.u/match-one (:source-query query) diff --git a/test/metabase/query_processor/middleware/add_source_metadata_test.clj b/test/metabase/query_processor/middleware/add_source_metadata_test.clj index 401475a58d7..8ba7db019dc 100644 --- a/test/metabase/query_processor/middleware/add_source_metadata_test.clj +++ b/test/metabase/query_processor/middleware/add_source_metadata_test.clj @@ -331,7 +331,7 @@ ;; the actual metadata this middleware should return. Doesn't have all the columns that come back from ;; `qp/query->expected-cols` expected-metadata (for [col metadata] - (cond-> (dissoc col :description :source :visibility_type) + (cond-> (dissoc col :description :source :visibility_type :options) ;; for some reason this middleware returns temporal fields with a `:default` unit, ;; whereas `query->expected-cols` does not return the unit. It ulimately makes zero ;; difference, so I haven't looked into why this is the case yet. diff --git a/test/metabase/query_processor/middleware/fetch_source_query_test.clj b/test/metabase/query_processor/middleware/fetch_source_query_test.clj index 82ea3500126..43796a84627 100644 --- a/test/metabase/query_processor/middleware/fetch_source_query_test.clj +++ b/test/metabase/query_processor/middleware/fetch_source_query_test.clj @@ -123,7 +123,10 @@ (qp/query->expected-cols (mt/mbql-query venues))) (assoc-in [:query :source-query :source-metadata] (mt/derecordize (qp/query->expected-cols (mt/mbql-query venues)))) - (assoc :info {:card-id (u/the-id card-2)})) + (assoc :info {:card-id (u/the-id card-2)}) + (update-in [:query :source-metadata] (fn [fields] + (mapv #(assoc-in % [:options :nested/outer] true) + fields)))) (resolve-card-id-source-tables (wrap-inner-query {:source-table (str "card__" (u/the-id card-2)), :limit 25})))))) diff --git a/test/metabase/query_processor/util/add_alias_info_test.clj b/test/metabase/query_processor/util/add_alias_info_test.clj index 21f65346ff9..e2e0d90fdad 100644 --- a/test/metabase/query_processor/util/add_alias_info_test.clj +++ b/test/metabase/query_processor/util/add_alias_info_test.clj @@ -62,12 +62,14 @@ ::add/source-table ::add/source ::add/source-alias "Cat__NAME" ::add/desired-alias "Cat__NAME" - ::add/position 0}]] + ::add/position 0 + :nested/outer true}]] :order-by [[:asc [:field %categories.name {:join-alias "Cat" ::add/source-table ::add/source ::add/source-alias "Cat__NAME" ::add/desired-alias "Cat__NAME" - ::add/position 0}]]] + ::add/position 0 + :nested/outer true}]]] :limit 1}) (add-alias-info (mt/mbql-query venues @@ -132,6 +134,7 @@ ::add/source-table "Q2"}]] :strategy :left-join}] :fields [[:field %products.category {:join-alias "P1" + :nested/outer true ::add/desired-alias "P1__CATEGORY" ::add/position 0 ::add/source-alias "P1__CATEGORY" @@ -404,7 +407,8 @@ ::add/source-table ::add/source ::add/source-alias "COOL.double_price" ::add/desired-alias "COOL.COOL.double_price" - ::add/position 0}]] + ::add/position 0 + :nested/outer true}]] {:aggregation [[:aggregation-options [:count] {:name "COOL.count" ::add/position 1 ::add/source-alias "count" @@ -541,6 +545,7 @@ ::add/position 0 ::add/source-alias "Products_Renamed__ID" ::add/source-table ::add/source + :nested/outer true :join-alias "Products Renamed"}] [:field "CC" @@ -548,7 +553,8 @@ ::add/position 1 ::add/source-alias "CC" ::add/source-table ::add/source - :base-type :type/Float}]] + :base-type :type/Float + :nested/outer true}]] :limit 1}) (-> (mt/mbql-query orders {:source-query {:source-table $$orders @@ -586,11 +592,13 @@ ::add/source-alias "Name" ::add/desired-alias "Name_2" ::add/position 1}]]} - :fields [[:field name-id {::add/source-table ::add/source + :fields [[:field name-id {:nested/outer true + ::add/source-table ::add/source ::add/source-alias "NAME" ::add/desired-alias "NAME" ::add/position 0}] - [:field price-id {::add/source-table ::add/source + [:field price-id {:nested/outer true + ::add/source-table ::add/source ::add/source-alias "Name_2" ::add/desired-alias "Name_2" ::add/position 1}]] diff --git a/test/metabase/query_processor/util/nest_query_test.clj b/test/metabase/query_processor/util/nest_query_test.clj index abbc5010b18..a83e60654de 100644 --- a/test/metabase/query_processor/util/nest_query_test.clj +++ b/test/metabase/query_processor/util/nest_query_test.clj @@ -117,18 +117,18 @@ ::add/source-alias "double_id" ::add/desired-alias "double_id" ::add/position 0}] - [:field %date {:temporal-unit :day - ::nest-query/outer-select true - ::add/source-table ::add/source - ::add/source-alias "DATE" - ::add/desired-alias "DATE" - ::add/position 1}] - [:field %date {:temporal-unit :month - ::nest-query/outer-select true - ::add/source-table ::add/source - ::add/source-alias "DATE" - ::add/desired-alias "DATE_2" - ::add/position 2}]] + [:field %date {:temporal-unit :day + :nested/outer true + ::add/source-table ::add/source + ::add/source-alias "DATE" + ::add/desired-alias "DATE" + ::add/position 1}] + [:field %date {:temporal-unit :month + :nested/outer true + ::add/source-table ::add/source + ::add/source-alias "DATE" + ::add/desired-alias "DATE_2" + ::add/position 2}]] :limit 1}) (nest-expressions (mt/mbql-query checkins @@ -588,12 +588,12 @@ ::add/position 5}] [:expression "test" {::add/desired-alias "test" ::add/position 6}]]} - :fields [[:field %price {:temporal-unit :default - ::nest-query/outer-select true - ::add/source-table ::add/source - ::add/source-alias "PRICE" - ::add/desired-alias "PRICE" - ::add/position 0}] + :fields [[:field %price {:temporal-unit :default + :nested/outer true + ::add/source-table ::add/source + ::add/source-alias "PRICE" + ::add/desired-alias "PRICE" + ::add/position 0}] [:field "test" {:base-type :type/Float ::add/source-table ::add/source ::add/source-alias "test" @@ -684,12 +684,12 @@ ::add/source-alias "PRODUCTS__via__PRODUCT_ID__CATEGORY" ::add/desired-alias "PRODUCTS__via__PRODUCT_ID__CATEGORY" ::add/position 0}] - created-at [:field %created_at {:temporal-unit :year - ::nest-query/outer-select true - ::add/source-table ::add/source - ::add/source-alias "CREATED_AT" - ::add/desired-alias "CREATED_AT" - ::add/position 1}] + created-at [:field %created_at {:temporal-unit :year + :nested/outer true + ::add/source-table ::add/source + ::add/source-alias "CREATED_AT" + ::add/desired-alias "CREATED_AT" + ::add/position 1}] pivot-grouping [:field "pivot-grouping" {:base-type :type/Float ::add/source-table ::add/source ::add/source-alias "pivot-grouping" diff --git a/test/metabase/query_processor_test/nested_queries_test.clj b/test/metabase/query_processor_test/nested_queries_test.clj index eca4628faa1..96c92ccc0c7 100644 --- a/test/metabase/query_processor_test/nested_queries_test.clj +++ b/test/metabase/query_processor_test/nested_queries_test.clj @@ -31,7 +31,9 @@ [4 "Wurstküche" 29 33.9997 -118.465 2] [5 "Brite Spot Family Restaurant" 20 34.0778 -118.261 2]] :cols (mapv - (partial qp.test/col :venues) + (fn [col] + (-> (qp.test/col :venues col) + (assoc-in [:options :nested/outer] true))) [:id :name :category_id :latitude :longitude :price])} (qp.test/rows-and-cols (mt/format-rows-by :venues @@ -68,6 +70,9 @@ [3 13] [4 6]] :cols [(cond-> (qp.test/breakout-col (qp.test/col :venues :price)) + (not native-source?) + (assoc-in [:options :nested/outer] true) + native-source? (-> (assoc :field_ref [:field "PRICE" {:base-type :type/Integer}] :effective_type :type/Integer) @@ -133,8 +138,11 @@ [1 3 13] [1 4 8] [1 5 10]] - :cols [(qp.test/breakout-col (qp.test/fk-col :checkins :venue_id :venues :price)) - (qp.test/breakout-col (qp.test/col :checkins :user_id)) + :cols [(-> (qp.test/fk-col :checkins :venue_id :venues :price) + qp.test/breakout-col) + (-> (qp.test/col :checkins :user_id) + qp.test/breakout-col + (assoc-in [:options :nested/outer] true)) (qp.test/aggregate-col :count)]} (qp.test/rows-and-cols (mt/format-rows-by [int int int] @@ -190,6 +198,54 @@ +(deftest nested-with-coerced-fields + (testing "Coerced fields are cast only once (#22519)" + (mt/test-drivers (mt/normal-drivers-with-feature :nested-queries) + (mt/dataset sample-dataset + (mt/with-temp-vals-in-db Field (mt/id :reviews :rating) {:coercion_strategy :Coercion/UNIXSeconds->DateTime + :effective_type :type/Instant} + (mt/with-temp Card [{card-id :id} + {:dataset_query + (mt/mbql-query reviews + {:fields [$rating] + :limit 1})}] + (testing "with all inherited fields" + (is (= :completed + (:status (qp/process-query {:type :query + :database (mt/id) + :query {:source-table (str "card__" card-id)}}))))) + (testing "with explicit top-level fields" + (is (= :completed + (:status (qp/process-query {:type :query + :database (mt/id) + :query {:fields [(mt/id :reviews :rating)] + :source-table (str "card__" card-id)}}))))) + (testing "with breakout and order-by" + (is (= :completed + (:status (qp/process-query {:type :query + :database (mt/id) + :query {:source-table (str "card__" card-id) + :aggregation [:count] + :breakout [(mt/id :reviews :rating)]}}))))))))) + (mt/test-drivers (mt/normal-drivers-with-feature :nested-queries :left-join) + (mt/dataset sample-dataset + (mt/with-temp-vals-in-db Field (mt/id :reviews :rating) {:coercion_strategy :Coercion/UNIXSeconds->DateTime + :effective_type :type/Instant} + (testing "with join" + (mt/with-temp Card [{card-id :id} + {:dataset_query + (mt/mbql-query products + {:fields [$id] + :joins [{:source-table $$reviews + :alias "R" + :fields [&R.reviews.rating] + :condition [:= $id &R.reviews.product_id]}] + :limit 1})}] + (is (= :completed + (:status (qp/process-query {:type :query + :database (mt/id) + :query {:source-table (str "card__" card-id)}}))))))))))) + (deftest sql-source-query-breakout-aggregation-test (mt/test-drivers (mt/normal-drivers-with-feature :nested-queries) (testing "make sure we can do a query with breakout and aggregation using a SQL source query" @@ -310,7 +366,9 @@ (deftest filter-by-field-literal-test (testing "make sure we can filter by a field literal" (is (= {:rows [[1 "Red Medicine" 4 10.0646 -165.374 3]] - :cols (mapv (partial qp.test/col :venues) + :cols (mapv (fn [col] + (-> (qp.test/col :venues col) + (assoc-in [:options :nested/outer] true))) [:id :name :category_id :latitude :longitude :price])} (qp.test/rows-and-cols (mt/run-mbql-query venues @@ -465,7 +523,9 @@ (deftest correct-column-metadata-test (testing "make sure a query using a source query comes back with the correct columns metadata" - (is (= (map (partial qp.test/col :venues) + (is (= (map (fn [col] + (-> (qp.test/col :venues col) + (assoc-in [:options :nested/outer] true))) [:id :name :category_id :latitude :longitude :price]) ;; todo: i don't know why the results don't have the information (mt/cols @@ -473,7 +533,8 @@ (qp/process-query (query-with-source-card card))))))) (testing "make sure a breakout/aggregate query using a source query comes back with the correct columns metadata" - (is (= [(qp.test/breakout-col (qp.test/col :venues :price)) + (is (= [(qp.test/breakout-col (-> (qp.test/col :venues :price) + (assoc-in [:options :nested/outer] true))) (qp.test/aggregate-col :count)] (mt/cols (mt/with-temp Card [card (venues-mbql-card-def)] diff --git a/test/metabase/query_processor_test/remapping_test.clj b/test/metabase/query_processor_test/remapping_test.clj index 3f38729b569..ee204dcb083 100644 --- a/test/metabase/query_processor_test/remapping_test.clj +++ b/test/metabase/query_processor_test/remapping_test.clj @@ -73,8 +73,10 @@ ["25°" 11 "Burger"] ["33 Taps" 7 "Bar"] ["800 Degrees Neapolitan Pizzeria" 58 "Pizza"]] - :cols [(mt/col :venues :name) + :cols [(-> (mt/col :venues :name) + (assoc-in [:options :nested/outer] true)) (-> (mt/col :venues :category_id) + (assoc-in [:options :nested/outer] true) (assoc :remapped_to "Category ID [internal remap]")) (#'qp.add-dimension-projections/create-remapped-col "Category ID [internal remap]" -- GitLab