From 3ddce6076bd21ef47a163f961ea08b20248bcf1a Mon Sep 17 00:00:00 2001 From: dpsutton <dan@dpsutton.com> Date: Thu, 12 Jan 2023 16:38:16 -0600 Subject: [PATCH] restore aliases before annotating (#27637) * restore aliases before annotating * cleanup * fix tests * Don't add escaped->original if aliases have not changed No need to walk and replace the aliases if they are identical. And in that case, no need to keep a mapping of identical to identical. Not super important but saves some time and complexity, and keeps other tests passing since the presence of [:info :alias/escaped->original] in the query caused them to trivially fail. * oracle has a smaller limit and _expected_ mangled previous testing behavior was "what happened" and not what should happen. we fixed the bug but the "expect garbage" behavior was still present * Relax :alias/escaped->original schema oracle tests use keywords for the alias ```clojure {:alias/escaped->original {:test-data-venues--via-a763718f "test_data_venues__via__venue_id", :test-data-users--via--user-id "test_data_users__via__user_id"}}} ``` No idea why that is keyworded * relax `:alias/escaped->original` schema see previous commit --- shared/src/metabase/mbql/schema.cljc | 15 +++++----- .../query_processor/middleware/annotate.clj | 12 +++++--- .../middleware/escape_join_aliases.clj | 21 ++++++++++++-- .../middleware/annotate_test.clj | 29 +++++++++++++++++++ .../middleware/escape_join_aliases_test.clj | 28 ++++++++++++++++-- test/metabase/query_processor_test.clj | 6 ++-- 6 files changed, 90 insertions(+), 21 deletions(-) diff --git a/shared/src/metabase/mbql/schema.cljc b/shared/src/metabase/mbql/schema.cljc index 1c3f0344ffd..411c2006c03 100644 --- a/shared/src/metabase/mbql/schema.cljc +++ b/shared/src/metabase/mbql/schema.cljc @@ -1710,19 +1710,20 @@ based on this information, don't do it!" {;; These keys are nice to pass in if you're running queries on the backend and you know these values. They aren't ;; used for permissions checking or anything like that so don't try to be sneaky - (s/optional-key :context) (s/maybe Context) - (s/optional-key :executed-by) (s/maybe helpers/IntGreaterThanZero) - (s/optional-key :card-id) (s/maybe helpers/IntGreaterThanZero) - (s/optional-key :card-name) (s/maybe helpers/NonBlankString) - (s/optional-key :dashboard-id) (s/maybe helpers/IntGreaterThanZero) - (s/optional-key :pulse-id) (s/maybe helpers/IntGreaterThanZero) + (s/optional-key :context) (s/maybe Context) + (s/optional-key :executed-by) (s/maybe helpers/IntGreaterThanZero) + (s/optional-key :card-id) (s/maybe helpers/IntGreaterThanZero) + (s/optional-key :card-name) (s/maybe helpers/NonBlankString) + (s/optional-key :dashboard-id) (s/maybe helpers/IntGreaterThanZero) + (s/optional-key :alias/escaped->original) (s/maybe {s/Any s/Any}) + (s/optional-key :pulse-id) (s/maybe helpers/IntGreaterThanZero) ;; Metadata for datasets when querying the dataset. This ensures that user edits to dataset metadata are blended in ;; with runtime computed metadata so that edits are saved. (s/optional-key :metadata/dataset-metadata) (s/maybe [{s/Any s/Any}]) ;; `:hash` gets added automatically by `process-query-and-save-execution!`, so don't try passing ;; these in yourself. In fact, I would like this a lot better if we could take these keys out of `:info` entirely ;; and have the code that saves QueryExceutions figure out their values when it goes to save them - (s/optional-key :query-hash) (s/maybe #?(:clj (Class/forName "[B") + (s/optional-key :query-hash) (s/maybe #?(:clj (Class/forName "[B") :cljs s/Any))}) diff --git a/src/metabase/query_processor/middleware/annotate.clj b/src/metabase/query_processor/middleware/annotate.clj index 55a2e857be7..8464444cffe 100644 --- a/src/metabase/query_processor/middleware/annotate.clj +++ b/src/metabase/query_processor/middleware/annotate.clj @@ -12,6 +12,7 @@ [metabase.mbql.util.match :as mbql.match] [metabase.models.humanization :as humanization] [metabase.query-processor.error-type :as qp.error-type] + [metabase.query-processor.middleware.escape-join-aliases :as escape-join-aliases] [metabase.query-processor.reducible :as qp.reducible] [metabase.query-processor.store :as qp.store] [metabase.query-processor.util :as qp.util] @@ -695,12 +696,15 @@ (defn add-column-info "Middleware for adding type information about the columns in the query results (the `:cols` key)." [{query-type :type, :as query - {:keys [:metadata/dataset-metadata]} :info} rff] + {:keys [:metadata/dataset-metadata :alias/escaped->original]} :info} rff] (fn add-column-info-rff* [metadata] (if (= query-type :query) - (rff (cond-> (assoc metadata :cols (merged-column-info query metadata)) - (seq dataset-metadata) - (update :cols qp.util/combine-metadata dataset-metadata))) + (let [query (cond-> query + (seq escaped->original) ;; if we replaced aliases, restore them + (escape-join-aliases/restore-aliases escaped->original))] + (rff (cond-> (assoc metadata :cols (merged-column-info query metadata)) + (seq dataset-metadata) + (update :cols qp.util/combine-metadata dataset-metadata)))) ;; rows sampling is only needed for native queries! TODO  not sure we really even need to do for native ;; queries... (let [metadata (cond-> (update metadata :cols annotate-native-cols) diff --git a/src/metabase/query_processor/middleware/escape_join_aliases.clj b/src/metabase/query_processor/middleware/escape_join_aliases.clj index b2295152f2f..8ab4125c58a 100644 --- a/src/metabase/query_processor/middleware/escape_join_aliases.clj +++ b/src/metabase/query_processor/middleware/escape_join_aliases.clj @@ -1,5 +1,6 @@ (ns metabase.query-processor.middleware.escape-join-aliases (:require + [clojure.set :as set] [clojure.string :as str] [clojure.tools.logging :as log] [metabase.driver :as driver] @@ -38,7 +39,8 @@ (defn escape-join-aliases "Pre-processing middleware. Make sure all join aliases are unique, regardless of case (some databases treat table aliases as case-insensitive, even if table names themselves are not); escape all join aliases - with [[metabase.driver/escape-alias]]." + with [[metabase.driver/escape-alias]]. If aliases are 'uniquified', will include a map + at [:info :alias/escaped->original] of the escaped name back to the original, to be restored in post processing." [query] (let [all-join-aliases (all-join-aliases query)] (log/tracef "Join aliases in query: %s" (pr-str all-join-aliases)) @@ -55,5 +57,18 @@ (escape (str original \_ suffix)))) original->escaped (into {} (map (juxt identity (comp uniquify escape))) - all-join-aliases)] - (rename-join-aliases query original->escaped))))) + all-join-aliases) + aliases-changed? (some (fn [[original escaped]] (not= original escaped)) + original->escaped)] + (if aliases-changed? + (-> query + (rename-join-aliases original->escaped) + (assoc-in [:info :alias/escaped->original] (set/map-invert original->escaped))) + query))))) + +(defn restore-aliases + "Restore aliases in query. + If aliases were changed in [[escape-join-aliases]], there is a key in `:info` of `:alias/escaped->original` which we + can restore the aliases in the query." + [query escaped->original] + (rename-join-aliases query escaped->original)) diff --git a/test/metabase/query_processor/middleware/annotate_test.clj b/test/metabase/query_processor/middleware/annotate_test.clj index 8031963da5a..c3c67f5ed15 100644 --- a/test/metabase/query_processor/middleware/annotate_test.clj +++ b/test/metabase/query_processor/middleware/annotate_test.clj @@ -795,3 +795,32 @@ (is (= "alias → B Column" (-> results :data :results_metadata :columns second :display_name)) "Results metadata cols has wrong display name")))))) + +(deftest preserve-original-join-alias-test + (testing "The join alias for the `:field_ref` in results metadata should match the one originally specified (#27464)" + (mt/test-drivers (mt/normal-drivers-with-feature :left-join) + (mt/dataset sample-dataset + (let [join-alias "Products with a very long name - Product ID with a very long name" + results (mt/run-mbql-query orders + {:joins [{:source-table $$products + :condition [:= $product_id [:field %products.id {:join-alias join-alias}]] + :alias join-alias + :fields [[:field %products.title {:join-alias join-alias}]]}] + :fields [$orders.id + [:field %products.title {:join-alias join-alias}]] + :limit 4})] + (doseq [[location metadata] {"data.cols" (mt/cols results) + "data.results_metadata.columns" (get-in results [:data :results_metadata :columns])}] + (testing location + (is (= (mt/$ids + [{:display_name "ID" + :field_ref $orders.id} + (merge + {:display_name (str join-alias " → Title") + :field_ref [:field %products.title {:join-alias join-alias}]} + ;; `source_alias` is only included in `data.cols`, but not in `results_metadata` + (when (= location "data.cols") + {:source_alias join-alias}))]) + (map + #(select-keys % [:display_name :field_ref :source_alias]) + metadata)))))))))) diff --git a/test/metabase/query_processor/middleware/escape_join_aliases_test.clj b/test/metabase/query_processor/middleware/escape_join_aliases_test.clj index 7b6691732f1..698f0f250f8 100644 --- a/test/metabase/query_processor/middleware/escape_join_aliases_test.clj +++ b/test/metabase/query_processor/middleware/escape_join_aliases_test.clj @@ -22,7 +22,8 @@ :condition [:= [:field 3 nil] [:field 4 {:join-alias "cat_2"}]]}] :fields [[:field 3 nil] [:field 4 {:join-alias "Cat"}] - [:field 4 {:join-alias "cat_2"}]]}} + [:field 4 {:join-alias "cat_2"}]]} + :info {:alias/escaped->original {"Cat" "Cat", "cat_2" "cat"}}} (escape-join-aliases/escape-join-aliases {:database 1 :type :query @@ -34,7 +35,26 @@ :condition [:= [:field 3 nil] [:field 4 {:join-alias "cat"}]]}] :fields [[:field 3 nil] [:field 4 {:join-alias "Cat"}] - [:field 4 {:join-alias "cat"}]]}})))))) + [:field 4 {:join-alias "cat"}]]}}))))) + (testing "no need to include alias info if they have no changed" + (driver/with-driver :h2 + (let [query {:database 1 + :type :query + :query {:joins [{:source-table 2 + :alias "cat_1" + :condition [:= [:field 3 nil] [:field 4 {:join-alias "Cat"}]]} + {:source-table 2 + :alias "Cat_2" + :condition [:= [:field 3 nil] [:field 4 {:join-alias "cat"}]]}] + :fields [[:field 3 nil] + [:field 4 {:join-alias "Cat"}] + [:field 4 {:join-alias "cat"}]]}} + q' (escape-join-aliases/escape-join-aliases query)] + (testing "No need for a map with identical mapping" + (is (not (contains? (:info q') :alias/escaped->original)))) + (testing "aliases in the query remain the same" + (is (= (#'escape-join-aliases/all-join-aliases query) + (#'escape-join-aliases/all-join-aliases q')))))))) (driver/register! ::custom-escape :abstract? true) @@ -55,7 +75,9 @@ :condition [:= [:field 3 nil] [:field 4 {:join-alias "ê°€_50a93035"}]]}] :fields [[:field 3 nil] [:field 4 {:join-alias "012_68c4f033"}] - [:field 4 {:join-alias "ê°€_50a93035"}]]}} + [:field 4 {:join-alias "ê°€_50a93035"}]]} + :info {:alias/escaped->original {"ê°€_50a93035" "가나다ë¼ë§ˆ" + "012_68c4f033" "0123456789abcdef"}}} (escape-join-aliases/escape-join-aliases {:database 1 :type :query diff --git a/test/metabase/query_processor_test.clj b/test/metabase/query_processor_test.clj index f6822ba1853..d8af8504d89 100644 --- a/test/metabase/query_processor_test.clj +++ b/test/metabase/query_processor_test.clj @@ -191,10 +191,8 @@ (update :display_name (partial format "%s → %s" (str/replace (:display_name source-col) #"(?i)\sid$" ""))) (assoc :field_ref [:field (:id dest-col) {:source-field (:id source-col)}] :fk_field_id (:id source-col) - :source_alias (driver/escape-alias - driver/*driver* - (#'qp.add-implicit-joins/join-alias (db/select-one-field :name Table :id (data/id dest-table-kw)) - (:name source-col))))))) + :source_alias (#'qp.add-implicit-joins/join-alias (db/select-one-field :name Table :id (data/id dest-table-kw)) + (:name source-col)))))) (declare cols) -- GitLab