From 3ddce6076bd21ef47a163f961ea08b20248bcf1a Mon Sep 17 00:00:00 2001
From: dpsutton <dan@dpsutton.com>
Date: Thu, 12 Jan 2023 16:38:16 -0600
Subject: [PATCH] restore aliases before annotating (#27637)

* restore aliases before annotating

* cleanup

* fix tests

* Don't add escaped->original if aliases have not changed

No need to walk and replace the aliases if they are identical. And in
that case, no need to keep a mapping of identical to identical. Not
super important but saves some time and complexity, and keeps other
tests passing since the presence of [:info :alias/escaped->original] in
the query caused them to trivially fail.

* oracle has a smaller limit and _expected_ mangled

previous testing behavior was "what happened" and not what should
happen. we fixed the bug but the "expect garbage" behavior was still
present

* Relax :alias/escaped->original schema

oracle tests use keywords for the alias

```clojure
{:alias/escaped->original
 {:test-data-venues--via-a763718f "test_data_venues__via__venue_id",
  :test-data-users--via--user-id "test_data_users__via__user_id"}}}
```

No idea why that is keyworded

* relax `:alias/escaped->original` schema

see previous commit
---
 shared/src/metabase/mbql/schema.cljc          | 15 +++++-----
 .../query_processor/middleware/annotate.clj   | 12 +++++---
 .../middleware/escape_join_aliases.clj        | 21 ++++++++++++--
 .../middleware/annotate_test.clj              | 29 +++++++++++++++++++
 .../middleware/escape_join_aliases_test.clj   | 28 ++++++++++++++++--
 test/metabase/query_processor_test.clj        |  6 ++--
 6 files changed, 90 insertions(+), 21 deletions(-)

diff --git a/shared/src/metabase/mbql/schema.cljc b/shared/src/metabase/mbql/schema.cljc
index 1c3f0344ffd..411c2006c03 100644
--- a/shared/src/metabase/mbql/schema.cljc
+++ b/shared/src/metabase/mbql/schema.cljc
@@ -1710,19 +1710,20 @@
   based on this information, don't do it!"
   {;; These keys are nice to pass in if you're running queries on the backend and you know these values. They aren't
    ;; used for permissions checking or anything like that so don't try to be sneaky
-   (s/optional-key :context)      (s/maybe Context)
-   (s/optional-key :executed-by)  (s/maybe helpers/IntGreaterThanZero)
-   (s/optional-key :card-id)      (s/maybe helpers/IntGreaterThanZero)
-   (s/optional-key :card-name)    (s/maybe helpers/NonBlankString)
-   (s/optional-key :dashboard-id) (s/maybe helpers/IntGreaterThanZero)
-   (s/optional-key :pulse-id)     (s/maybe helpers/IntGreaterThanZero)
+   (s/optional-key :context)                   (s/maybe Context)
+   (s/optional-key :executed-by)               (s/maybe helpers/IntGreaterThanZero)
+   (s/optional-key :card-id)                   (s/maybe helpers/IntGreaterThanZero)
+   (s/optional-key :card-name)                 (s/maybe helpers/NonBlankString)
+   (s/optional-key :dashboard-id)              (s/maybe helpers/IntGreaterThanZero)
+   (s/optional-key :alias/escaped->original)   (s/maybe {s/Any s/Any})
+   (s/optional-key :pulse-id)                  (s/maybe helpers/IntGreaterThanZero)
    ;; Metadata for datasets when querying the dataset. This ensures that user edits to dataset metadata are blended in
    ;; with runtime computed metadata so that edits are saved.
    (s/optional-key :metadata/dataset-metadata) (s/maybe [{s/Any s/Any}])
    ;; `:hash` gets added automatically by `process-query-and-save-execution!`, so don't try passing
    ;; these in yourself. In fact, I would like this a lot better if we could take these keys out of `:info` entirely
    ;; and have the code that saves QueryExceutions figure out their values when it goes to save them
-   (s/optional-key :query-hash) (s/maybe #?(:clj (Class/forName "[B")
+   (s/optional-key :query-hash)                (s/maybe #?(:clj (Class/forName "[B")
                                             :cljs s/Any))})
 
 
diff --git a/src/metabase/query_processor/middleware/annotate.clj b/src/metabase/query_processor/middleware/annotate.clj
index 55a2e857be7..8464444cffe 100644
--- a/src/metabase/query_processor/middleware/annotate.clj
+++ b/src/metabase/query_processor/middleware/annotate.clj
@@ -12,6 +12,7 @@
    [metabase.mbql.util.match :as mbql.match]
    [metabase.models.humanization :as humanization]
    [metabase.query-processor.error-type :as qp.error-type]
+   [metabase.query-processor.middleware.escape-join-aliases :as escape-join-aliases]
    [metabase.query-processor.reducible :as qp.reducible]
    [metabase.query-processor.store :as qp.store]
    [metabase.query-processor.util :as qp.util]
@@ -695,12 +696,15 @@
 (defn add-column-info
   "Middleware for adding type information about the columns in the query results (the `:cols` key)."
   [{query-type :type, :as query
-    {:keys [:metadata/dataset-metadata]} :info} rff]
+    {:keys [:metadata/dataset-metadata :alias/escaped->original]} :info} rff]
   (fn add-column-info-rff* [metadata]
     (if (= query-type :query)
-      (rff (cond-> (assoc metadata :cols (merged-column-info query metadata))
-             (seq dataset-metadata)
-             (update :cols qp.util/combine-metadata dataset-metadata)))
+      (let [query (cond-> query
+                    (seq escaped->original) ;; if we replaced aliases, restore them
+                    (escape-join-aliases/restore-aliases escaped->original))]
+       (rff (cond-> (assoc metadata :cols (merged-column-info query metadata))
+              (seq dataset-metadata)
+              (update :cols qp.util/combine-metadata dataset-metadata))))
       ;; rows sampling is only needed for native queries! TODO ­ not sure we really even need to do for native
       ;; queries...
       (let [metadata (cond-> (update metadata :cols annotate-native-cols)
diff --git a/src/metabase/query_processor/middleware/escape_join_aliases.clj b/src/metabase/query_processor/middleware/escape_join_aliases.clj
index b2295152f2f..8ab4125c58a 100644
--- a/src/metabase/query_processor/middleware/escape_join_aliases.clj
+++ b/src/metabase/query_processor/middleware/escape_join_aliases.clj
@@ -1,5 +1,6 @@
 (ns metabase.query-processor.middleware.escape-join-aliases
   (:require
+   [clojure.set :as set]
    [clojure.string :as str]
    [clojure.tools.logging :as log]
    [metabase.driver :as driver]
@@ -38,7 +39,8 @@
 (defn escape-join-aliases
   "Pre-processing middleware. Make sure all join aliases are unique, regardless of case (some databases treat table
   aliases as case-insensitive, even if table names themselves are not); escape all join aliases
-  with [[metabase.driver/escape-alias]]."
+  with [[metabase.driver/escape-alias]]. If aliases are 'uniquified', will include a map
+  at [:info :alias/escaped->original] of the escaped name back to the original, to be restored in post processing."
   [query]
   (let [all-join-aliases (all-join-aliases query)]
     (log/tracef "Join aliases in query: %s" (pr-str all-join-aliases))
@@ -55,5 +57,18 @@
                                                   (escape (str original \_ suffix))))
             original->escaped (into {}
                                     (map (juxt identity (comp uniquify escape)))
-                                    all-join-aliases)]
-        (rename-join-aliases query original->escaped)))))
+                                    all-join-aliases)
+            aliases-changed?  (some (fn [[original escaped]] (not= original escaped))
+                                    original->escaped)]
+        (if aliases-changed?
+          (-> query
+              (rename-join-aliases original->escaped)
+              (assoc-in [:info :alias/escaped->original] (set/map-invert original->escaped)))
+          query)))))
+
+(defn restore-aliases
+  "Restore aliases in query.
+  If aliases were changed in [[escape-join-aliases]], there is a key in `:info` of `:alias/escaped->original` which we
+  can restore the aliases in the query."
+  [query escaped->original]
+  (rename-join-aliases query escaped->original))
diff --git a/test/metabase/query_processor/middleware/annotate_test.clj b/test/metabase/query_processor/middleware/annotate_test.clj
index 8031963da5a..c3c67f5ed15 100644
--- a/test/metabase/query_processor/middleware/annotate_test.clj
+++ b/test/metabase/query_processor/middleware/annotate_test.clj
@@ -795,3 +795,32 @@
           (is (= "alias → B Column" (-> results :data :results_metadata
                                         :columns second :display_name))
               "Results metadata cols has wrong display name"))))))
+
+(deftest preserve-original-join-alias-test
+  (testing "The join alias for the `:field_ref` in results metadata should match the one originally specified (#27464)"
+    (mt/test-drivers (mt/normal-drivers-with-feature :left-join)
+      (mt/dataset sample-dataset
+        (let [join-alias "Products with a very long name - Product ID with a very long name"
+              results    (mt/run-mbql-query orders
+                           {:joins  [{:source-table $$products
+                                      :condition    [:= $product_id [:field %products.id {:join-alias join-alias}]]
+                                      :alias        join-alias
+                                      :fields       [[:field %products.title {:join-alias join-alias}]]}]
+                            :fields [$orders.id
+                                     [:field %products.title {:join-alias join-alias}]]
+                            :limit  4})]
+          (doseq [[location metadata] {"data.cols"                     (mt/cols results)
+                                       "data.results_metadata.columns" (get-in results [:data :results_metadata :columns])}]
+            (testing location
+              (is (= (mt/$ids
+                       [{:display_name "ID"
+                         :field_ref    $orders.id}
+                        (merge
+                         {:display_name (str join-alias " → Title")
+                          :field_ref    [:field %products.title {:join-alias join-alias}]}
+                         ;; `source_alias` is only included in `data.cols`, but not in `results_metadata`
+                         (when (= location "data.cols")
+                           {:source_alias join-alias}))])
+                     (map
+                      #(select-keys % [:display_name :field_ref :source_alias])
+                      metadata))))))))))
diff --git a/test/metabase/query_processor/middleware/escape_join_aliases_test.clj b/test/metabase/query_processor/middleware/escape_join_aliases_test.clj
index 7b6691732f1..698f0f250f8 100644
--- a/test/metabase/query_processor/middleware/escape_join_aliases_test.clj
+++ b/test/metabase/query_processor/middleware/escape_join_aliases_test.clj
@@ -22,7 +22,8 @@
                                    :condition    [:= [:field 3 nil] [:field 4 {:join-alias "cat_2"}]]}]
                          :fields [[:field 3 nil]
                                   [:field 4 {:join-alias "Cat"}]
-                                  [:field 4 {:join-alias "cat_2"}]]}}
+                                  [:field 4 {:join-alias "cat_2"}]]}
+              :info {:alias/escaped->original {"Cat" "Cat", "cat_2" "cat"}}}
              (escape-join-aliases/escape-join-aliases
               {:database 1
                :type     :query
@@ -34,7 +35,26 @@
                                     :condition    [:= [:field 3 nil] [:field 4 {:join-alias "cat"}]]}]
                           :fields [[:field 3 nil]
                                    [:field 4 {:join-alias "Cat"}]
-                                   [:field 4 {:join-alias "cat"}]]}}))))))
+                                   [:field 4 {:join-alias "cat"}]]}})))))
+  (testing "no need to include alias info if they have no changed"
+    (driver/with-driver :h2
+      (let [query {:database 1
+                   :type     :query
+                   :query    {:joins  [{:source-table 2
+                                        :alias        "cat_1"
+                                        :condition    [:= [:field 3 nil] [:field 4 {:join-alias "Cat"}]]}
+                                       {:source-table 2
+                                        :alias        "Cat_2"
+                                        :condition    [:= [:field 3 nil] [:field 4 {:join-alias "cat"}]]}]
+                              :fields [[:field 3 nil]
+                                       [:field 4 {:join-alias "Cat"}]
+                                       [:field 4 {:join-alias "cat"}]]}}
+            q' (escape-join-aliases/escape-join-aliases query)]
+        (testing "No need for a map with identical mapping"
+          (is (not (contains? (:info q') :alias/escaped->original))))
+        (testing "aliases in the query remain the same"
+          (is (= (#'escape-join-aliases/all-join-aliases query)
+                 (#'escape-join-aliases/all-join-aliases q'))))))))
 
 (driver/register! ::custom-escape :abstract? true)
 
@@ -55,7 +75,9 @@
                                    :condition    [:= [:field 3 nil] [:field 4 {:join-alias "ê°€_50a93035"}]]}]
                          :fields [[:field 3 nil]
                                   [:field 4 {:join-alias "012_68c4f033"}]
-                                  [:field 4 {:join-alias "ê°€_50a93035"}]]}}
+                                  [:field 4 {:join-alias "ê°€_50a93035"}]]}
+              :info {:alias/escaped->original {"가_50a93035" "가나다라마"
+                                               "012_68c4f033" "0123456789abcdef"}}}
              (escape-join-aliases/escape-join-aliases
               {:database 1
                :type     :query
diff --git a/test/metabase/query_processor_test.clj b/test/metabase/query_processor_test.clj
index f6822ba1853..d8af8504d89 100644
--- a/test/metabase/query_processor_test.clj
+++ b/test/metabase/query_processor_test.clj
@@ -191,10 +191,8 @@
         (update :display_name (partial format "%s → %s" (str/replace (:display_name source-col) #"(?i)\sid$" "")))
         (assoc :field_ref    [:field (:id dest-col) {:source-field (:id source-col)}]
                :fk_field_id  (:id source-col)
-               :source_alias (driver/escape-alias
-                              driver/*driver*
-                              (#'qp.add-implicit-joins/join-alias (db/select-one-field :name Table :id (data/id dest-table-kw))
-                                                                  (:name source-col)))))))
+               :source_alias (#'qp.add-implicit-joins/join-alias (db/select-one-field :name Table :id (data/id dest-table-kw))
+                                                                 (:name source-col))))))
 
 (declare cols)
 
-- 
GitLab