From d16bbb8a19d95b48a475e254aa6423097da6d57e Mon Sep 17 00:00:00 2001
From: metamben <103100869+metamben@users.noreply.github.com>
Date: Thu, 11 May 2023 19:51:37 +0300
Subject: [PATCH] Prevent fields hiding joined fields (#30662)

* Prevent fields hiding joined fields (#30644)

Fixes #29904.

* Resolve join columns in the join stages

The original implementation got into infinite loops.
---
 src/metabase/lib/join.cljc              | 11 +++---
 src/metabase/lib/stage.cljc             | 30 +++++++++++-----
 test/metabase/lib/join_test.cljc        | 11 +++++-
 test/metabase/lib/metadata/jvm_test.clj | 48 +++++++++++++++++++++++++
 test/metabase/lib/order_by_test.cljc    | 20 ++++++++---
 test/metabase/lib/stage_test.cljc       | 40 +++++++++++++++++++++
 6 files changed, 140 insertions(+), 20 deletions(-)

diff --git a/src/metabase/lib/join.cljc b/src/metabase/lib/join.cljc
index 12b0ecc35fd..1249dc0bede 100644
--- a/src/metabase/lib/join.cljc
+++ b/src/metabase/lib/join.cljc
@@ -107,11 +107,12 @@
 (defmethod lib.metadata.calculation/metadata-method :mbql/join
   [query stage-number {:keys [fields stages], join-alias :alias, :or {fields :none}, :as _join}]
   (when-not (= fields :none)
-    (let [field-metadatas (if (= fields :all)
-                            (lib.metadata.calculation/metadata (assoc query :stages stages) -1 (last stages))
-                            (for [field-ref fields]
-                              ;; resolve the field ref in the context of the join. Not sure if this is right.
-                              (lib.metadata.calculation/metadata query stage-number field-ref)))]
+    (let [join-query (assoc query :stages stages)
+          field-metadatas (if (= fields :all)
+                            (lib.metadata.calculation/metadata join-query -1 (peek stages))
+                            (for [field-ref fields
+                                  :let [join-field (lib.options/update-options field-ref dissoc :join-alias)]]
+                              (lib.metadata.calculation/metadata join-query -1 join-field)))]
       (mapv (fn [field-metadata]
               (column-from-join-fields query stage-number field-metadata join-alias))
             field-metadatas))))
diff --git a/src/metabase/lib/stage.cljc b/src/metabase/lib/stage.cljc
index e0ce4031361..1d659d394d2 100644
--- a/src/metabase/lib/stage.cljc
+++ b/src/metabase/lib/stage.cljc
@@ -107,7 +107,7 @@
               :lib/source-column-alias  (lib.metadata.calculation/column-name query stage-number metadata)
               :lib/desired-column-alias (unique-name-fn (lib.field/desired-alias query metadata)))))))
 
-(mu/defn ^:private breakout-ags-fields-columns :- [:maybe lib.metadata.calculation/ColumnsWithUniqueAliases]
+(mu/defn ^:private summary-columns :- [:maybe lib.metadata.calculation/ColumnsWithUniqueAliases]
   [query          :- ::lib.schema/query
    stage-number   :- :int
    unique-name-fn :- fn?]
@@ -116,8 +116,7 @@
          (mapcat (fn [f]
                    (f query stage-number unique-name-fn)))
          [breakouts-columns
-          aggregations-columns
-          fields-columns])))
+          aggregations-columns])))
 
 (mu/defn ^:private previous-stage-metadata :- [:maybe lib.metadata.calculation/ColumnsWithUniqueAliases]
   "Metadata for the previous stage, if there is one."
@@ -211,8 +210,9 @@
 
 (mu/defn ^:private stage-metadata :- [:maybe lib.metadata.calculation/ColumnsWithUniqueAliases]
   "Return results metadata about the expected columns in an MBQL query stage. If the query has
-  aggregations/breakouts/fields, then return THOSE. Otherwise return the defaults based on the source Table or
-  previous stage + joins."
+  aggregations/breakouts, then return those and the fields columns.
+  Otherwise if there are fields columns return those and the joined columns.
+  Otherwise return the defaults based on the source Table or previous stage + joins."
   ([query stage-number]
    (stage-metadata query stage-number (lib.util/unique-name-generator)))
 
@@ -221,11 +221,23 @@
     unique-name-fn :- fn?]
    (or
     (existing-stage-metadata query stage-number)
-    (let [query (ensure-previous-stages-have-metadata query stage-number)]
+    (let [query (ensure-previous-stages-have-metadata query stage-number)
+          summary-cols (summary-columns query stage-number unique-name-fn)
+          field-cols (fields-columns query stage-number unique-name-fn)]
       ;; ... then calculate metadata for this stage
-      (or
-       (breakout-ags-fields-columns query stage-number unique-name-fn)
-       (lib.metadata.calculation/default-columns query stage-number (lib.util/query-stage query stage-number) unique-name-fn))))))
+      (cond
+        summary-cols
+        (into summary-cols field-cols)
+
+        field-cols
+        (do (doall field-cols)          ; force generation of unique names before join columns
+            (into []
+                  (m/distinct-by #(dissoc % :source_alias :lib/source :lib/desired-column-alias))
+                  (concat field-cols
+                          (lib.join/all-joins-default-columns query stage-number unique-name-fn))))
+
+        :else
+        (lib.metadata.calculation/default-columns query stage-number (lib.util/query-stage query stage-number) unique-name-fn))))))
 
 (defmethod lib.metadata.calculation/metadata-method ::stage
   [query stage-number _stage]
diff --git a/test/metabase/lib/join_test.cljc b/test/metabase/lib/join_test.cljc
index 0e346d98bac..78a47ab7a35 100644
--- a/test/metabase/lib/join_test.cljc
+++ b/test/metabase/lib/join_test.cljc
@@ -214,7 +214,12 @@
               :lib/source-column-alias       "ID"
               :lib/desired-column-alias      "Cat__ID"
               :metabase.lib.field/join-alias "Cat"
-              :lib/source                    :source/fields}]
+              :lib/source                    :source/fields}
+             {:name                          "NAME"
+              :lib/source-column-alias       "NAME"
+              :lib/desired-column-alias      "Cat__NAME"
+              :metabase.lib.field/join-alias "Cat"
+              :lib/source                    :source/joins}]
             (lib.metadata.calculation/metadata query)))
     (testing "Introduce a new stage"
       (let [query' (lib/append-stage query)]
@@ -225,6 +230,10 @@
                  {:name                          "ID"
                   :lib/source-column-alias       "Cat__ID"
                   :lib/desired-column-alias      "Cat__ID"
+                  :lib/source                    :source/previous-stage}
+                 {:name                          "NAME"
+                  :lib/source-column-alias       "Cat__NAME"
+                  :lib/desired-column-alias      "Cat__NAME"
                   :lib/source                    :source/previous-stage}]
                 (lib.metadata.calculation/metadata query')))))))
 
diff --git a/test/metabase/lib/metadata/jvm_test.clj b/test/metabase/lib/metadata/jvm_test.clj
index ee0278363d7..12029cc4d4d 100644
--- a/test/metabase/lib/metadata/jvm_test.clj
+++ b/test/metabase/lib/metadata/jvm_test.clj
@@ -3,6 +3,7 @@
    [clojure.test :refer :all]
    [malli.core :as mc]
    [malli.error :as me]
+   [metabase.lib.convert :as lib.convert]
    [metabase.lib.core :as lib]
    [metabase.lib.metadata :as lib.metadata]
    [metabase.lib.metadata.calculation :as lib.metadata.calculation]
@@ -35,3 +36,50 @@
              {:lib/desired-column-alias "Cat__ID"}
              {:lib/desired-column-alias "Cat__NAME"}]
             (lib.metadata.calculation/metadata query)))))
+
+(deftest ^:parallel join-with-aggregation-reference-in-fields-metadata-test
+  (mt/dataset sample-dataset
+    (let [query (mt/mbql-query products
+                  {:joins [{:source-query {:source-table $$orders
+                                           :breakout     [$orders.product_id]
+                                           :aggregation  [[:sum $orders.quantity]]}
+                            :alias        "Orders"
+                            :condition    [:= $id &Orders.orders.product_id]
+                            :fields       [&Orders.orders.product_id
+                                           &Orders.*sum/Integer]}]
+                   :fields [$id]})
+          mlv2-query (lib/query (lib.metadata.jvm/application-database-metadata-provider (mt/id))
+                                (lib.convert/->pMBQL query))]
+      (is (=? [{:base-type :type/BigInteger
+                :semantic-type :type/PK
+                :table-id (mt/id :products)
+                :name "ID"
+                :lib/source :source/fields
+                :lib/source-column-alias "ID"
+                :effective-type :type/BigInteger
+                :id (mt/id :products :id)
+                :lib/desired-column-alias "ID"
+                :display-name "ID"}
+               {:metabase.lib.field/join-alias "Orders"
+                :base-type :type/Integer
+                :semantic-type :type/FK
+                :table-id (mt/id :orders)
+                :name "PRODUCT_ID"
+                :lib/source :source/joins
+                :lib/source-column-alias "PRODUCT_ID"
+                :effective-type :type/Integer
+                :id (mt/id :orders :product_id)
+                :lib/desired-column-alias "Orders__PRODUCT_ID"
+                :display-name "Product ID"
+                :source_alias "Orders"}
+               {:metabase.lib.field/join-alias "Orders"
+                :lib/type :metadata/field
+                :base-type :type/Integer
+                :name "sum"
+                :lib/source :source/joins
+                :lib/source-column-alias "sum"
+                :effective-type :type/Integer
+                :lib/desired-column-alias "Orders__sum"
+                :display-name "Sum"
+                :source_alias "Orders"}]
+              (lib.metadata.calculation/metadata mlv2-query))))))
diff --git a/test/metabase/lib/order_by_test.cljc b/test/metabase/lib/order_by_test.cljc
index 00e8563fca9..f86cd933395 100644
--- a/test/metabase/lib/order_by_test.cljc
+++ b/test/metabase/lib/order_by_test.cljc
@@ -473,13 +473,23 @@
               :display-name             "ID"
               :table-id                 (meta/id :categories)
               :lib/source-column-alias  "Cat__ID"
-              :lib/desired-column-alias "Cat__ID"}]
+              :lib/desired-column-alias "Cat__ID"}
+             {:id                       (meta/id :categories :name)
+              :name                     "NAME"
+              :lib/source               :source/previous-stage
+              :lib/type                 :metadata/field
+              :base-type                :type/Text
+              :effective-type           :type/Text
+              :display-name             "Name"
+              :table-id                 (meta/id :categories)
+              :lib/source-column-alias  "Cat__NAME"
+              :lib/desired-column-alias "Cat__NAME"}]
             (-> (lib/query-for-table-name meta/metadata-provider "VENUES")
                 (lib/join (-> (lib/join-clause
-                                (meta/table-metadata :categories)
-                                [(lib/=
-                                   (lib/field "VENUES" "CATEGORY_ID")
-                                   (lib/with-join-alias (lib/field "CATEGORIES" "ID") "Cat"))])
+                               (meta/table-metadata :categories)
+                               [(lib/=
+                                 (lib/field "VENUES" "CATEGORY_ID")
+                                 (lib/with-join-alias (lib/field "CATEGORIES" "ID") "Cat"))])
                               (lib/with-join-alias "Cat")
                               (lib/with-join-fields :all)))
                 (lib/with-fields [(lib/field "VENUES" "ID")
diff --git a/test/metabase/lib/stage_test.cljc b/test/metabase/lib/stage_test.cljc
index c60ad86bfdb..427485e8027 100644
--- a/test/metabase/lib/stage_test.cljc
+++ b/test/metabase/lib/stage_test.cljc
@@ -224,3 +224,43 @@
                  (not (:temporal-unit opts))))
               "DATE"]]
             (map lib/ref cols)))))
+
+(deftest ^:parallel fields-should-not-hide-joined-fields
+  (let [query (-> (lib/query-for-table-name meta/metadata-provider "VENUES")
+                  (lib/with-fields [(lib/field (meta/id :venues :id))
+                                    (lib/field (meta/id :venues :name))])
+                  (lib/join (-> (lib/join-clause (lib/table (meta/id :categories)))
+                                (lib/with-join-alias "Cat")
+                                (lib/with-join-fields :all))
+                            [(lib/= (lib/field "VENUES" "CATEGORY_ID")
+                                    (lib/field "CATEGORIES" "ID"))])
+                  (lib/append-stage))]
+    (is (=? [{:base-type :type/BigInteger,
+              :semantic-type :type/PK,
+              :name "ID",
+              :lib/source :source/previous-stage
+              :effective-type :type/BigInteger,
+              :lib/desired-column-alias "ID",
+              :display-name "ID"}
+             {:base-type :type/Text,
+              :semantic-type :type/Name,
+              :name "NAME",
+              :lib/source :source/previous-stage,
+              :effective-type :type/Text,
+              :lib/desired-column-alias "NAME",
+              :display-name "Name"}
+             {:base-type :type/BigInteger,
+              :semantic-type :type/PK,
+              :name "ID",
+              :lib/source :source/previous-stage
+              :effective-type :type/BigInteger,
+              :lib/desired-column-alias "Cat__ID",
+              :display-name "ID"}
+             {:base-type :type/Text,
+              :semantic-type :type/Name,
+              :name "NAME",
+              :lib/source :source/previous-stage
+              :effective-type :type/Text,
+              :lib/desired-column-alias "Cat__NAME",
+              :display-name "Name"}]
+            (lib.metadata.calculation/visible-columns query)))))
-- 
GitLab