From aa5d804b98bf2d9d662b019f152011c09508d273 Mon Sep 17 00:00:00 2001
From: metamben <103100869+metamben@users.noreply.github.com>
Date: Thu, 19 Oct 2023 01:52:10 +0300
Subject: [PATCH] Support duplicate columns hidden in cards (#34636)

---
 src/metabase/lib/join.cljc                    | 31 +++++---
 .../middleware/resolve_joins.clj              | 20 +++--
 .../middleware/upgrade_field_literals.clj     | 22 +++++-
 .../query_processor/util/add_alias_info.clj   | 33 +++++---
 .../explicit_joins_test.clj                   | 76 +++++++++++++++++++
 5 files changed, 151 insertions(+), 31 deletions(-)

diff --git a/src/metabase/lib/join.cljc b/src/metabase/lib/join.cljc
index 7a0aaa41dd5..4373ad18987 100644
--- a/src/metabase/lib/join.cljc
+++ b/src/metabase/lib/join.cljc
@@ -767,10 +767,17 @@
   [pk-col          :- [:maybe lib.metadata/ColumnMetadata]
    visible-columns :- [:maybe [:sequential lib.metadata/ColumnMetadata]]]
   (when-let [pk-id (:id pk-col)]
-    (m/find-first (fn [{:keys [fk-target-field-id], :as col}]
-                    (and (lib.types.isa/foreign-key? col)
-                         (= fk-target-field-id pk-id)))
-                  visible-columns)))
+    (let [candidates (filter (fn [{:keys [fk-target-field-id], :as col}]
+                               (and (lib.types.isa/foreign-key? col)
+                                    (= fk-target-field-id pk-id)))
+                             visible-columns)
+          primary (first candidates)]
+      (if (some #(= (:id %) (:id primary)) (rest candidates))
+        (when (not-any? #(= (:lib/desired-column-alias %) (:lib/desired-column-alias primary)) (rest candidates))
+          ;; there are multiple candidates but :lib/desired-column-alias disambiguates them,
+          ;; so reference by name
+          (dissoc primary :id))
+        primary))))
 
 (mu/defn ^:private fk-column-for :- [:maybe lib.metadata/ColumnMetadata]
   "Given a query stage find an FK column that points to the PK `pk-col`."
@@ -795,14 +802,14 @@
    (let [joinable-col-options {:include-implicitly-joinable? false
                                :include-implicitly-joinable-for-source-card? false}]
      (letfn [(filter-clause [x y]
-                   (lib.filter/filter-clause (lib.filter.operator/operator-def :=) x y))]
-           (or (when-let [pk-col (pk-column query stage-number joinable joinable-col-options)]
-                 (when-let [fk-col (fk-column-for query stage-number pk-col)]
-                   (filter-clause fk-col pk-col)))
-               (when-let [pk-col (pk-column query stage-number (lib.util/query-stage query stage-number) nil)]
-                 (when-let [fk-col (fk-column-for-pk-in pk-col (lib.metadata.calculation/visible-columns
-                                                                query stage-number joinable joinable-col-options))]
-                   (filter-clause pk-col fk-col))))))))
+               (lib.filter/filter-clause (lib.filter.operator/operator-def :=) x y))]
+       (or (when-let [pk-col (pk-column query stage-number joinable joinable-col-options)]
+             (when-let [fk-col (fk-column-for query stage-number pk-col)]
+               (filter-clause fk-col pk-col)))
+           (when-let [pk-col (pk-column query stage-number (lib.util/query-stage query stage-number) nil)]
+             (when-let [fk-col (fk-column-for-pk-in pk-col (lib.metadata.calculation/visible-columns
+                                                            query stage-number joinable joinable-col-options))]
+               (filter-clause pk-col fk-col))))))))
 
 (defn- add-join-alias-to-joinable-columns [cols a-join]
   (let [join-alias     (lib.join.util/current-join-alias a-join)
diff --git a/src/metabase/query_processor/middleware/resolve_joins.clj b/src/metabase/query_processor/middleware/resolve_joins.clj
index 8bbf5fa1087..360b1e09920 100644
--- a/src/metabase/query_processor/middleware/resolve_joins.clj
+++ b/src/metabase/query_processor/middleware/resolve_joins.clj
@@ -71,10 +71,16 @@
   (when-not (seq source-metadata)
     (throw (ex-info (tru "Cannot use :fields :all in join against source query unless it has :source-metadata.")
                     {:join join})))
-  (for [{field-name :name, base-type :base_type, field-id :id} source-metadata]
-    (if field-id
-      [:field field-id   {:join-alias alias}]
-      [:field field-name {:base-type base-type, :join-alias alias}])))
+  (let [duplicate-ids (into #{}
+                            (keep (fn [[item freq]]
+                                    (when (> freq 1)
+                                      item)))
+                            (frequencies (map :id source-metadata)))]
+    (for [{field-name :name, base-type :base_type, field-id :id} source-metadata]
+      (if (and field-id (not (contains? duplicate-ids field-id)))
+        ;; field-id is a unique reference, use it
+        [:field field-id   {:join-alias alias}]
+        [:field field-name {:base-type base-type, :join-alias alias}]))))
 
 (mu/defn ^:private handle-all-fields :- mbql.s/Join
   "Replace `:fields :all` in a join with an appropriate list of Fields."
@@ -83,9 +89,9 @@
    join
    (when (= fields :all)
      {:fields (if source-query
-               (source-metadata->fields join source-metadata)
-               (for [[_ id-or-name opts] (qp.add-implicit-clauses/sorted-implicit-fields-for-table source-table)]
-                 [:field id-or-name (assoc opts :join-alias alias)]))})))
+                (source-metadata->fields join source-metadata)
+                (for [[_ id-or-name opts] (qp.add-implicit-clauses/sorted-implicit-fields-for-table source-table)]
+                  [:field id-or-name (assoc opts :join-alias alias)]))})))
 
 (mu/defn ^:private resolve-references :- Joins
   [joins :- Joins]
diff --git a/src/metabase/query_processor/middleware/upgrade_field_literals.clj b/src/metabase/query_processor/middleware/upgrade_field_literals.clj
index 3e7f77c4f85..611811fffeb 100644
--- a/src/metabase/query_processor/middleware/upgrade_field_literals.clj
+++ b/src/metabase/query_processor/middleware/upgrade_field_literals.clj
@@ -20,15 +20,33 @@
     (qp.store/cached ::bad-clause-warning
       (log/warn (u/colorize :red message)))))
 
+(defn- unique-reference?
+  "Check if the field-id and the (possibly missing) join-alias of `field-clause`
+  result in an unambiguous reference to one of the `columns`
+  The join-alias of columns is taken from their field_ref property."
+  [field-clause columns]
+  (let [[_ field-id {:keys [join-alias]}] field-clause
+        matches-by-id (filter #(= (:id %) field-id) columns)]
+    (or (nil? (next matches-by-id))
+        (->> matches-by-id (filter #(= (get-in % [:field-ref 2 :join-alias]) join-alias)) count (= 1)))))
+
 (defn- fix-clause [{:keys [source-aliases field-name->field]} [_ field-name options :as field-clause]]
   ;; attempt to find a corresponding Field ref from the source metadata.
-  (let [field-ref (:field_ref (get field-name->field field-name))]
+  (let [field-ref (:field_ref (get field-name->field field-name))
+        ;; the map contains duplicate columns to support lowercase lookup
+        columns (set (vals field-name->field))]
     (cond
       field-ref
       (mbql.u/match-one field-ref
         ;; If the matching Field ref is an integer `:field` clause then replace it with the corrected clause.
         [:field (id :guard integer?) new-options]
-        [:field id (merge new-options (dissoc options :base-type))]
+        (let [new-clause [:field id (merge new-options (dissoc options :base-type))]]
+          (if (unique-reference? new-clause columns)
+            new-clause
+            (u/prog1 field-clause
+              (warn-once
+               (format "Warning: upgrading field literal %s would result in an ambiguous reference. Not upgrading."
+                       (pr-str field-clause))))))
 
         ;; Otherwise the Field clause in the source query uses a string Field name as well, but that name differs from
         ;; the one in `source-aliases`. Will this work? Not sure whether or not we need to log something about this.
diff --git a/src/metabase/query_processor/util/add_alias_info.clj b/src/metabase/query_processor/util/add_alias_info.clj
index 2aedb365a5c..8c694241b22 100644
--- a/src/metabase/query_processor/util/add_alias_info.clj
+++ b/src/metabase/query_processor/util/add_alias_info.clj
@@ -208,8 +208,13 @@
 (defn- fuzzify [clause]
   (mbql.u/update-field-options clause dissoc :temporal-unit :binning))
 
-(defn- matching-field-in-source-query* [source-query field-clause & {:keys [normalize-fn]
-                                                                     :or   {normalize-fn normalize-clause}}]
+(defn- field-signature
+  [field-clause]
+  [(second field-clause) (get-in field-clause [2 :join-alias])])
+
+(defn- matching-field-in-source-query*
+  [source-query source-metadata field-clause & {:keys [normalize-fn]
+                                                :or   {normalize-fn normalize-clause}}]
   (let [normalized    (normalize-fn field-clause)
         all-exports   (exports source-query)
         field-exports (filter (partial mbql.u/is-clause? :field)
@@ -238,18 +243,26 @@
                             (filter (partial mbql.u/is-clause? :expression) all-exports))
               (m/find-first (fn [[_ _ opts :as _aggregation-options-clause]]
                               (= (::source-alias opts) field-name))
-                            (filter (partial mbql.u/is-clause? :aggregation-options) all-exports)))))))
+                            (filter (partial mbql.u/is-clause? :aggregation-options) all-exports))))
+        ;; look for a field referenced by the name in source-metadata
+        (let [field-name (second field-clause)]
+          (when (string? field-name)
+            (when-let [column (m/find-first #(= (:name %) field-name) source-metadata)]
+              (let [signature (field-signature (:field_ref column))]
+                (m/find-first #(= (field-signature %) signature) field-exports))))))))
 
 (defn- matching-field-in-join-at-this-level
   "If `field-clause` is the result of a join *at this level* with a `:source-query`, return the 'source' `:field` clause
   from that source query."
   [inner-query [_ _ {:keys [join-alias]} :as field-clause]]
   (when join-alias
-    (when-let [matching-join-source-query (:source-query (join-with-alias inner-query join-alias))]
-      (matching-field-in-source-query*
-       matching-join-source-query
-       field-clause
-       :normalize-fn #(mbql.u/update-field-options (normalize-clause %) dissoc :join-alias)))))
+    (let [{:keys [source-query source-metadata]} (join-with-alias inner-query join-alias)]
+      (when source-query
+        (matching-field-in-source-query*
+         source-query
+         source-metadata
+         field-clause
+         :normalize-fn #(mbql.u/update-field-options (normalize-clause %) dissoc :join-alias))))))
 
 (defn- field-alias-in-join-at-this-level
   "If `field-clause` is the result of a join at this level, return the `::desired-alias` from that join (where the Field is
@@ -259,10 +272,10 @@
     desired-alias))
 
 (defn- matching-field-in-source-query
-  [{:keys [source-query], :as inner-query} field-clause]
+  [{:keys [source-query source-metadata], :as inner-query} field-clause]
   (when (and source-query
              (= (field-source-table-alias inner-query field-clause) ::source))
-    (matching-field-in-source-query* source-query field-clause)))
+    (matching-field-in-source-query* source-query source-metadata field-clause)))
 
 (defn- field-alias-in-source-query
   [inner-query field-clause]
diff --git a/test/metabase/query_processor_test/explicit_joins_test.clj b/test/metabase/query_processor_test/explicit_joins_test.clj
index bd8590f652c..7421ecbb528 100644
--- a/test/metabase/query_processor_test/explicit_joins_test.clj
+++ b/test/metabase/query_processor_test/explicit_joins_test.clj
@@ -8,6 +8,8 @@
    [metabase.driver.sql.query-processor-test-util :as sql.qp-test-util]
    [metabase.driver.util :as driver.u]
    [metabase.lib.convert :as lib.convert]
+   [metabase.lib.core :as lib]
+   [metabase.lib.metadata :as lib.metadata]
    [metabase.lib.metadata.jvm :as lib.metadata.jvm]
    [metabase.lib.test-util.mocks-31769 :as lib.tu.mocks-31769]
    [metabase.query-processor :as qp]
@@ -324,6 +326,80 @@
                      :order-by [[:asc $name]]
                      :limit    3})))))))))
 
+;; This is a very contrived test. We create two identical cards and join them both
+;; in a third card. This means that first two cards bring fields that differ only in
+;; their join aliases, but these "get lost" when the third card is joined to a table.
+;; Since we have multiple fields with the same ID, the IDs cannot be used to
+;; unambiguously refer to the underlying fields. The only way to reference them
+;; properly is by the name they have in the source metadata.
+(deftest ^:parallel join-against-multiple-card-copies-test
+  (mt/test-drivers (mt/normal-drivers-with-feature :left-join)
+    (testing "join against multiple copies of a card (#34227)"
+      (mt/dataset sample-dataset
+        (let [metadata-provider (qp.test-util/metadata-provider-with-cards-with-metadata-for-queries
+                                 [(mt/mbql-query orders
+                                    {:breakout [$user_id]
+                                     :aggregation [[:count]]})
+                                  (mt/mbql-query orders
+                                    {:breakout [$user_id]
+                                     :aggregation [[:count]]})
+                                  (mt/mbql-query people
+                                    {:fields [$id]
+                                     :joins [{:fields :all
+                                              :alias "ord1"
+                                              :source-table "card__1"
+                                              :condition [:= $id &ord1.orders.user_id]}
+                                             {:fields :all
+                                              :alias "ord2"
+                                              :source-table "card__2"
+                                              :condition [:= $id &ord2.orders.user_id]}]})])]
+          (qp.store/with-metadata-provider metadata-provider
+            (let [top-card-query (mt/mbql-query people
+                                   {:source-table "card__3"
+                                    :limit        3})
+                  [cid cuser-id ccount cuser-id2 ccount2] (->> top-card-query
+                                                               qp/query->expected-cols
+                                                               (map :name))
+                  cid2 (str cid "_2")
+                  col-data-fn   (juxt            :id       :name     :source_alias)
+                  top-card-cols [[(mt/id :people :id)      cid       nil]
+                                 [(mt/id :orders :user_id) cuser-id  "ord1"]
+                                 [nil                      ccount    "ord1"]
+                                 [(mt/id :orders :user_id) cuser-id2 "ord2"]
+                                 [nil                      ccount2   "ord2"]]]
+              (testing "sanity"
+                (is (= top-card-cols
+                       (->> top-card-query
+                            qp/process-query
+                            mt/cols
+                            (map col-data-fn)))))
+
+              (when (= driver/*driver* :h2)
+                (testing "suggested join condition references the FK by name"
+                  (let [query (lib/query metadata-provider (lib.metadata/table metadata-provider (mt/id :people)))
+                        card-meta (lib.metadata/card metadata-provider 3)]
+                    (is (=? [:= {} [:field {} (mt/id :people :id)] [:field {} cuser-id]]
+                            (lib/suggested-join-condition query card-meta))))))
+
+              (testing "the query runs and returns correct data"
+                (is (= {:columns [cid cid2 cuser-id ccount cuser-id2 ccount2]
+                        :rows    [[1  1    1        11     1         11]
+                                  [2  nil  nil      nil    nil       nil]
+                                  [3  3    3        10     3         10]]}
+                       (-> (mt/mbql-query people
+                             {:joins    [{:alias        "peeps"
+                                          :source-table "card__3"
+                                          :fields       :all
+                                          :condition    [:= $id [:field cuser-id2 {:base-type :type/Integer
+                                                                                   :join-alias "peeps"}]]}]
+                              :fields [$id]
+                              :order-by [[:asc $id]]
+                              :limit    3})
+                           qp/process-query
+                           mt/rows+column-names
+                           ;; Oracle is returning java.math.BigDecimal objects
+                           (update :rows #(mt/format-rows-by [int int int int int int] %)))))))))))))
+
 (deftest ^:parallel join-on-field-literal-test
   (mt/test-drivers (mt/normal-drivers-with-feature :left-join)
     (testing "Can we join on a Field literal for a source query?"
-- 
GitLab