From aa5d804b98bf2d9d662b019f152011c09508d273 Mon Sep 17 00:00:00 2001 From: metamben <103100869+metamben@users.noreply.github.com> Date: Thu, 19 Oct 2023 01:52:10 +0300 Subject: [PATCH] Support duplicate columns hidden in cards (#34636) --- src/metabase/lib/join.cljc | 31 +++++--- .../middleware/resolve_joins.clj | 20 +++-- .../middleware/upgrade_field_literals.clj | 22 +++++- .../query_processor/util/add_alias_info.clj | 33 +++++--- .../explicit_joins_test.clj | 76 +++++++++++++++++++ 5 files changed, 151 insertions(+), 31 deletions(-) diff --git a/src/metabase/lib/join.cljc b/src/metabase/lib/join.cljc index 7a0aaa41dd5..4373ad18987 100644 --- a/src/metabase/lib/join.cljc +++ b/src/metabase/lib/join.cljc @@ -767,10 +767,17 @@ [pk-col :- [:maybe lib.metadata/ColumnMetadata] visible-columns :- [:maybe [:sequential lib.metadata/ColumnMetadata]]] (when-let [pk-id (:id pk-col)] - (m/find-first (fn [{:keys [fk-target-field-id], :as col}] - (and (lib.types.isa/foreign-key? col) - (= fk-target-field-id pk-id))) - visible-columns))) + (let [candidates (filter (fn [{:keys [fk-target-field-id], :as col}] + (and (lib.types.isa/foreign-key? col) + (= fk-target-field-id pk-id))) + visible-columns) + primary (first candidates)] + (if (some #(= (:id %) (:id primary)) (rest candidates)) + (when (not-any? #(= (:lib/desired-column-alias %) (:lib/desired-column-alias primary)) (rest candidates)) + ;; there are multiple candidates but :lib/desired-column-alias disambiguates them, + ;; so reference by name + (dissoc primary :id)) + primary)))) (mu/defn ^:private fk-column-for :- [:maybe lib.metadata/ColumnMetadata] "Given a query stage find an FK column that points to the PK `pk-col`." @@ -795,14 +802,14 @@ (let [joinable-col-options {:include-implicitly-joinable? false :include-implicitly-joinable-for-source-card? false}] (letfn [(filter-clause [x y] - (lib.filter/filter-clause (lib.filter.operator/operator-def :=) x y))] - (or (when-let [pk-col (pk-column query stage-number joinable joinable-col-options)] - (when-let [fk-col (fk-column-for query stage-number pk-col)] - (filter-clause fk-col pk-col))) - (when-let [pk-col (pk-column query stage-number (lib.util/query-stage query stage-number) nil)] - (when-let [fk-col (fk-column-for-pk-in pk-col (lib.metadata.calculation/visible-columns - query stage-number joinable joinable-col-options))] - (filter-clause pk-col fk-col)))))))) + (lib.filter/filter-clause (lib.filter.operator/operator-def :=) x y))] + (or (when-let [pk-col (pk-column query stage-number joinable joinable-col-options)] + (when-let [fk-col (fk-column-for query stage-number pk-col)] + (filter-clause fk-col pk-col))) + (when-let [pk-col (pk-column query stage-number (lib.util/query-stage query stage-number) nil)] + (when-let [fk-col (fk-column-for-pk-in pk-col (lib.metadata.calculation/visible-columns + query stage-number joinable joinable-col-options))] + (filter-clause pk-col fk-col)))))))) (defn- add-join-alias-to-joinable-columns [cols a-join] (let [join-alias (lib.join.util/current-join-alias a-join) diff --git a/src/metabase/query_processor/middleware/resolve_joins.clj b/src/metabase/query_processor/middleware/resolve_joins.clj index 8bbf5fa1087..360b1e09920 100644 --- a/src/metabase/query_processor/middleware/resolve_joins.clj +++ b/src/metabase/query_processor/middleware/resolve_joins.clj @@ -71,10 +71,16 @@ (when-not (seq source-metadata) (throw (ex-info (tru "Cannot use :fields :all in join against source query unless it has :source-metadata.") {:join join}))) - (for [{field-name :name, base-type :base_type, field-id :id} source-metadata] - (if field-id - [:field field-id {:join-alias alias}] - [:field field-name {:base-type base-type, :join-alias alias}]))) + (let [duplicate-ids (into #{} + (keep (fn [[item freq]] + (when (> freq 1) + item))) + (frequencies (map :id source-metadata)))] + (for [{field-name :name, base-type :base_type, field-id :id} source-metadata] + (if (and field-id (not (contains? duplicate-ids field-id))) + ;; field-id is a unique reference, use it + [:field field-id {:join-alias alias}] + [:field field-name {:base-type base-type, :join-alias alias}])))) (mu/defn ^:private handle-all-fields :- mbql.s/Join "Replace `:fields :all` in a join with an appropriate list of Fields." @@ -83,9 +89,9 @@ join (when (= fields :all) {:fields (if source-query - (source-metadata->fields join source-metadata) - (for [[_ id-or-name opts] (qp.add-implicit-clauses/sorted-implicit-fields-for-table source-table)] - [:field id-or-name (assoc opts :join-alias alias)]))}))) + (source-metadata->fields join source-metadata) + (for [[_ id-or-name opts] (qp.add-implicit-clauses/sorted-implicit-fields-for-table source-table)] + [:field id-or-name (assoc opts :join-alias alias)]))}))) (mu/defn ^:private resolve-references :- Joins [joins :- Joins] diff --git a/src/metabase/query_processor/middleware/upgrade_field_literals.clj b/src/metabase/query_processor/middleware/upgrade_field_literals.clj index 3e7f77c4f85..611811fffeb 100644 --- a/src/metabase/query_processor/middleware/upgrade_field_literals.clj +++ b/src/metabase/query_processor/middleware/upgrade_field_literals.clj @@ -20,15 +20,33 @@ (qp.store/cached ::bad-clause-warning (log/warn (u/colorize :red message))))) +(defn- unique-reference? + "Check if the field-id and the (possibly missing) join-alias of `field-clause` + result in an unambiguous reference to one of the `columns` + The join-alias of columns is taken from their field_ref property." + [field-clause columns] + (let [[_ field-id {:keys [join-alias]}] field-clause + matches-by-id (filter #(= (:id %) field-id) columns)] + (or (nil? (next matches-by-id)) + (->> matches-by-id (filter #(= (get-in % [:field-ref 2 :join-alias]) join-alias)) count (= 1))))) + (defn- fix-clause [{:keys [source-aliases field-name->field]} [_ field-name options :as field-clause]] ;; attempt to find a corresponding Field ref from the source metadata. - (let [field-ref (:field_ref (get field-name->field field-name))] + (let [field-ref (:field_ref (get field-name->field field-name)) + ;; the map contains duplicate columns to support lowercase lookup + columns (set (vals field-name->field))] (cond field-ref (mbql.u/match-one field-ref ;; If the matching Field ref is an integer `:field` clause then replace it with the corrected clause. [:field (id :guard integer?) new-options] - [:field id (merge new-options (dissoc options :base-type))] + (let [new-clause [:field id (merge new-options (dissoc options :base-type))]] + (if (unique-reference? new-clause columns) + new-clause + (u/prog1 field-clause + (warn-once + (format "Warning: upgrading field literal %s would result in an ambiguous reference. Not upgrading." + (pr-str field-clause)))))) ;; Otherwise the Field clause in the source query uses a string Field name as well, but that name differs from ;; the one in `source-aliases`. Will this work? Not sure whether or not we need to log something about this. diff --git a/src/metabase/query_processor/util/add_alias_info.clj b/src/metabase/query_processor/util/add_alias_info.clj index 2aedb365a5c..8c694241b22 100644 --- a/src/metabase/query_processor/util/add_alias_info.clj +++ b/src/metabase/query_processor/util/add_alias_info.clj @@ -208,8 +208,13 @@ (defn- fuzzify [clause] (mbql.u/update-field-options clause dissoc :temporal-unit :binning)) -(defn- matching-field-in-source-query* [source-query field-clause & {:keys [normalize-fn] - :or {normalize-fn normalize-clause}}] +(defn- field-signature + [field-clause] + [(second field-clause) (get-in field-clause [2 :join-alias])]) + +(defn- matching-field-in-source-query* + [source-query source-metadata field-clause & {:keys [normalize-fn] + :or {normalize-fn normalize-clause}}] (let [normalized (normalize-fn field-clause) all-exports (exports source-query) field-exports (filter (partial mbql.u/is-clause? :field) @@ -238,18 +243,26 @@ (filter (partial mbql.u/is-clause? :expression) all-exports)) (m/find-first (fn [[_ _ opts :as _aggregation-options-clause]] (= (::source-alias opts) field-name)) - (filter (partial mbql.u/is-clause? :aggregation-options) all-exports))))))) + (filter (partial mbql.u/is-clause? :aggregation-options) all-exports)))) + ;; look for a field referenced by the name in source-metadata + (let [field-name (second field-clause)] + (when (string? field-name) + (when-let [column (m/find-first #(= (:name %) field-name) source-metadata)] + (let [signature (field-signature (:field_ref column))] + (m/find-first #(= (field-signature %) signature) field-exports)))))))) (defn- matching-field-in-join-at-this-level "If `field-clause` is the result of a join *at this level* with a `:source-query`, return the 'source' `:field` clause from that source query." [inner-query [_ _ {:keys [join-alias]} :as field-clause]] (when join-alias - (when-let [matching-join-source-query (:source-query (join-with-alias inner-query join-alias))] - (matching-field-in-source-query* - matching-join-source-query - field-clause - :normalize-fn #(mbql.u/update-field-options (normalize-clause %) dissoc :join-alias))))) + (let [{:keys [source-query source-metadata]} (join-with-alias inner-query join-alias)] + (when source-query + (matching-field-in-source-query* + source-query + source-metadata + field-clause + :normalize-fn #(mbql.u/update-field-options (normalize-clause %) dissoc :join-alias)))))) (defn- field-alias-in-join-at-this-level "If `field-clause` is the result of a join at this level, return the `::desired-alias` from that join (where the Field is @@ -259,10 +272,10 @@ desired-alias)) (defn- matching-field-in-source-query - [{:keys [source-query], :as inner-query} field-clause] + [{:keys [source-query source-metadata], :as inner-query} field-clause] (when (and source-query (= (field-source-table-alias inner-query field-clause) ::source)) - (matching-field-in-source-query* source-query field-clause))) + (matching-field-in-source-query* source-query source-metadata field-clause))) (defn- field-alias-in-source-query [inner-query field-clause] diff --git a/test/metabase/query_processor_test/explicit_joins_test.clj b/test/metabase/query_processor_test/explicit_joins_test.clj index bd8590f652c..7421ecbb528 100644 --- a/test/metabase/query_processor_test/explicit_joins_test.clj +++ b/test/metabase/query_processor_test/explicit_joins_test.clj @@ -8,6 +8,8 @@ [metabase.driver.sql.query-processor-test-util :as sql.qp-test-util] [metabase.driver.util :as driver.u] [metabase.lib.convert :as lib.convert] + [metabase.lib.core :as lib] + [metabase.lib.metadata :as lib.metadata] [metabase.lib.metadata.jvm :as lib.metadata.jvm] [metabase.lib.test-util.mocks-31769 :as lib.tu.mocks-31769] [metabase.query-processor :as qp] @@ -324,6 +326,80 @@ :order-by [[:asc $name]] :limit 3}))))))))) +;; This is a very contrived test. We create two identical cards and join them both +;; in a third card. This means that first two cards bring fields that differ only in +;; their join aliases, but these "get lost" when the third card is joined to a table. +;; Since we have multiple fields with the same ID, the IDs cannot be used to +;; unambiguously refer to the underlying fields. The only way to reference them +;; properly is by the name they have in the source metadata. +(deftest ^:parallel join-against-multiple-card-copies-test + (mt/test-drivers (mt/normal-drivers-with-feature :left-join) + (testing "join against multiple copies of a card (#34227)" + (mt/dataset sample-dataset + (let [metadata-provider (qp.test-util/metadata-provider-with-cards-with-metadata-for-queries + [(mt/mbql-query orders + {:breakout [$user_id] + :aggregation [[:count]]}) + (mt/mbql-query orders + {:breakout [$user_id] + :aggregation [[:count]]}) + (mt/mbql-query people + {:fields [$id] + :joins [{:fields :all + :alias "ord1" + :source-table "card__1" + :condition [:= $id &ord1.orders.user_id]} + {:fields :all + :alias "ord2" + :source-table "card__2" + :condition [:= $id &ord2.orders.user_id]}]})])] + (qp.store/with-metadata-provider metadata-provider + (let [top-card-query (mt/mbql-query people + {:source-table "card__3" + :limit 3}) + [cid cuser-id ccount cuser-id2 ccount2] (->> top-card-query + qp/query->expected-cols + (map :name)) + cid2 (str cid "_2") + col-data-fn (juxt :id :name :source_alias) + top-card-cols [[(mt/id :people :id) cid nil] + [(mt/id :orders :user_id) cuser-id "ord1"] + [nil ccount "ord1"] + [(mt/id :orders :user_id) cuser-id2 "ord2"] + [nil ccount2 "ord2"]]] + (testing "sanity" + (is (= top-card-cols + (->> top-card-query + qp/process-query + mt/cols + (map col-data-fn))))) + + (when (= driver/*driver* :h2) + (testing "suggested join condition references the FK by name" + (let [query (lib/query metadata-provider (lib.metadata/table metadata-provider (mt/id :people))) + card-meta (lib.metadata/card metadata-provider 3)] + (is (=? [:= {} [:field {} (mt/id :people :id)] [:field {} cuser-id]] + (lib/suggested-join-condition query card-meta)))))) + + (testing "the query runs and returns correct data" + (is (= {:columns [cid cid2 cuser-id ccount cuser-id2 ccount2] + :rows [[1 1 1 11 1 11] + [2 nil nil nil nil nil] + [3 3 3 10 3 10]]} + (-> (mt/mbql-query people + {:joins [{:alias "peeps" + :source-table "card__3" + :fields :all + :condition [:= $id [:field cuser-id2 {:base-type :type/Integer + :join-alias "peeps"}]]}] + :fields [$id] + :order-by [[:asc $id]] + :limit 3}) + qp/process-query + mt/rows+column-names + ;; Oracle is returning java.math.BigDecimal objects + (update :rows #(mt/format-rows-by [int int int int int int] %))))))))))))) + (deftest ^:parallel join-on-field-literal-test (mt/test-drivers (mt/normal-drivers-with-feature :left-join) (testing "Can we join on a Field literal for a source query?" -- GitLab