Skip to content
Snippets Groups Projects
Unverified Commit 0dc38ac3 authored by dpsutton's avatar dpsutton Committed by GitHub
Browse files

Ignore fields in joins when hoisting joined fields (#24167)

* Ignore fields in joins when hoisting joined fields

Need to understand two sides of mbql to understand this.

```clojure
(defn nest-expressions
  "Pushes the `:source-table`/`:source-query`, `:expressions`, and `:joins` in the top-level of the query into a
  `:source-query` and updates `:expression` references and `:field` clauses with `:join-alias`es accordingly. See
  tests for examples. This is used by the SQL QP to make sure expressions happen in a subselect."
  ...)
```

Make a query against the orders table joined to the people table and
select order id, order total, and the person's email:

```sql
 SELECT "PUBLIC"."orders"."id"    AS "ID",
       "PUBLIC"."orders"."total" AS "TOTAL",
       "People - User"."email"   AS "People - User__EMAIL"
FROM   "PUBLIC"."orders"
       LEFT JOIN "PUBLIC"."people" "People - User"
              ON "PUBLIC"."orders"."user_id" = "People - User"."id"
LIMIT  1048575
```

Now add a custom column called adjective, which is 'expensive' when
total > 100 else 'cheap'
```sql
 SELECT "source"."id"                   AS "ID",
       "source"."total"                AS "TOTAL",
       "source"."adjective"            AS "adjective",
       "source"."people - user__email" AS "People - User__EMAIL"
FROM   (SELECT "PUBLIC"."orders"."id"         AS "ID",
               "PUBLIC"."orders"."user_id"    AS "USER_ID",
               "PUBLIC"."orders"."product_id" AS "PRODUCT_ID",
               "PUBLIC"."orders"."subtotal"   AS "SUBTOTAL",
               "PUBLIC"."orders"."tax"        AS "TAX",
               "PUBLIC"."orders"."total"      AS "TOTAL",
               "PUBLIC"."orders"."discount"   AS "DISCOUNT",
               "PUBLIC"."orders"."created_at" AS "CREATED_AT",
               "PUBLIC"."orders"."quantity"   AS "QUANTITY",
               CASE
                 WHEN "PUBLIC"."orders"."total" > 50 THEN 'expensive'
                 ELSE 'cheap'
               end                            AS "adjective",
               "People - User"."email"        AS "People - User__EMAIL",
               "People - User"."id"           AS "People - User__ID"
        FROM   "PUBLIC"."orders"
               LEFT JOIN "PUBLIC"."people" "People - User"
                      ON "PUBLIC"."orders"."user_id" = "People - User"."id")
       "source"
LIMIT  1048575
```

We put the "work" in a nested query and then just update the outer
select to select `source.total` instead of `orders.total`. But the way
this figured out which joins to hoist up was a bit broken. It walked all
over the inner query, finding fields it was interested in. However, it
also got fields it shouldn't have, by descending into join information
that should be opaque at this level.

In the repro for this, we make a card Q1, selecting product_id and
count, but with an implicit join to products and filtering where
category = doohickey.

```clojure
;; card Q1
{:type :query,
 :query {:source-table 4,     #_ reviews
         :filter [:= [:field 26 {:source-field 33}] "Doohickey"], #_ products.category
         :aggregation [[:count]],
         :breakout [[:field 33 nil]],  #_ reviews.product_id
         :limit 2},
 :database 1}
 ```

A second question Q2 queries the Orders table and joins to Q1 on
orders.product_id = q1.product_id, and adds a custom column `+ 1 1`.
```
;; card Q2, based on Q1
{:source-table 2,
 :expressions {"CC" [:+ 1 1]},
 :fields [[:field 9 nil]
          [:field 3 nil]
          [:field 5 nil]
          [:field 6 nil]
          [:field 8 nil]
          [:field 7 nil]
          [:field 1 nil]
          [:field 4 {:temporal-unit :default}]
          [:field 2 nil]
          [:expression
           "CC"
           {:metabase.query-processor.util.add-alias-info/desired-alias "CC",
            :metabase.query-processor.util.add-alias-info/position 9}]
          [:field 33 nil]
          [:field "count" {:base-type :type/BigInteger}]],
 :joins [{:alias "Question 4918",
          :strategy :left-join,
          :fields [[:field 33 nil]
                   [:field "count" {:base-type :type/BigInteger}]],
          :condition [:=
                      [:field 5 nil]
                      [:field 33 nil]],
          :source-card-id 4918,
          }]}
```

and this should yield the sql:

```sql
SELECT "source"."ID" AS "ID",
       "source"."PRODUCT_ID" AS "PRODUCT_ID",
       "source"."TOTAL" AS "TOTAL",
       "source"."CC" AS "CC",
       "source"."Question 4918__PRODUCT_ID" AS "Question 4918__PRODUCT_ID",
       "source"."Question 4918__count" AS "Question 4918__count"
FROM (
  SELECT "PUBLIC"."ORDERS"."ID" AS "ID",
         "PUBLIC"."ORDERS"."USER_ID" AS "USER_ID",
         "PUBLIC"."ORDERS"."PRODUCT_ID" AS "PRODUCT_ID",
         "PUBLIC"."ORDERS"."SUBTOTAL" AS "SUBTOTAL",
         "PUBLIC"."ORDERS"."TAX" AS "TAX",
         "PUBLIC"."ORDERS"."TOTAL" AS "TOTAL",
         "PUBLIC"."ORDERS"."DISCOUNT" AS "DISCOUNT",
         "PUBLIC"."ORDERS"."CREATED_AT" AS "CREATED_AT",
         "PUBLIC"."ORDERS"."QUANTITY" AS "QUANTITY",
         (1 + 1) AS "CC",
         "Question 4918"."PRODUCT_ID" AS "Question 4918__PRODUCT_ID",
         "Question 4918"."count" AS "Question 4918__count"
  FROM "PUBLIC"."ORDERS"
  LEFT JOIN (
    SELECT "PUBLIC"."REVIEWS"."PRODUCT_ID" AS "PRODUCT_ID",
           count(*) AS "count"
    FROM "PUBLIC"."REVIEWS"
    LEFT JOIN "PUBLIC"."PRODUCTS" "PRODUCTS__via__PRODUCT_ID"
      ON "PUBLIC"."REVIEWS"."PRODUCT_ID" = "PRODUCTS__via__PRODUCT_ID"."ID"
    WHERE "PRODUCTS__via__PRODUCT_ID"."CATEGORY" = 'Doohickey'
    GROUP BY "PUBLIC"."REVIEWS"."PRODUCT_ID"
    ORDER BY "PUBLIC"."REVIEWS"."PRODUCT_ID" ASC) "Question 4918"
  ON "PUBLIC"."ORDERS"."PRODUCT_ID" = "Question 4918"."PRODUCT_ID") "source"
LIMIT 1048575
```

But when it was looking through to see which fields to hoist top level,
it was also finding `"PRODUCTS__via__PRODUCT_ID"."ID"` and
`"PRODUCTS__via__PRODUCT_ID"."CATEGORY"` as fields it should hoist up
because it searched the whole tree underneath it which includes join
conditions and such.

But of course that doesn't matter, the only thing is really analyzing
what fields come out of a query, and those fields do not come out of the
nested query

```sql
    SELECT "PUBLIC"."REVIEWS"."PRODUCT_ID" AS "PRODUCT_ID",
           count(*) AS "count"
    FROM "PUBLIC"."REVIEWS"
    LEFT JOIN "PUBLIC"."PRODUCTS" "PRODUCTS__via__PRODUCT_ID"
      ON "PUBLIC"."REVIEWS"."PRODUCT_ID" = "PRODUCTS__via__PRODUCT_ID"."ID"
    WHERE "PRODUCTS__via__PRODUCT_ID"."CATEGORY" = 'Doohickey'
    GROUP BY "PUBLIC"."REVIEWS"."PRODUCT_ID"
    ORDER BY "PUBLIC"."REVIEWS"."PRODUCT_ID" ASC) "Question 4918"
```

that only returns product_id and count.

And this matches the error it was (helpfully) throwing:

```clojure
investigation=> (qp/compile nested-query)
Execution error (ExceptionInfo) at metabase.query-processor.util.add-alias-info/field-source-table-alias (add_alias_info.clj:182).
Cannot determine the source table or query for Field clause
[:field
 26 #_ products.category
 {:source-field 33, #_reviews.product_id
  :join-alias "PRODUCTS__via__PRODUCT_ID",
  :metabase.query-processor.util.add-alias-info/source-table "PRODUCTS__via__PRODUCT_ID",
  :metabase.query-processor.util.add-alias-info/source-alias "CATEGORY"}]
```

And here's the query it was analyzing when determining which fields it
needed to hoist (looking for ones with `:join-alias` (which is also in
the nest_query_test.clj file):

```clojure
;; removed some cruft and extra field information for brevity
{:source-table 2,
 :expressions  {"CC" [:+ 1 1]},
 :fields       [[:field 33 {:join-alias "Question 4918",}]
                [:field "count" {:join-alias "Question 4918"}]]
 :joins        [{:alias           "Question 4918",
                 :strategy        :left-join,
                 :fields          [[:field 33 {:join-alias "Question 4918"}]
                                   [:field
                                    "count"
                                    {:join-alias "Question 4918"}]]
                 :condition       [:=
                                   [:field 5 nil]
                                   [:field 33 {:join-alias "Question 4918",}]],
                 :source-card-id  4918,
                 :source-query    {:source-table 4,
                                   ;; nested query has filter values with join-alias that should not
                                   ;; be selected
                                   :filter       [:=
                                                  [:field 26 {:join-alias "PRODUCTS__via__PRODUCT_ID"}]
                                                  [:value "Doohickey" {}]],
                                   :aggregation  [[:aggregation-options
                                                   [:count]
                                                   {:name "count"}]],
                                   :breakout     [[:field 33 nil]],
                                   :limit        2,
                                   :order-by     [[:asc
                                                   [:field 33 nil]]],
                                   ;; nested query has an implicit join with conditions that should
                                   ;; not be selected
                                   :joins        [{:alias        "PRODUCTS__via__PRODUCT_ID",
                                                   :strategy     :left-join,
                                                   :condition    [:=
                                                                  [:field 33 nil]
                                                                  [:field
                                                                   30
                                                                   {:join-alias "PRODUCTS__via__PRODUCT_ID"}]]
                                                   :source-table 1,
                                                   :fk-field-id  33}]},
                 :source-metadata [{:field_ref [:field 33 nil]}
                                   {:field_ref [:aggregation 0]}]}]}
```

* Add round-trip test through qp

This test is essentially the repro from the ticket
https://github.com/metabase/metabase/issues/20809

```clojure
debug-qp=> (to-mbql-shorthand (:dataset_query (metabase.models/Card 4919)))
(mt/mbql-query
 orders
 {:joins [{:source-table "card__4918",
           :alias "Question 4918",
           :condition [:=
                       $product_id
                       [:field
                        %reviews.product_id
                        {:join-alias "Question 4918"}]],
           :fields :all}],
  :expressions {"CC" [:+ 1 1]}})
debug-qp=> (to-mbql-shorthand (:dataset_query (metabase.models/Card 4918)))
(mt/mbql-query
 reviews
 {:breakout [$product_id],
  :aggregation [[:count]],
  :filter [:= $product_id->products.category "Doohickey"]})
```

Thanks to @cam for providing such a lovely tool
parent 6d46ef12
No related branches found
No related tags found
No related merge requests found
......@@ -5,7 +5,8 @@
(This namespace is here rather than in the shared MBQL lib because it relies on other QP-land utils like the QP
refs stuff.)"
(:require [medley.core :as m]
(:require [clojure.walk :as walk]
[medley.core :as m]
[metabase.api.common :as api]
[metabase.mbql.util :as mbql.u]
[metabase.plugins.classloader :as classloader]
......@@ -17,7 +18,11 @@
(defn- joined-fields [inner-query]
(m/distinct-by
add/normalize-clause
(mbql.u/match (dissoc inner-query :source-query :source-metadata)
(mbql.u/match (walk/prewalk (fn [x]
(if (map? x)
(dissoc x :source-query :source-metadata)
x))
inner-query)
[:field _ (_ :guard :join-alias)]
&match)))
......
......@@ -2,7 +2,7 @@
(:require [clojure.test :refer :all]
[clojure.walk :as walk]
[metabase.driver :as driver]
[metabase.models.field :refer [Field]]
[metabase.models :refer [Card Field]]
[metabase.query-processor :as qp]
[metabase.query-processor.util.add-alias-info :as add]
[metabase.query-processor.util.nest-query :as nest-query]
......@@ -250,7 +250,75 @@
[:expression "x" {::add/desired-alias "x"
::add/position 1}]]}}
:limit 1})
(nest-expressions query)))))))
(nest-expressions query))))))
(testing "Ignores source-query from joins (#20809)"
(let [query {:source-table 2,
:expressions {"CC" [:+ 1 1]},
:fields [[:field 33 {:join-alias "Question 4918",}]
[:field "count" {:join-alias "Question 4918"}]]
:joins [{:alias "Question 4918",
:strategy :left-join,
:fields [[:field 33 {:join-alias "Question 4918"}]
[:field
"count"
{:join-alias "Question 4918"}]]
:condition [:=
[:field 5 nil]
[:field 33 {:join-alias "Question 4918",}]],
:source-card-id 4918,
:source-query {:source-table 4,
;; nested query has filter values with join-alias that should not
;; be selected
:filter [:=
[:field 26 {:join-alias "PRODUCTS__via__PRODUCT_ID"}]
[:value "Doohickey" {}]],
:aggregation [[:aggregation-options
[:count]
{:name "count"}]],
:breakout [[:field 33 nil]],
:limit 2,
:order-by [[:asc
[:field 33 nil]]],
;; nested query has an implicit join with conditions that should
;; not be selected
:joins [{:alias "PRODUCTS__via__PRODUCT_ID",
:strategy :left-join,
:condition [:=
[:field 33 nil]
[:field
30
{:join-alias "PRODUCTS__via__PRODUCT_ID"}]]
:source-table 1,
:fk-field-id 33}]},
:source-metadata [{:field_ref [:field 33 nil]}
{:field_ref [:aggregation 0]}]}]}]
(is (= [[:field 33 {:join-alias "Question 4918"}]
[:field "count" {:join-alias "Question 4918"}]]
(#'nest-query/joined-fields query))))
(mt/dataset sample-dataset
(mt/with-temp* [Card [base {:dataset_query
(mt/mbql-query
reviews
{:breakout [$product_id],
:aggregation [[:count]],
;; filter on an implicit join
:filter [:= $product_id->products.category "Doohickey"]})}]]
;; the result returned is not important, just important that the query is valid and completes
(is (vector?
(mt/rows
(qp/process-query
(mt/mbql-query
orders
{:joins [{:source-table (str "card__" (:id base)),
:alias (str "Question " (:id base)),
:condition [:=
$product_id
[:field
%reviews.product_id
{:join-alias (str "Question " (:id base))}]],
:fields :all}],
:expressions {"CC" [:+ 1 1]}
:limit 2})))))))))
(deftest nest-expressions-with-joins-test
(testing "If there are any `:joins`, those need to be nested into the `:source-query` as well."
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment