Skip to content
Snippets Groups Projects
Unverified Commit beca114d authored by Cam Saul's avatar Cam Saul Committed by GitHub
Browse files

Generate correct SQL for columns from joins inside other joins or source queries (#16254)

* Fix joining a join inside a nested queries (#12928)

* Code cleanup & dox

* Tests for #13649

* Test fixes :wrench:
parent 3cf82b73
No related branches found
No related tags found
No related merge requests found
......@@ -249,7 +249,7 @@ describe("scenarios > question > notebook", () => {
// NOTE: - This repro is really tightly coupled to the `joinTwoSavedQuestions()` function.
// - Be extremely careful when changing any of the steps within that function.
// - The alternative approach would have been to write one longer repro instead of two separate ones.
it.skip("joined questions should create custom column (metabase#13649)", () => {
it("joined questions should create custom column (metabase#13649)", () => {
// pass down a joined question alias
joinTwoSavedQuestions("13649");
......@@ -259,7 +259,7 @@ describe("scenarios > question > notebook", () => {
popover().within(() => {
cy.get("[contenteditable='true']").type(
// reference joined question by previously set alias
"[13649 → Sum of Rating] / [Sum of Rating]",
"[13649 → sum] / [Sum of Rating]",
);
cy.findByPlaceholderText("Something nice and descriptive")
.click()
......@@ -310,7 +310,7 @@ describe("scenarios > question > notebook", () => {
});
});
it.skip("should join saved questions that themselves contain joins (metabase#12928)", () => {
it("should join saved questions that themselves contain joins (metabase#12928)", () => {
// Save Question 1
cy.createQuestion({
name: "12928_Q1",
......
......@@ -217,22 +217,23 @@
:where [:<= (hsql/raw "rownum") 100]})
(#'sql.qp/mbql->honeysql
:oracle
(mt/mbql-query venues
{:source-table $$venues
:order-by [[:asc $id]]
:filter [:=
&test_data_categories__via__cat.categories.name
[:value "BBQ" {:base_type :type/Text, :semantic_type :type/Name, :database_type "VARCHAR"}]]
:fields [$id $name $category_id $latitude $longitude $price]
:limit 100
:joins [{:source-table $$categories
:alias "test_data_categories__via__cat",
:strategy :left-join
:condition [:=
$category_id
&test_data_categories__via__cat.categories.id]
:fk-field-id (mt/id :venues :category_id)
:fields :none}]}))))))))
(qp/query->preprocessed
(mt/mbql-query venues
{:source-table $$venues
:order-by [[:asc $id]]
:filter [:=
&test_data_categories__via__cat.categories.name
[:value "BBQ" {:base_type :type/Text, :semantic_type :type/Name, :database_type "VARCHAR"}]]
:fields [$id $name $category_id $latitude $longitude $price]
:limit 100
:joins [{:source-table $$categories
:alias "test_data_categories__via__cat",
:strategy :left-join
:condition [:=
$category_id
&test_data_categories__via__cat.categories.id]
:fk-field-id (mt/id :venues :category_id)
:fields :none}]})))))))))
(deftest oracle-connect-with-ssl-test
(mt/test-driver :oracle
......
......@@ -32,12 +32,42 @@
"The INNER query currently being processed, for situations where we need to refer back to it."
nil)
(def ^:dynamic ^:private *source-aliases*
"A map of field-clause -> alias for Fields that come from nested source queries or joins, e.g.
{[:field-id 1 {:join-alias \"X\"}] \"Q1__my_field\"}
This is needed because source queries and joins will generate their own 'unambiguous alias' for Fields that come
from their own source queries or joins, and we need to use that alias when referring to the Field in the parent
query. e.g.
-- all uses of `*category_id` may well be the same column, but we need to use the correct alias based on the one
-- used in nested queries or joins
SELECT J.t3__category_id AS J__category_id
FROM T1
LEFT JOIN (
SELECT T2.category_id AS category_id,
T3.category_id AS T3__category_id
FROM T2
LEFT JOIN T3
ON T2.some_id = t3.id
) J
ON T1.id = J1.T3__category_id
See #16254 for more information."
nil)
(def ^:dynamic *nested-query-level*
"How many levels deep are we into nested queries? (0 = top level.) We keep track of this so we know what level to
find referenced aggregations (otherwise something like [:aggregation 0] could be ambiguous in a nested query).
Each nested query increments this counter by 1."
0)
(def ^:dynamic *field-options*
"Bound to the `options` part of a `:field` clause when that clause is being compiled to HoneySQL. Useful if you store
additional keys there and need to access them."
nil)
(p.types/deftype+ SQLSourceQuery [sql params]
hformat/ToSql
(to-sql [_]
......@@ -134,6 +164,8 @@
:else (mod-fn (hx/+ day-of-week offset) 7))
day-of-week)))
;; TODO -- this is not actually used in this namespace; it's only used in
;; `metabase.driver.sql.parameters.substitution`... consider moving it
(defmulti field->identifier
"Return a HoneySQL form that should be used as the identifier for `field`, an instance of the Field model. The default
implementation returns a keyword generated by from the components returned by `field/qualified-name-components`.
......@@ -300,9 +332,10 @@
(defmethod ->honeysql [:sql (class Field)]
[driver {field-name :name, table-id :table_id, database-type :database_type, :as field}]
;; `indentifer` will automatically unnest nested calls to `identifier`
;; `identifer` will automatically unnest nested calls to `identifier`
(as-> (if *table-alias*
[*table-alias* (unambiguous-field-alias driver [:field (:id field) nil])]
[*table-alias* (or (::source-alias *field-options*)
(unambiguous-field-alias driver [:field (:id field) nil]))]
(let [{schema :schema, table-name :name} (qp.store/table table-id)]
[schema table-name field-name])) expr
(apply hx/identifier :field expr)
......@@ -362,16 +395,21 @@
(hx/* bin-width)
(hx/+ min-value)))
(def ^:dynamic *field-options*
"Bound to the `options` part of a `:field` clause when that clause is being compiled to HoneySQL. Useful if you store
additional keys there and need to access them."
nil)
(defmethod ->honeysql [:sql :field]
[driver [_ field-id-or-name options :as field-clause]]
(binding [*field-options* options]
(if (:join-alias options)
(cond
(get *source-aliases* field-clause)
(let [field-alias (get *source-aliases* field-clause)
options (assoc options ::source-alias field-alias)]
;; recurse with the ::source-alias added to Field options. Since it won't be equal anymore it will prevent
;; infinite recursion.
(->honeysql driver [:field field-id-or-name options]))
(:join-alias options)
(compile-field-with-join-aliases driver field-clause)
:else
(let [honeysql-form (cond
;; selects from an inner select should not
(and (integer? field-id-or-name) (contains? options ::outer-select))
......@@ -381,7 +419,8 @@
(->honeysql driver (qp.store/field field-id-or-name))
:else
(->honeysql driver (hx/identifier :field *table-alias* field-id-or-name)))]
(cond-> (->honeysql driver (hx/identifier :field *table-alias* field-id-or-name))
(:database-type options) (hx/with-database-type-info (:database-type options))))]
(cond->> honeysql-form
(:temporal-unit options) (apply-temporal-bucketing driver options)
(:binning options) (apply-binning options))))))
......@@ -939,11 +978,53 @@
;;; -------------------------------------------- putting it all together --------------------------------------------
(def ^:private SourceAliasInfo
{:this-level-fields {(s/pred vector?) s/Str}
:source-fields {(s/pred vector?) s/Str}})
(declare source-aliases)
(s/defn ^:private join-source-aliases :- SourceAliasInfo
[driver join :- mbql.s/Join]
(let [aliases (source-aliases driver join)]
{:source-fields (:source-fields aliases)
:this-level-fields (into {} (for [fields [(:this-level-fields aliases)
(:source-fields aliases)]
[field alias] fields
:let [field (mbql.u/assoc-field-options
field
:join-alias (:alias join))]]
[field alias]))}))
(s/defn ^:private source-aliases :- SourceAliasInfo
"Build the `*source-aliases*` map. `:this-level-fields` are the ones actually bound to `*source-aliases*`;
`:source-fields` is only used when recursively building the map."
[driver inner-query :- mbql.s/SourceQuery]
(let [recursive (merge
(when-let [source-query (:source-query inner-query)]
{"source" (source-aliases driver source-query)})
(into {} (for [join (:joins inner-query)]
[(:alias join) (join-source-aliases driver join)])))]
{:this-level-fields (into {} (concat (for [k [:breakout :aggregation :fields]
clause (k inner-query)
;; don't need to do anything special with non-field clauses because
;; they don't get an "unambiguous alias"
:when (mbql.u/is-clause? :field clause)
:let [[_ id-or-name] clause]]
[(mbql.u/update-field-options clause dissoc :join-alias)
(if (integer? id-or-name)
(unambiguous-field-alias driver clause)
id-or-name)])))
:source-fields (into {} (for [[source {:keys [this-level-fields]}] recursive
field this-level-fields]
field))}))
(defn- apply-clauses
"Like `apply-top-level-clauses`, but handles `source-query` as well, which needs to be handled in a special way
because it is aliased."
[driver honeysql-form {:keys [source-query], :as inner-query}]
(binding [*query* inner-query]
(binding [*query* inner-query
*source-aliases* (:source-fields (source-aliases driver inner-query))]
(if source-query
(apply-clauses-with-aliased-source-query-table
driver
......
......@@ -8,11 +8,12 @@
[metabase.models.field :refer [Field]]
[metabase.models.setting :as setting]
[metabase.query-processor :as qp]
[metabase.query-processor.interface :as qp.i]
[metabase.test :as mt]
[metabase.util :as u]
[metabase.util.honeysql-extensions :as hx]
[pretty.core :refer [PrettyPrintable]]
[schema.core :as s]
[toucan.db :as db])
[schema.core :as s])
(:import metabase.util.honeysql_extensions.Identifier))
(deftest process-mbql-query-keys-test
......@@ -26,7 +27,7 @@
:fields 4
:breakout 2})))))
;; Let's make sure we're actually attempting to generate the correctl HoneySQL for stuff so we don't sit around
;; Let's make sure we're actually attempting to generate the correct HoneySQL for stuff so we don't sit around
;; scratching our heads wondering why the queries themselves aren't working
;; We'll slap together a driver called `::id-swap` whose only purpose is to replace instances of `Identifier` with
......@@ -80,22 +81,23 @@
:limit 100}
(#'sql.qp/mbql->honeysql
::id-swap
(mt/mbql-query venues
{:source-table $$venues
:order-by [[:asc $id]]
:filter [:=
&c.categories.name
[:value "BBQ" {:base_type :type/Text, :semantic_type :type/Name, :database_type "VARCHAR"}]]
:fields [$id $name $category_id $latitude $longitude $price]
:limit 100
:joins [{:source-table $$categories
:alias "c",
:strategy :left-join
:condition [:=
$category_id
&c.categories.id]
:fk-field-id (mt/id :venues :category_id)
:fields :none}]})))))))
(qp/query->preprocessed
(mt/mbql-query venues
{:source-table $$venues
:order-by [[:asc $id]]
:filter [:=
&c.categories.name
[:value "BBQ" {:base_type :type/Text, :semantic_type :type/Name, :database_type "VARCHAR"}]]
:fields [$id $name $category_id $latitude $longitude $price]
:limit 100
:joins [{:source-table $$categories
:alias "c",
:strategy :left-join
:condition [:=
$category_id
&c.categories.id]
:fk-field-id (mt/id :venues :category_id)
:fields :none}]}))))))))
(deftest correct-identifiers-test
(testing "This HAIRY query tests that the correct identifiers and aliases are used with both a nested query and JOIN in play."
......@@ -123,6 +125,7 @@
(id :table-alias "source")]]
:left-join [[(id :table "PUBLIC" "VENUES") (bound-alias "source" (id :table-alias "v"))]
[:=
#_(bound-alias "source" (id :field "source" "VENUE_ID"))
(hx/with-database-type-info (bound-alias "source" (id :field "source" "VENUE_ID")) "integer")
(hx/with-database-type-info (bound-alias "v" (id :field "v" "ID")) "bigint")]]
:group-by [(hx/with-database-type-info (bound-alias "v" (id :field "v" "NAME")) "varchar")]
......@@ -135,30 +138,31 @@
:asc]]}
(#'sql.qp/mbql->honeysql
::id-swap
(mt/mbql-query checkins
{:source-query {:source-table $$checkins
:fields [$id [:field %date {:temporal-unit :default}] $user_id $venue_id]
:filter [:>
$date
[:absolute-datetime #t "2015-01-01T00:00:00.000000000-00:00" :default]]}
:aggregation [[:count]]
:order-by [[:asc &v.venues.name]]
:breakout [&v.venues.name]
:filter [:and
[:starts-with
&v.venues.name
[:value "F" {:base_type :type/Text
:semantic_type :type/Name
:database_type "VARCHAR"}]]
[:> [:field "user_id" {:base-type :type/Integer}] 0]]
:joins [{:source-table $$venues
:alias "v"
:strategy :left-join
:condition [:=
$venue_id
&v.venues.id]
:fk-field-id (mt/id :checkins :venue_id)
:fields :none}]}))))))))
(qp/query->preprocessed
(mt/mbql-query checkins
{:source-query {:source-table $$checkins
:fields [$id [:field %date {:temporal-unit :default}] $user_id $venue_id]
:filter [:>
$date
[:absolute-datetime #t "2015-01-01T00:00:00.000000000-00:00" :default]]}
:aggregation [[:count]]
:order-by [[:asc &v.venues.name]]
:breakout [&v.venues.name]
:filter [:and
[:starts-with
&v.venues.name
[:value "F" {:base_type :type/Text
:semantic_type :type/Name
:database_type "VARCHAR"}]]
[:> [:field "user_id" {:base-type :type/Integer}] 0]]
:joins [{:source-table $$venues
:alias "v"
:strategy :left-join
:condition [:=
$venue_id
&v.venues.id]
:fk-field-id (mt/id :checkins :venue_id)
:fields :none}]})))))))))
(deftest handle-named-aggregations-test
(testing "Check that named aggregations are handled correctly"
......@@ -275,14 +279,18 @@
(defn- mbql->native [query]
(mt/with-everything-store
(driver/with-driver :h2
(-> (sql.qp/mbql->native :h2 query)
(-> (sql.qp/mbql->native :h2 (qp/query->preprocessed query))
:query
pretty-sql))))
(deftest joined-field-clauses-test
(testing "Should correctly compile `:field` clauses with `:join-alias`"
(testing "when the join is at the same level"
(is (= "SELECT c.NAME AS c__NAME FROM VENUES LEFT JOIN CATEGORIES c ON VENUES.CATEGORY_ID = c.ID"
(is (= (str "SELECT c.NAME AS c__NAME "
"FROM VENUES "
"LEFT JOIN CATEGORIES c"
" ON VENUES.CATEGORY_ID = c.ID "
(format "LIMIT %d" qp.i/absolute-max-results))
(mbql->native
(mt/mbql-query venues
{:fields [&c.categories.name]
......@@ -298,7 +306,8 @@
"FROM VENUES"
" LEFT JOIN CATEGORIES c"
" ON VENUES.CATEGORY_ID = c.ID"
") source")
") source "
(format "LIMIT %d" qp.i/absolute-max-results))
(mbql->native
(mt/mbql-query venues
{:fields [&c.categories.name]
......@@ -366,9 +375,8 @@
"WHERE ((source.PEOPLE__via__USER_ID__SOURCE = ? OR source.PEOPLE__via__USER_ID__SOURCE = ?)"
" AND (source.PRODUCTS__via__PRODUCT_ID__CATEGORY = ?"
" OR source.PRODUCTS__via__PRODUCT_ID__CATEGORY = ?)"
" AND parsedatetime(year(source.CREATED_AT), 'yyyy')"
" BETWEEN parsedatetime(year(dateadd('year', CAST(-2 AS long), now())), 'yyyy')"
" AND parsedatetime(year(dateadd('year', CAST(-1 AS long), now())), 'yyyy')) "
" AND source.CREATED_AT >= parsedatetime(year(dateadd('year', CAST(-2 AS long), now())), 'yyyy')"
" AND source.CREATED_AT < parsedatetime(year(now()), 'yyyy')) "
"GROUP BY source.PRODUCTS__via__PRODUCT_ID__CATEGORY,"
" source.PEOPLE__via__USER_ID__SOURCE,"
" parsedatetime(year(source.CREATED_AT), 'yyyy'),"
......@@ -485,3 +493,121 @@
(is (schema= [(s/one s/Str "date")
(s/one s/Num "expression")]
(-> results mt/rows first)))))))))
(defn- even-prettier-sql [s]
(-> s
pretty-sql
(str/replace #"\s+" " ")
(str/replace #"\(\s*" "(")
(str/replace #"\s*\)" ")")
(str/replace #"PUBLIC\." "")
str/trim))
(defn- mega-query []
(mt/mbql-query nil
{:fields [&P1.products.category
&People.people.source
[:field "count" {:base-type :type/BigInteger}]
&Q2.products.category
[:field "avg" {:base-type :type/Integer, :join-alias "Q2"}]]
:source-query {:source-table $$orders
:aggregation [[:aggregation-options [:count] {:name "count"}]]
:breakout [&P1.products.category
&People.people.source]
:order-by [[:asc &P1.products.category]
[:asc &People.people.source]]
:joins [{:strategy :left-join
:source-table $$products
:condition
[:= $orders.product_id &P1.products.id]
:alias "P1"}
{:strategy :left-join
:source-table $$people
:condition [:= $orders.user_id &People.people.id]
:alias "People"}]}
:joins [{:strategy :left-join
:condition [:= $products.category &Q2.products.category]
:alias "Q2"
:source-query {:source-table $$reviews
:aggregation [[:aggregation-options [:avg $reviews.rating] {:name "avg"}]]
:breakout [&P2.products.category]
:joins [{:strategy :left-join
:source-table $$products
:condition [:= $reviews.product_id &P2.products.id]
:alias "P2"}]}}]
:limit 2}))
(deftest source-aliases-test
(mt/dataset sample-dataset
(mt/with-everything-store
(let [query (mega-query)
small-query (get-in query [:query :source-query])]
(testing (format "Query =\n%s" (u/pprint-to-str small-query))
(is (= {:this-level-fields {[:field (mt/id :products :category) nil] "P1__CATEGORY"
[:field (mt/id :people :source) nil] "People__SOURCE"}
:source-fields {}}
(#'sql.qp/source-aliases :h2 small-query))))
(testing (format "Query =\n%s" (u/pprint-to-str query))
(is (= {[:field (mt/id :products :category) nil] "P1__CATEGORY"
[:field (mt/id :people :source) nil] "People__SOURCE"
[:field (mt/id :products :category) {:join-alias "Q2"}] "P2__CATEGORY"}
(:source-fields (#'sql.qp/source-aliases :h2 (:query query))))))))))
(defn mini-query []
(mt/dataset sample-dataset
(qp/query->preprocessed
(mt/mbql-query orders
{:source-table $$orders
:fields [$id &Q2.products.category]
:joins [{:fields :none
:condition [:= $products.category &Q2.products.category]
:alias "Q2"
:source-query {:source-table $$reviews
:joins [{:fields :all
:source-table $$products
:condition [:=
$reviews.product_id
&Products.products.id]
:alias "Products"}]
:aggregation [[:avg $reviews.rating]]
:breakout [&Products.products.category]}}]
:limit 2}))))
(deftest use-correct-source-aliases-test
(testing "Should generate correct SQL for joins against source queries that contain joins (#12928)")
(mt/dataset sample-dataset
(is (= (->> ["SELECT source.P1__CATEGORY AS P1__CATEGORY,"
" source.People__SOURCE AS People__SOURCE,"
" source.count AS count,"
" Q2.P2__CATEGORY AS Q2__CATEGORY,"
" Q2.avg AS avg"
"FROM ("
" SELECT P1.CATEGORY AS P1__CATEGORY,"
" People.SOURCE AS People__SOURCE,"
" count(*) AS count"
" FROM ORDERS"
" LEFT JOIN PRODUCTS P1"
" ON ORDERS.PRODUCT_ID = P1.ID"
" LEFT JOIN PEOPLE People"
" ON ORDERS.USER_ID = People.ID"
" GROUP BY P1.CATEGORY,"
" People.SOURCE"
" ORDER BY P1.CATEGORY ASC,"
" People.SOURCE ASC"
") source"
"LEFT JOIN ("
" SELECT P2.CATEGORY AS P2__CATEGORY,"
" avg(REVIEWS.RATING) AS avg"
" FROM REVIEWS"
" LEFT JOIN PRODUCTS P2"
" ON REVIEWS.PRODUCT_ID = P2.ID"
" GROUP BY P2.CATEGORY"
") Q2"
" ON source.P1__CATEGORY = Q2.P2__CATEGORY"
"LIMIT 2"]
(str/join " ")
even-prettier-sql)
(-> (mega-query)
mbql->native
even-prettier-sql)))))
......@@ -518,3 +518,63 @@
:alias "CategoriesStats"
:fields :all}]
:limit 3})))))))
(deftest join-source-queries-with-joins-test
(testing "Should be able to join against source queries that themselves contain joins (#12928)"
;; sample-dataset doesn't work on Redshift yet -- see #14784
(mt/test-drivers (disj (mt/normal-drivers-with-feature :nested-queries :left-join :foreign-keys) :redshift)
(mt/dataset sample-dataset
(testing "(#12928)"
(let [query (mt/mbql-query orders
{:source-query {:source-table $$orders
:joins [{:fields :all
:source-table $$products
:condition [:= $orders.product_id &P1.products.id]
:alias "P1"}
{:fields :all
:source-table $$people
:condition [:= $orders.user_id &People.people.id]
:alias "People"}]
:aggregation [[:count]]
:breakout [&P1.products.category
[:field %people.source {:join-alias "People"}]]}
:joins [{:fields :all
:condition [:= $products.category &Q2.products.category]
:alias "Q2"
:source-query {:source-table $$reviews
:joins [{:fields :all
:source-table $$products
:condition [:=
$reviews.product_id
&P2.products.id]
:alias "P2"}]
:aggregation [[:avg $reviews.rating]]
:breakout [&P2.products.category]}}]
:limit 2})]
(is (= [["Doohickey" "Affiliate" 783 "Doohickey" 3]
["Doohickey" "Facebook" 816 "Doohickey" 3]]
(mt/formatted-rows [str str int str int]
(qp/process-query query))))))
(testing "and custom expressions (#13649)"
(let [query (mt/mbql-query orders
{:source-query {:source-table $$orders
:aggregation [[:count]]
:breakout [$product_id]
:filter [:= $product_id 4]}
:joins [{:fields :all
:source-query {:source-table $$orders
:aggregation [[:count]]
:breakout [$product_id]
:filter [:and
[:= $product_id 4]
[:> $quantity 3]]}
:condition [:= $product_id &Q2.orders.product_id]
:alias "Q2"}]
:expressions {:expr [:/
[:field "count" {:base-type :type/BigInteger, :join-alias "Q2"}]
[:field "count" {:base-type :type/BigInteger}]]}
:limit 2})]
(is (= [[4 41 0.46 41]]
(mt/formatted-rows [int int 2.0 int]
(qp/process-query query))))))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment