diff --git a/modules/drivers/bigquery/src/metabase/driver/bigquery.clj b/modules/drivers/bigquery/src/metabase/driver/bigquery.clj index 4c2a80c083d6a9202e0952a6894df36ff4bcb0e5..3a1b4ba72daed0e499a11ab84029c80bc34427f7 100644 --- a/modules/drivers/bigquery/src/metabase/driver/bigquery.clj +++ b/modules/drivers/bigquery/src/metabase/driver/bigquery.clj @@ -58,14 +58,10 @@ (s/defn ^:private dataset-name-for-current-query :- BigQueryIdentifierString "Fetch the dataset name for the database associated with this query, needed because BigQuery requires you to qualify identifiers with it. This is primarily called automatically for the `to-sql` implementation of the - `BigQueryIdentifier` record type; see its definition for more details. - - This looks for the value inside the SQL QP's `*query*` dynamic var; since this won't be bound for non-MBQL queries, - you will want to avoid this function for SQL queries." + `BigQueryIdentifier` record type; see its definition for more details." [] - (or (some-> sql.qp/*query* :dataset-id) - (when (qp.store/initialized?) - (some-> (qp.store/database) :details :dataset-id)))) + (when (qp.store/initialized?) + (some-> (qp.store/database) :details :dataset-id))) ;;; +----------------------------------------------------------------------------------------------------------------+ @@ -412,25 +408,17 @@ ((partial apply h/merge-select) (for [field-clause breakout-field-clauses :when (not (contains? (set fields-field-clauses) field-clause))] (sql.qp/as driver field-clause))))) -(defn- ag-ref->alias [[_ index]] - (let [{{aggregations :aggregation} :query} sql.qp/*query* - [ag-type :as ag] (nth aggregations index)] - (mbql.u/match-one ag - [:distinct _] :count - [:expression operator & _] operator - [:named _ ag-name] (keyword ag-name) - [ag-type & _] ag-type))) - -(defmethod sql.qp/apply-top-level-clause [:bigquery :order-by] - [driver _ honeysql-form {subclauses :order-by, :as query}] - (loop [honeysql-form honeysql-form, [[direction field-clause] & more] subclauses] - (let [honeysql-form (h/merge-order-by honeysql-form [(if (mbql.u/is-clause? :aggregation field-clause) - (ag-ref->alias field-clause) - (sql.qp/field-clause->alias driver field-clause)) - direction])] - (if (seq more) - (recur honeysql-form more) - honeysql-form)))) + +;; as with breakouts BigQuery requires that you use the Field aliases in order by clauses, so override the methods for +;; compiling `:asc` and `:desc` and alias the Fields if applicable +(defn- alias-order-by-field [driver [direction field-clause]] + (let [field-clause (if (mbql.u/is-clause? :aggregation field-clause) + field-clause + (sql.qp/field-clause->alias driver field-clause))] + ((get-method sql.qp/->honeysql [:sql direction]) driver [direction field-clause]))) + +(defmethod sql.qp/->honeysql [:bigquery :asc] [driver clause] (alias-order-by-field driver clause)) +(defmethod sql.qp/->honeysql [:bigquery :desc] [driver clause] (alias-order-by-field driver clause)) ;;; +----------------------------------------------------------------------------------------------------------------+ diff --git a/modules/drivers/bigquery/test/metabase/driver/bigquery_test.clj b/modules/drivers/bigquery/test/metabase/driver/bigquery_test.clj index ca5581ed0ee71b3c24614b3cecd7723e2f4d6758..775c6feb9e245fd3af024c14b189e15e98fdfe98 100644 --- a/modules/drivers/bigquery/test/metabase/driver/bigquery_test.clj +++ b/modules/drivers/bigquery/test/metabase/driver/bigquery_test.clj @@ -9,16 +9,19 @@ [util :as u]] [metabase.db.metadata-queries :as metadata-queries] [metabase.driver.bigquery :as bigquery] + [metabase.driver.sql.query-processor :as sql.qp] [metabase.mbql.util :as mbql.u] [metabase.models [database :refer [Database]] [field :refer [Field]] [table :refer [Table]]] + [metabase.query-processor.test-util :as qp.test-util] [metabase.test [data :as data] [util :as tu]] [metabase.test.data.datasets :refer [expect-with-driver]] [metabase.test.util.timezone :as tu.tz] + [metabase.util.honeysql-extensions :as hx] [toucan.util.test :as tt])) ;; Test native queries @@ -238,3 +241,30 @@ :limit 1} :info {:executed-by 1000 :query-hash (byte-array [1 2 3 4])}})) + +;; let's make sure we're generating correct HoneySQL + SQL for aggregations +(expect-with-driver :bigquery + {:select [[(hx/identifier :field "test_data.venues" "price") (hx/identifier :field-alias "price")] + [(hsql/call :avg (hx/identifier :field "test_data.venues" "category_id")) (hx/identifier :field-alias "avg")]] + :from [(hx/identifier :table "test_data.venues")] + :group-by [(hx/identifier :field-alias "price")] + :order-by [[(hx/identifier :field-alias "avg") :asc]]} + (qp.test-util/with-everything-store + (#'sql.qp/mbql->honeysql + :bigquery + (data/mbql-query venues + {:aggregation [[:avg $category_id]] + :breakout [$price] + :order-by [[:asc [:aggregation 0]]]})))) + +(expect-with-driver :bigquery + {:query (str "SELECT `test_data.venues`.`price` AS `price`," + " avg(`test_data.venues`.`category_id`) AS `avg` " + "FROM `test_data.venues` " + "GROUP BY `price` " + "ORDER BY `avg` ASC, `price` ASC") + :table-name "venues" + :mbql? true} + (qp/query->native + (data/mbql-query venues + {:aggregation [[:avg $category_id]], :breakout [$price], :order-by [[:asc [:aggregation 0]]]}))) diff --git a/modules/drivers/oracle/src/metabase/driver/oracle.clj b/modules/drivers/oracle/src/metabase/driver/oracle.clj index c8086edf565a82f16f1de2269764e2a01a41ef0f..4c09f7e0bcdba2a40707c0be44518cb45fe62f48 100644 --- a/modules/drivers/oracle/src/metabase/driver/oracle.clj +++ b/modules/drivers/oracle/src/metabase/driver/oracle.clj @@ -3,28 +3,25 @@ [set :as set] [string :as str]] [clojure.java.jdbc :as jdbc] - [clojure.tools.logging :as log] [honeysql.core :as hsql] [metabase [config :as config] - [driver :as driver] - [util :as u]] + [driver :as driver]] [metabase.driver.common :as driver.common] + [metabase.driver.sql + [query-processor :as sql.qp] + [util :as sql.u]] [metabase.driver.sql-jdbc [connection :as sql-jdbc.conn] [execute :as sql-jdbc.execute] [sync :as sql-jdbc.sync]] - [metabase.driver.sql.query-processor :as sql.qp] [metabase.driver.sql.util.unprepare :as unprepare] [metabase.util [date :as du] [honeysql-extensions :as hx] - [i18n :refer [trs]] - [ssh :as ssh]] - [schema.core :as s]) + [ssh :as ssh]]) (:import [java.sql ResultSet Types] - java.util.Date - metabase.util.honeysql_extensions.Identifier)) + java.util.Date)) (driver/register! :oracle, :parent :sql-jdbc) @@ -160,76 +157,6 @@ (defmethod sql.qp/unix-timestamp->timestamp [:oracle :milliseconds] [driver _ field-or-value] (sql.qp/unix-timestamp->timestamp driver :seconds (hx// field-or-value (hsql/raw 1000)))) - -(s/defn ^:private increment-identifier-string :- s/Str - [last-component :- s/Str] - (if-let [[_ existing-suffix] (re-find #"^.*_(\d+$)" last-component)] - ;; if last-component already has an alias like col_2 then increment it to col_3 - (let [new-suffix (str (inc (Integer/parseInt existing-suffix)))] - (str/replace last-component (re-pattern (str existing-suffix \$)) new-suffix)) - ;; otherwise just stick a _2 on the end so it's col_2 - (str last-component "_2"))) - -(s/defn ^:private increment-identifier - "Add an appropriate suffix to a keyword `identifier` to make it distinct from previous usages of the same identifier, - e.g. - - (increment-identifier :my_col) ; -> :my_col_2 - (increment-identifier :my_col_2) ; -> :my_col_3" - [identifier :- Identifier] - (update - identifier - :components - (fn [components] - (conj - (vec (butlast components)) - (increment-identifier-string (u/keyword->qualified-name (last components))))))) - -(defn- alias-everything - "Make sure all the columns in `select-clause` are alias forms, e.g. `[:table.col :col]` instead of `:table.col`. - (This faciliates our deduplication logic.)" - [select-clause] - (for [col select-clause] - (cond - ;; if something's already an alias form like [:table.col :col] it's g2g - (sequential? col) - col - - ;; otherwise we *should* be dealing with an Identifier. If so, take the last component of the Identifier and use - ;; that as the alias. - ;; - ;; TODO - could this be done using `->honeysql` or `field->alias` instead? - (instance? Identifier col) - [col (hx/identifier :field-alias (last (:components col)))] - - :else - (do - (log/error (trs "Don't know how to alias {0}, expected an Identifer." col)) - [col col])))) - -(defn- deduplicate-identifiers - "Make sure every column in `select-clause` has a unique alias. This is done because Oracle can't figure out how to use - a query that produces duplicate columns in a subselect." - [select-clause] - (if (= select-clause [:*]) - ;; if we're doing `SELECT *` there's no way we can deduplicate anything so we're SOL, return as-is - select-clause - ;; otherwise we can actually deduplicate things - (loop [already-seen #{}, acc [], [[col alias] & more] (alias-everything select-clause)] - (cond - ;; if not more cols are left to deduplicate, we're done - (not col) - acc - - ;; otherwise if we've already used this alias, replace it with one like `identifier_2` and try agan - (contains? already-seen alias) - (recur already-seen acc (cons [col (increment-identifier alias)] - more)) - - ;; otherwise if we haven't seen it record it as seen and move on to the next column - :else - (recur (conj already-seen alias) (conj acc [col alias]) more))))) - ;; Oracle doesn't support `LIMIT n` syntax. Instead we have to use `WHERE ROWNUM <= n` (`NEXT n ROWS ONLY` isn't ;; supported on Oracle versions older than 12). This has to wrap the actual query, e.g. ;; @@ -267,7 +194,7 @@ ;; back to including a `SELECT *` just to make sure a valid query is produced :from [(-> (merge {:select [:*]} honeysql-query) - (update :select deduplicate-identifiers))] + (update :select sql.u/select-clause-deduplicate-aliases))] :where [:<= (hsql/raw "rownum") value]}) (defmethod sql.qp/apply-top-level-clause [:oracle :page] diff --git a/modules/drivers/oracle/test/metabase/driver/oracle_test.clj b/modules/drivers/oracle/test/metabase/driver/oracle_test.clj index 742feb50efef31ee3701b466fcbfb566d7b8eb3b..1d593b5ec33fc9a1967d6ebd3caeb8a22d0dd00c 100644 --- a/modules/drivers/oracle/test/metabase/driver/oracle_test.clj +++ b/modules/drivers/oracle/test/metabase/driver/oracle_test.clj @@ -2,16 +2,18 @@ "Tests for specific behavior of the Oracle driver." (:require [clojure.java.jdbc :as jdbc] [expectations :refer [expect]] + [honeysql.core :as hsql] [metabase [driver :as driver] [query-processor :as qp] [query-processor-test :as qp.test] [util :as u]] - [metabase.driver.oracle :as oracle] [metabase.driver.sql-jdbc.connection :as sql-jdbc.conn] + [metabase.driver.sql.query-processor :as sql.qp] [metabase.models [field :refer [Field]] [table :refer [Table]]] + [metabase.query-processor.test-util :as qp.test-util] [metabase.test [data :as data] [util :as tu]] @@ -69,36 +71,6 @@ :service-name "MyCoolService" :sid "ORCL"})) -;; `deduplicate-identifiers` should use the last component of an identifier as the alias if it does not already have -;; one -(expect - [[(hx/identifier :field "A" "B" "C" "D") (hx/identifier :field-alias "D")] - [(hx/identifier :field "F") (hx/identifier :field-alias "G")]] - (#'oracle/deduplicate-identifiers - [(hx/identifier :field "A" "B" "C" "D") - [(hx/identifier :field "F") (hx/identifier :field-alias "G")]])) - -;; `deduplicate-identifiers` should append numeric suffixes to duplicate aliases -(expect - [[(hx/identifier :field "A" "B" "C" "D") (hx/identifier :field-alias "D")] - [(hx/identifier :field "E" "D") (hx/identifier :field-alias "D_2")] - [(hx/identifier :field "F") (hx/identifier :field-alias "G")]] - (#'oracle/deduplicate-identifiers - [(hx/identifier :field "A" "B" "C" "D") - (hx/identifier :field "E" "D") - [(hx/identifier :field "F") (hx/identifier :field-alias "G")]])) - -;; `deduplicate-identifiers` should handle aliases that are already suffixed gracefully -(expect - [[(hx/identifier :field "A" "B" "C" "D") (hx/identifier :field-alias "D")] - [(hx/identifier :field "E" "D") (hx/identifier :field-alias "D_2")] - [(hx/identifier :field "F") (hx/identifier :field-alias "D_3")]] - (#'oracle/deduplicate-identifiers - [(hx/identifier :field "A" "B" "C" "D") - (hx/identifier :field "E" "D") - [(hx/identifier :field "F") (hx/identifier :field-alias "D_2")]])) - - (expect com.jcraft.jsch.JSchException (let [engine :oracle @@ -157,3 +129,48 @@ {:database (data/id) :type :query :query {:source-table (u/get-id table)}})))))) + +;; let's make sure we're actually attempting to generate the correctl HoneySQL for joins and source queries so we +;; don't sit around scratching our heads wondering why the queries themselves aren't working + +(expect-with-driver :oracle + {:select [:*] + :from + [{:select + [[(hx/identifier :field oracle.tx/session-schema "venues" "id") (hx/identifier :field-alias "id")] + [(hx/identifier :field oracle.tx/session-schema "venues" "name") (hx/identifier :field-alias "name")] + [(hx/identifier :field oracle.tx/session-schema "venues" "category_id") (hx/identifier :field-alias "category_id")] + [(hx/identifier :field oracle.tx/session-schema "venues" "latitude") (hx/identifier :field-alias "latitude")] + [(hx/identifier :field oracle.tx/session-schema "venues" "longitude") (hx/identifier :field-alias "longitude")] + [(hx/identifier :field oracle.tx/session-schema "venues" "price") (hx/identifier :field-alias "price")]] + + :from [(hx/identifier :table oracle.tx/session-schema "venues")] + :left-join [[(hx/identifier :table oracle.tx/session-schema "categories") "test_data_categories__via__cat"] + [:= + (hx/identifier :field oracle.tx/session-schema "venues" "category_id") + (hx/identifier :field "test_data_categories__via__cat" "id")]] + :where [:= + (hx/identifier :field "test_data_categories__via__cat" "name") + "BBQ"] + :order-by [[(hx/identifier :field oracle.tx/session-schema "venues" "id") :asc]]}] + + :where [:<= (hsql/raw "rownum") 100]} + (qp.test-util/with-everything-store + (#'sql.qp/mbql->honeysql + :oracle + (data/mbql-query venues + {:source-table $$venues + :order-by [[:asc $id]] + :filter [:= + [:joined-field "test_data_categories__via__cat" $categories.name] + [:value "BBQ" {:base_type :type/Text, :special_type :type/Name, :database_type "VARCHAR"}]] + :fields [$id $name $category_id $latitude $longitude $price] + :limit 100 + :joins [{:source-table $$categories + :alias "test_data_categories__via__cat", + :strategy :left-join + :condition [:= + $category_id + [:joined-field "test_data_categories__via__cat" $categories.id]] + :fk-field-id (data/id :venues :category_id) + :fields :none}]})))) diff --git a/modules/drivers/oracle/test/metabase/test/data/oracle.clj b/modules/drivers/oracle/test/metabase/test/data/oracle.clj index a50415267b26377897ff758965fa74979bf1c4c9..85194428f4f7a36df9d6eb86fe03763f957aa4fe 100644 --- a/modules/drivers/oracle/test/metabase/test/data/oracle.clj +++ b/modules/drivers/oracle/test/metabase/test/data/oracle.clj @@ -23,7 +23,7 @@ ;; PUBLIC.CHECKINS.USER_ID | CAM_195.test_data_checkins.user_id ;; PUBLIC.INCIDENTS.TIMESTAMP | CAM_195.sad_toucan_incidents.timestamp (defonce ^:private session-schema-number (rand-int 200)) -(defonce ^:private session-schema (str "CAM_" session-schema-number)) +(defonce session-schema (str "CAM_" session-schema-number)) (defonce ^:private session-password (apply str (repeatedly 16 #(rand-nth (map char (range (int \a) (inc (int \z)))))))) ;; Session password is only used when creating session user, not anywhere else diff --git a/src/metabase/driver/sql/query_processor.clj b/src/metabase/driver/sql/query_processor.clj index 2475da55529cdcf21c136352bb62e4dcd4be8d4d..8473dfe118479efc5a03a7e79166db1f9fc52242 100644 --- a/src/metabase/driver/sql/query_processor.clj +++ b/src/metabase/driver/sql/query_processor.clj @@ -24,12 +24,13 @@ [i18n :refer [tru]] [schema :as su]] [schema.core :as s]) - (:import honeysql.format.ToSql - metabase.util.honeysql_extensions.Identifier)) + (:import metabase.util.honeysql_extensions.Identifier)) ;; TODO - yet another `*query*` dynamic var. We should really consolidate them all so we only need a single one. (def ^:dynamic *query* - "The outer query currently being processed." + "The outer query currently being processed. + (This is only used to power `[:aggregation <index>]` and expression references, because they need to be able to find + the corresponding clauses outside of where they're being processed.)" nil) (def ^:dynamic *nested-query-level* @@ -263,26 +264,29 @@ [driver [_ pred]] (hsql/call :/ (->honeysql driver [:count-where pred]) :%count.*)) +;; actual handling of the name is done in the top-level clause handler for aggregations (defmethod ->honeysql [:sql :named] [driver [_ ag ag-name]] (->honeysql driver ag)) ;; aggregation REFERENCE e.g. the ["aggregation" 0] fields we allow in order-by (defmethod ->honeysql [:sql :aggregation] [driver [_ index]] - (let [aggregation (mbql.u/aggregation-at-index *query* index *nested-query-level*)] - (cond - ;; For some arcane reason we name the results of a distinct aggregation "count", - ;; everything else is named the same as the aggregation - (mbql.u/is-clause? :distinct aggregation) - :count + (mbql.u/match-one (mbql.u/aggregation-at-index *query* index *nested-query-level*) + [:named _ ag-name] + (->honeysql driver (hx/identifier :field-alias ag-name)) - (mbql.u/is-clause? #{:+ :- :* :/} aggregation) - (->honeysql driver aggregation) + ;; For some arcane reason we name the results of a distinct aggregation "count", everything else is named the + ;; same as the aggregation + :distinct + (->honeysql driver (hx/identifier :field-alias :count)) - ;; for everything else just use the name of the aggregation as an identifer, e.g. `:sum` - ;; TODO - this obviously doesn't work right for multiple aggregations of the same type - :else - (first aggregation)))) + #{:+ :- :* :/} + (->honeysql driver &match) + + ;; for everything else just use the name of the aggregation as an identifer, e.g. `:sum` + ;; TODO - this obviously doesn't work right for multiple aggregations of the same type + [ag-type & _] + (->honeysql driver (hx/identifier :field-alias ag-type)))) (defmethod ->honeysql [:sql :absolute-datetime] [driver [_ timestamp unit]] @@ -303,7 +307,7 @@ ;;; | Field Aliases (AS Forms) | ;;; +----------------------------------------------------------------------------------------------------------------+ -(s/defn field-clause->alias :- (s/maybe ToSql) +(s/defn field-clause->alias "Generate HoneySQL for an approriate alias (e.g., for use with SQL `AN`) for a Field clause of any type, or `nil` if the Field should not be aliased (e.g. if `field->alias` returns `nil`)." [driver, field-clause :- mbql.s/Field] @@ -350,16 +354,12 @@ (defmethod apply-top-level-clause [:sql :aggregation] [driver _ honeysql-form {aggregations :aggregation}] - (loop [form honeysql-form, [ag & more] aggregations] - (let [form (h/merge-select - form - [(->honeysql driver ag) - (->honeysql driver (hx/identifier - :field-alias - (driver/format-custom-field-name driver (annotate/aggregation-name ag))))])] - (if-not (seq more) - form - (recur form more))))) + (let [honeysql-ags (for [ag aggregations] + [(->honeysql driver ag) + (->honeysql driver (hx/identifier + :field-alias + (driver/format-custom-field-name driver (annotate/aggregation-name ag))))])] + (reduce h/merge-select honeysql-form honeysql-ags))) ;;; ----------------------------------------------- breakout & fields ------------------------------------------------ @@ -446,42 +446,59 @@ (declare build-honeysql-form) -(defn- make-honeysql-join-clauses - "Returns a seq of honeysql join clauses, joining to `table-or-query-expr`. `jt-or-jq` can be either a `JoinTable` or - a `JoinQuery`" - [driver table-or-query-expr {:keys [join-alias fk-field-id pk-field-id]}] - (let [source-field (qp.store/field fk-field-id) - pk-field (qp.store/field pk-field-id)] - [[table-or-query-expr (->honeysql driver (hx/identifier :table-alias join-alias))] - [:= - (->honeysql driver source-field) - (binding [*table-alias* join-alias] - (->honeysql driver pk-field))]])) - -(s/defn ^:private join-info->honeysql - [driver , {:keys [query table-id], :as info} :- mbql.s/JoinInfo] - (if query - (make-honeysql-join-clauses driver (build-honeysql-form driver query) info) - (let [table (qp.store/table table-id) - table-identifier (binding [*table-alias* nil] - (->honeysql driver table))] - (make-honeysql-join-clauses driver table-identifier info)))) - -(defmethod apply-top-level-clause [:sql :join-tables] - [driver _ honeysql-form {:keys [join-tables]}] - (reduce (partial apply h/merge-left-join) honeysql-form (map (partial join-info->honeysql driver) join-tables))) +(defmulti join->honeysql + "Compile a single MBQL `join` to HoneySQL." + {:arglists '([driver join]), :style/indent 1} + driver/dispatch-on-initialized-driver + :hierarchy #'driver/hierarchy) + +(defmulti join-source + "Generate HoneySQL for a table or query to be joined." + {:arglists '([driver join]), :style/indent 1} + driver/dispatch-on-initialized-driver + :hierarchy #'driver/hierarchy) + +(defmethod join-source :sql + [driver {:keys [source-table source-query]}] + (if source-query + (build-honeysql-form driver source-query) + (binding [*table-alias* nil] + (->honeysql driver (qp.store/table source-table))))) + +(s/defmethod join->honeysql :sql + [driver, {:keys [condition alias], :as join} :- mbql.s/Join] + [[(join-source driver join) (->honeysql driver (hx/identifier :table-alias alias))] + (->honeysql driver condition)]) + +(def ^:private join-strategy->merge-fn + {:left-join h/merge-left-join + :right-join h/merge-right-join + :inner-join h/merge-join + :full-join h/merge-full-join}) + +(defmethod apply-top-level-clause [:sql :joins] + [driver _ honeysql-form {:keys [joins]}] + (reduce + (fn [honeysql-form {:keys [strategy], :as join}] + (apply (join-strategy->merge-fn strategy) honeysql-form (join->honeysql driver join))) + honeysql-form + joins)) ;;; ---------------------------------------------------- order-by ---------------------------------------------------- +(defmethod ->honeysql [:sql :asc] + [driver [direction field]] + [(->honeysql driver field) direction]) + +(defmethod ->honeysql [:sql :desc] + [driver [direction field]] + [(->honeysql driver field) direction]) + (defmethod apply-top-level-clause [:sql :order-by] - [driver _ honeysql-form {subclauses :order-by breakout-fields :breakout}] - (let [[{:keys [special-type] :as first-breakout-field}] breakout-fields] - (loop [honeysql-form honeysql-form, [[direction field] & more] subclauses] - (let [honeysql-form (h/merge-order-by honeysql-form [(->honeysql driver field) direction])] - (if (seq more) - (recur honeysql-form more) - honeysql-form))))) + [driver _ honeysql-form {subclauses :order-by}] + (reduce h/merge-order-by honeysql-form (map (partial ->honeysql driver) + subclauses))) ;;; -------------------------------------------------- limit & page -------------------------------------------------- @@ -514,29 +531,39 @@ (def ^:private top-level-clause-application-order "Order to apply top-level clauses in. This is important because we build things like the `SELECT` clause progressively - and MBQL requires us to return results with `:breakout` columns before `:aggregation`, etc." - [:source-table :breakout :aggregation :fields :filter :join-tables :order-by :page :limit]) + and MBQL requires us to return results with `:breakout` columns before `:aggregation`, etc. + + Map of clause -> index, e.g. + + {:source-table 0, :breakout 1, ...}" + (into {} (map-indexed + #(vector %2 %1) + [:source-table :breakout :aggregation :fields :filter :joins :order-by :page :limit]))) (defn- query->keys-in-application-order "Return the keys present in an MBQL `inner-query` in the order they should be processed." [inner-query] ;; sort first by any known top-level clauses according to the `top-level-application-clause-order` defined above, ;; then sort any unknown clauses by name. - (let [known-clause->index (into {} (map-indexed (fn [i clause] [clause i]) top-level-clause-application-order))] - ;; We'll do this using a [<known-applicaton-order-index> <clause-name-keyword>] tuple - (sort-by (fn [clause] [(known-clause->index clause Integer/MAX_VALUE) clause]) (keys inner-query)))) + (sort-by (fn [clause] [(get top-level-clause-application-order clause Integer/MAX_VALUE) clause]) + (keys inner-query))) + +(defn- add-default-select + "Add `SELECT *` to `honeysql-form` if no `:select` clause is present." + [{:keys [select], :as honeysql-form}] + (cond-> honeysql-form + (empty? select) (assoc :select [:*]))) (defn- apply-top-level-clauses - "Loop through all the `clause->handler` entries; if the query contains a given clause, apply the handler fn. Doesn't - handle `:source-query`; since that must be handled in a special way, that is handled separately." + "`apply-top-level-clause` for all of the top-level clauses in `inner-query`, progressively building a HoneySQL form. + Clauses are applied according to the order in `top-level-clause-application-order`." [driver honeysql-form inner-query] - (loop [honeysql-form honeysql-form, [k & more] (query->keys-in-application-order inner-query)] - (let [honeysql-form (apply-top-level-clause driver k honeysql-form inner-query)] - (if (seq more) - (recur honeysql-form more) - ;; ok, we're done; if no `:select` clause was specified (for whatever reason) put a default (`SELECT *`) one - ;; in - (update honeysql-form :select #(if (seq %) % [:*])))))) + (-> (reduce + (fn [honeysql-form k] + (apply-top-level-clause driver k honeysql-form inner-query)) + honeysql-form + (query->keys-in-application-order inner-query)) + add-default-select)) ;;; -------------------------------------------- Handling source queries --------------------------------------------- @@ -587,8 +614,8 @@ (s/defn build-honeysql-form "Build the HoneySQL form we will compile to SQL and execute." - [driverr, {inner-query :query} :- su/Map] - (u/prog1 (apply-clauses driverr {} inner-query) + [driver, {inner-query :query} :- su/Map] + (u/prog1 (apply-clauses driver {} inner-query) (when-not i/*disable-qp-logging* (log/debug (tru "HoneySQL Form:") (u/emoji "ðŸ¯") "\n" (u/pprint-to-str 'cyan <>))))) @@ -613,11 +640,14 @@ (throw e)))] (into [sql] args))) +(defn- mbql->honeysql [driver outer-query] + (binding [*query* outer-query] + (build-honeysql-form driver outer-query))) + (defn mbql->native "Transpile MBQL query into a native SQL statement." [driver {inner-query :query, database :database, :as outer-query}] - (binding [*query* outer-query] - (let [honeysql-form (build-honeysql-form driver outer-query) - [sql & args] (honeysql-form->sql+args driver honeysql-form)] - {:query sql - :params args}))) + (let [honeysql-form (mbql->honeysql driver outer-query) + [sql & args] (honeysql-form->sql+args driver honeysql-form)] + {:query sql + :params args})) diff --git a/src/metabase/driver/sql/util.clj b/src/metabase/driver/sql/util.clj index 4806e3f4b43472d511200570e4be0009296e3d38..eec1af73e04228e778f2a4e069a8a01b5a718d83 100644 --- a/src/metabase/driver/sql/util.clj +++ b/src/metabase/driver/sql/util.clj @@ -1,9 +1,15 @@ (ns metabase.driver.sql.util "Utility functions for writing SQL drivers." - (:require [honeysql.core :as hsql] + (:require [clojure.string :as str] + [clojure.tools.logging :as log] + [honeysql.core :as hsql] [metabase.driver.sql.query-processor :as sql.qp] - [metabase.util.honeysql-extensions :as hx] - [schema.core :as s])) + [metabase.util :as u] + [metabase.util + [honeysql-extensions :as hx] + [i18n :refer [trs]]] + [schema.core :as s]) + (:import metabase.util.honeysql_extensions.Identifier)) (s/defn quote-name "Quote unqualified string or keyword identifier(s) by passing them to `hx/identifier`, then calling HoneySQL `format` @@ -21,3 +27,76 @@ (hsql/format (sql.qp/->honeysql driver (apply hx/identifier identifier-type components)) :quoting (sql.qp/quote-style driver) :allow-dashed-names? true))) + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | Deduplicate Field Aliases | +;;; +----------------------------------------------------------------------------------------------------------------+ + +(s/defn ^:private increment-identifier-string :- s/Str + [last-component :- s/Str] + (if-let [[_ existing-suffix] (re-find #"^.*_(\d+$)" last-component)] + ;; if last-component already has an alias like col_2 then increment it to col_3 + (let [new-suffix (str (inc (Integer/parseInt existing-suffix)))] + (str/replace last-component (re-pattern (str existing-suffix \$)) new-suffix)) + ;; otherwise just stick a _2 on the end so it's col_2 + (str last-component "_2"))) + +(s/defn ^:private increment-identifier + "Add an appropriate suffix to a keyword `identifier` to make it distinct from previous usages of the same identifier, + e.g. + + (increment-identifier :my_col) ; -> :my_col_2 + (increment-identifier :my_col_2) ; -> :my_col_3" + [identifier :- Identifier] + (update + identifier + :components + (fn [components] + (conj + (vec (butlast components)) + (increment-identifier-string (u/keyword->qualified-name (last components))))))) + +(defn select-clause-alias-everything + "Make sure all the columns in `select-clause` are alias forms, e.g. `[:table.col :col]` instead of `:table.col`. + (This faciliates our deduplication logic.)" + [select-clause] + (for [col select-clause] + (cond + ;; if something's already an alias form like [:table.col :col] it's g2g + (sequential? col) + col + + ;; otherwise we *should* be dealing with an Identifier. If so, take the last component of the Identifier and use + ;; that as the alias. + ;; + ;; TODO - could this be done using `->honeysql` or `field->alias` instead? + (instance? Identifier col) + [col (hx/identifier :field-alias (last (:components col)))] + + :else + (do + (log/error (trs "Don't know how to alias {0}, expected an Identifer." col)) + [col col])))) + +(defn select-clause-deduplicate-aliases + "Make sure every column in `select-clause` has a unique alias. This is useful for databases like Oracle that can't + figure out how to use a query that produces duplicate columns in a subselect." + [select-clause] + (if (= select-clause [:*]) + ;; if we're doing `SELECT *` there's no way we can deduplicate anything so we're SOL, return as-is + select-clause + ;; otherwise we can actually deduplicate things + (loop [already-seen #{}, acc [], [[col alias] & more] (select-clause-alias-everything select-clause)] + (cond + ;; if not more cols are left to deduplicate, we're done + (not col) + acc + + ;; otherwise if we've already used this alias, replace it with one like `identifier_2` and try agan + (contains? already-seen alias) + (recur already-seen acc (cons [col (increment-identifier alias)] + more)) + + ;; otherwise if we haven't seen it record it as seen and move on to the next column + :else + (recur (conj already-seen alias) (conj acc [col alias]) more))))) diff --git a/src/metabase/mbql/schema.clj b/src/metabase/mbql/schema.clj index 4423d4467887a393e9dffeca2925a4bc4c9e8e5d..20a91b0d440035a749edc7f6534890ae27883ce9 100644 --- a/src/metabase/mbql/schema.clj +++ b/src/metabase/mbql/schema.clj @@ -5,6 +5,7 @@ [core :as core] [set :as set]] [metabase.mbql.schema.helpers :refer [defclause is-clause? one-of]] + [metabase.util :as u] [metabase.util [date :as du] [schema :as su]] @@ -526,38 +527,81 @@ (set/rename-keys NativeQuery {:query :native}) (s/recursive #'MBQLQuery))) -(def JoinTableInfo - "Schema for information about a JOIN (or equivalent) that should be performed, and how to do it.. This is added - automatically by `resolve-joined-tables` middleware for `fk->` forms that are encountered." - { ;; The alias we should use for the table - :join-alias su/NonBlankString - ;; ID of the Table to JOIN against. Table will be present in the QP store - :table-id su/IntGreaterThanZero - ;; ID of the Field of the Query's SOURCE TABLE to use for the JOIN - ;; TODO - can `fk-field-id` and `pk-field-id` possibly be NAMES of FIELD LITERALS?? - :fk-field-id su/IntGreaterThanZero - ;; ID of the Field on the Table we will JOIN (i.e., Table with `table-id`) to use for the JOIN - :pk-field-id su/IntGreaterThanZero}) - -(def JoinQueryInfo - "Schema for information about about a JOIN (or equivalent) that should be performed using a recursive MBQL or native - query." - ;; Similar to a `JoinTable` but instead of referencing a table, it references a query expression - (assoc JoinTableInfo - :query (s/recursive #'Query))) - -(def JoinInfo - "Schema for information about a JOIN (or equivalent) that needs to be performed, either `JoinTableInfo` or - `JoinQueryInfo`." - (s/if :query - JoinQueryInfo - JoinTableInfo)) - -(def ^:private JoinInfos +(def JoinStrategy + "Strategy that should be used to perform the equivalent of a SQL `JOIN` against another table or a nested query. + These correspond 1:1 to features of the same name in driver features lists; e.g. you should check that the current + driver supports `:full-join` before generating a Join clause using that strategy." + (s/enum :left-join :right-join :inner-join :full-join)) + +(def Join + "Perform the equivalent of a SQL `JOIN` with another Table or nested `:source-query`. JOINs are either explicitly + specified in the incoming query, or implicitly generated when one uses a `:fk->` clause. + In the top-level query, you can reference Fields from the joined table or nested query by the `:fk->` clause for + implicit joins; for explicit joins, you *must* specify `:alias` yourself; you can then reference Fields by using a + `:joined-field` clause, e.g. + [:joined-field \"my_join_alias\" [:field-id 1]] ; for joins against other Tabless + [:joined-field \"my_join_alias\" [:field-literal \"my_field\"]] ; for joins against nested queries" + (-> + { ;; The condition on which to JOIN. Can be anything that is a valid `:filter` clause. For automatically-generated + ;; JOINs this is always + ;; + ;; [:= <source-table-fk-field> [:joined-field <join-table-alias> <dest-table-pk-field>]] + ;; + :condition Filter + ;; + ;; *What* to JOIN. Self-joins can be done by using the same `:source-table` as in the query where this is specified. + ;; YOU MUST SUPPLY EITHER `:source-table` OR `:source-query`, BUT NOT BOTH! + (s/optional-key :source-table) su/IntGreaterThanZero + (s/optional-key :source-query) (s/recursive #'Query) + ;; + ;; Defaults to `:left-join`; used for all automatically-generated JOINs + ;; + ;; Driver implementations: this is guaranteed to be present after pre-processing. + (s/optional-key :strategy) JoinStrategy + ;; + ;; The Fields to include in the results *if* a top-level `:fields` clause *is not* specified. This can be either + ;; `:none`, `:all`, or a sequence of Field clauses. + ;; + ;; * `:none`: no Fields from the joined table or nested query are included (unless indirectly included by + ;; breakouts or other clauses). This is the default, and what is used for automatically-generated joins. + ;; + ;; * `:all`: will include all of the Fields from the joined table or query + ;; + ;; * a sequence of Field clauses: include only the Fields specified. Valid clauses are the same as the top-level + ;; `:fields` clause. This should be non-empty and all elements should be distinct. The normalizer will + ;; automatically remove duplicate fields for you, and replace empty clauses with `:none`. + ;; + ;; Driver implementations: you can ignore this clause. Relevant fields will be added to top-level `:fields` clause + ;; with appropriate aliases. + (s/optional-key :fields) (s/cond-pre + (s/enum :all :none) + (su/distinct (su/non-empty [Field]))) + ;; + ;; The name used to alias the joined table or query. This is usually generated automatically and generally looks + ;; like `table__via__field`. You can specify this yourself if you need to reference a joined field in a + ;; `:joined-field` clause. + ;; + ;; Driver implementations: This is guaranteed to be present after pre-processing. + (s/optional-key :alias) su/NonBlankString + ;; + ;; Used internally, only for annotation purposes in post-processing. When a join is implicitly generated via an + ;; `:fk->` clause, the ID of the foreign key field in the source Table will be recorded here. This information is + ;; used to add `fk_field_id` information to the `:cols` in the query results; I believe this is used to facilitate + ;; drill-thru? :shrug: + ;; + ;; Don't set this information yourself. It will have no effect. + (s/optional-key :fk-field-id) (s/maybe su/IntGreaterThanZero)} + (s/constrained + (fn [{:keys [source-table source-query]}] + (u/xor source-table source-query)) + "Joins can must have either a `source-table` or `source-query`, but not both."))) + +(def Joins + "Schema for a valid sequence of `Join`s. Must be a non-empty sequence, and `:alias`, if specified, must be unique." (s/constrained - (su/distinct [JoinInfo]) - #(su/empty-or-distinct? (filter some? (map :join-alias %))) - "all join aliases must be distinct.")) + (su/non-empty [Join]) + #(su/empty-or-distinct? (filter some? (map :alias %))) + "All join aliases must be unique.")) (def ^java.util.regex.Pattern source-table-card-id-regex "Pattern that matches `card__id` strings that can be used as the `:source-table` of MBQL queries." @@ -574,7 +618,7 @@ (s/optional-key :source-table) SourceTable (s/optional-key :aggregation) (su/non-empty [Aggregation]) (s/optional-key :breakout) (su/non-empty [Field]) - ; TODO - expressions keys should be strings; fix this when we get a chance + ;; TODO - expressions keys should be strings; fix this when we get a chance (s/optional-key :expressions) {s/Keyword FieldOrExpressionDef} (s/optional-key :fields) (su/distinct (su/non-empty [Field])) (s/optional-key :filter) Filter @@ -588,7 +632,7 @@ :items su/IntGreaterThanZero} ;; Various bits of middleware add additonal keys, such as `fields-is-implicit?`, to record bits of state or pass ;; info to other pieces of middleware. Everyone else can ignore them. - (s/optional-key :join-tables) JoinInfos + (s/optional-key :joins) Joins s/Keyword s/Any} (s/constrained diff --git a/src/metabase/mbql/schema/helpers.clj b/src/metabase/mbql/schema/helpers.clj index b558303446d709457fea5e10bf5f48c6605b1712..cee98b80f0ef08d2e6d09aad096cb5c7a0d65847 100644 --- a/src/metabase/mbql/schema/helpers.clj +++ b/src/metabase/mbql/schema/helpers.clj @@ -59,7 +59,7 @@ ;; TODO - this is a copy of the one in the `metabase.mbql.util` namespace. We need to reorganize things a bit so we ;; can use the same fn and avoid circular refs -(defn ^:deprecated is-clause? +(defn is-clause? "If `x` an MBQL clause, and an instance of clauses defined by keyword(s) `k-or-ks`? (is-clause? :count [:count 10]) ; -> true diff --git a/src/metabase/mbql/util.clj b/src/metabase/mbql/util.clj index 61cc95c209afe56cbc2525b52a134779aa8e83fe..acec8d6b935b49dd1e5978ce5009b65b7c04954a 100644 --- a/src/metabase/mbql/util.clj +++ b/src/metabase/mbql/util.clj @@ -354,6 +354,7 @@ number to as optional arg `nesting-level` to make sure you reference aggregations at the right level of nesting." ([query index] (aggregation-at-index query index 0)) + ([query :- mbql.s/Query, index :- su/NonNegativeInt, nesting-level :- su/NonNegativeInt] (if (zero? nesting-level) (or (nth (get-in query [:query :aggregation]) index) diff --git a/src/metabase/models/query/permissions.clj b/src/metabase/models/query/permissions.clj index e139b7ba29167d076e21f4a953e33508b0a6e22d..d51218de51f138e8a242c81e6d2f720cf0857f1b 100644 --- a/src/metabase/models/query/permissions.clj +++ b/src/metabase/models/query/permissions.clj @@ -33,17 +33,17 @@ ;; ↓ ↓ ;; tables->permissions-path-set source-card-read-perms -(defn- query->source-and-join-tables +(defn- query->source-and-joins "Return a sequence of all Tables (as TableInstance maps, or IDs) referenced by `query`." - [{:keys [source-table join-tables source-query native], :as query}] + [{:keys [source-table joins source-query native], :as query}] (cond ;; if we come across a native query just put a placeholder (`::native`) there so we know we need to add native ;; permissions to the complete set below. native [::native] ;; if we have a source-query just recur until we hit either the native source or the MBQL source source-query (recur source-query) - ;; for root MBQL queries just return source-table + join-tables - :else (cons source-table (map :table-id join-tables)))) + ;; for root MBQL queries just return source-table + joins + :else (cons source-table (map :source-table joins)))) (def ^:private PermsOptions "Map of options to be passed to the permissions checking functions." @@ -109,7 +109,7 @@ ;; otherwise if there's no source card then calculate perms based on the Tables referenced in the query (let [{:keys [query database]} (cond-> query (not already-preprocessed?) preprocess-query)] - (tables->permissions-path-set database (query->source-and-join-tables query)))) + (tables->permissions-path-set database (query->source-and-joins query)))) ;; if for some reason we can't expand the Card (i.e. it's an invalid legacy card) just return a set of permissions ;; that means no one will ever get to see it (except for superusers who get to see everything) (catch Throwable e diff --git a/src/metabase/query_processor/middleware/annotate.clj b/src/metabase/query_processor/middleware/annotate.clj index 77bf39150f9d14cdf58b410feb6742c375738479..205418c16a65f7245d66ea8e52a9d90fc73fa4af 100644 --- a/src/metabase/query_processor/middleware/annotate.clj +++ b/src/metabase/query_processor/middleware/annotate.clj @@ -62,13 +62,13 @@ ;;; | Adding :cols info for MBQL queries | ;;; +----------------------------------------------------------------------------------------------------------------+ -(s/defn ^:private join-with-alias :- (s/maybe mbql.s/JoinInfo) - [{{:keys [join-tables]} :query} :- su/Map, join-alias :- su/NonBlankString] +(s/defn ^:private join-with-alias :- (s/maybe mbql.s/Join) + [{{:keys [joins]} :query} :- su/Map, join-alias :- su/NonBlankString] (some - (fn [{alias :join-alias, :as join}] + (fn [{:keys [alias], :as join}] (when (= alias join-alias) join)) - join-tables)) + joins)) ;;; --------------------------------------------------- Field Info --------------------------------------------------- diff --git a/src/metabase/query_processor/middleware/resolve_joined_tables.clj b/src/metabase/query_processor/middleware/resolve_joined_tables.clj index 7872ab3ba40bc57159411cf7d16404410b686f51..10f70a4fb9ac48da69d4601e8129d09ae07eff32 100644 --- a/src/metabase/query_processor/middleware/resolve_joined_tables.clj +++ b/src/metabase/query_processor/middleware/resolve_joined_tables.clj @@ -114,19 +114,22 @@ (resolve-one-join-info clause pk-info)))) -;;; ---------------------------------------- Adding :join-tables to the query ---------------------------------------- +;;; ------------------------------------------- Adding :joins to the query ------------------------------------------- -(s/defn ^:private resolved-join-info->join-clause :- mbql.s/JoinInfo +(s/defn ^:private resolved-join-info->join-clause :- mbql.s/Join [{:keys [source-table alias fk-field-id pk-field-id]} :- ResolvedJoinInfo] - {:table-id source-table - :join-alias alias + {:source-table source-table + :alias alias + :strategy :left-join + :condition [:= [:field-id fk-field-id] [:joined-field alias [:field-id pk-field-id]]] + :fk-field-id fk-field-id - :pk-field-id pk-field-id}) + :fields :none}) (s/defn ^:private add-implicit-join-clauses :- mbql.s/Query [query, resolved-join-infos :- [ResolvedJoinInfo]] (let [join-clauses (map resolved-join-info->join-clause resolved-join-infos)] - (update-in query [:query :join-tables] (comp distinct concat) join-clauses))) + (update-in query [:query :joins] (comp distinct concat) join-clauses))) ;;; --------------------------------------------- Replacing fk-> clauses --------------------------------------------- diff --git a/test/metabase/driver/sql/query_processor_test.clj b/test/metabase/driver/sql/query_processor_test.clj index 9b192b8d0c9d65072b56d3d6b499380e69089cdd..61e2a77440503b7e4ad260685f15a91a4dd56d9f 100644 --- a/test/metabase/driver/sql/query_processor_test.clj +++ b/test/metabase/driver/sql/query_processor_test.clj @@ -1,6 +1,14 @@ (ns metabase.driver.sql.query-processor-test (:require [expectations :refer [expect]] - [metabase.driver.sql.query-processor :as sql.qp])) + [honeysql.core :as hsql] + [metabase.driver :as driver] + [metabase.driver.sql.query-processor :as sql.qp] + [metabase.query-processor.test-util :as qp.test-util] + [metabase.test.data :as data] + [metabase.util + [honeysql-extensions :as hx] + [pretty :refer [PrettyPrintable]]]) + (:import metabase.util.honeysql_extensions.Identifier)) ;; make sure our logic for deciding which order to process keys in the query works as expected (expect @@ -12,3 +20,141 @@ :aggregation 3 :fields 4 :breakout 2})) + +;; Let's make sure we're actually attempting to generate the correctl HoneySQL for stuff so we don't sit around +;; scratching our heads wondering why the queries themselves aren't working + +;; We'll slap together a driver called `::id-swap` whose only purpose is to replace instances of `Identifier` with +;; `CustomIdentifier` when `->honeysql` is called. This way we can be sure it's being called everywhere it's used so +;; drivers have the chance to do custom things as needed. Also `::id-swap` will record the current `*table-alias*` at +;; the time `->honeysql` is called so we can make sure that's correct +(driver/register! ::id-swap, :parent :sql, :abstract? true) + +(defrecord ^:private CustomIdentifier [identifier table-alias] + PrettyPrintable + (pretty [_] + (let [identifier (cons 'id (cons (:identifier-type identifier) (:components identifier)))] + (if table-alias + (list 'bound-alias table-alias identifier) + identifier)))) + +(defn- id [& args] + (CustomIdentifier. (apply hx/identifier args) nil)) + +(defn- bound-alias [table-alias identifier] + (assoc identifier :table-alias table-alias)) + +(defmethod sql.qp/->honeysql [::id-swap Identifier] + [driver identifier] + ((get-method sql.qp/->honeysql [:sql Identifier]) driver (CustomIdentifier. identifier sql.qp/*table-alias*))) + + +;; This query tests that the correct HoneySQL gets generated for a query with a join, and that the correct identifiers +;; are used +(expect + {:select [[(id :field "PUBLIC" "VENUES" "ID") (id :field-alias "ID")] + [(id :field "PUBLIC" "VENUES" "NAME") (id :field-alias "NAME")] + [(id :field "PUBLIC" "VENUES" "CATEGORY_ID") (id :field-alias "CATEGORY_ID")] + [(id :field "PUBLIC" "VENUES" "LATITUDE") (id :field-alias "LATITUDE")] + [(id :field "PUBLIC" "VENUES" "LONGITUDE") (id :field-alias "LONGITUDE")] + [(id :field "PUBLIC" "VENUES" "PRICE") (id :field-alias "PRICE")]] + :from [(id :table "PUBLIC" "VENUES")] + :where [:= + (bound-alias "c" (id :field "c" "NAME")) + "BBQ"] + :left-join [[(id :table "PUBLIC" "CATEGORIES") (id :table-alias "c")] + [:= + (id :field "PUBLIC" "VENUES" "CATEGORY_ID") + (bound-alias "c" (id :field "c" "ID"))]] + :order-by [[(id :field "PUBLIC" "VENUES" "ID") :asc]] + :limit 100} + (qp.test-util/with-everything-store + (#'sql.qp/mbql->honeysql + ::id-swap + (data/mbql-query venues + {:source-table $$venues + :order-by [[:asc $id]] + :filter [:= + [:joined-field "c" $categories.name] + [:value "BBQ" {:base_type :type/Text, :special_type :type/Name, :database_type "VARCHAR"}]] + :fields [$id $name $category_id $latitude $longitude $price] + :limit 100 + :joins [{:source-table $$categories + :alias "c", + :strategy :left-join + :condition [:= + $category_id + [:joined-field "c" $categories.id]] + :fk-field-id (data/id :venues :category_id) + :fields :none}]})))) + +;; This HAIRY query tests that the correct identifiers and aliases are used with both a nested query and JOIN in play. +;; +;; TODO `*table-alias*` stays bound to `:source` in a few places below where it probably shouldn't (for the top-level +;; SELECT `:field-alias` identifiers and the `v` `:table-alias` identifier) but since drivers shouldn't be qualifying +;; aliases with aliases things still work the right way. +(expect + {:select [[(bound-alias "v" (id :field "v" "NAME")) (bound-alias :source (id :field-alias "NAME"))] + [:%count.* (bound-alias :source (id :field-alias "count"))]] + :from [[{:select [[(id :field "PUBLIC" "CHECKINS" "ID") (id :field-alias "ID")] + [(id :field "PUBLIC" "CHECKINS" "DATE") (id :field-alias "DATE")] + [(id :field "PUBLIC" "CHECKINS" "USER_ID") (id :field-alias "USER_ID")] + [(id :field "PUBLIC" "CHECKINS" "VENUE_ID") (id :field-alias "VENUE_ID")]] + :from [(id :table "PUBLIC" "CHECKINS")] + :where [:> + (id :field "PUBLIC" "CHECKINS" "DATE") + #inst "2015-01-01T00:00:00.000-00:00"]} + (id :table-alias "source")]] + :left-join [[(id :table "PUBLIC" "VENUES") (bound-alias :source (id :table-alias "v"))] + [:= + (bound-alias :source (id :field "source" "VENUE_ID")) + (bound-alias "v" (id :field "v" "ID"))]], + + :group-by [(bound-alias "v" (id :field "v" "NAME"))] + :where [:and + [:like (bound-alias "v" (id :field "v" "NAME")) "F%"] + [:> (bound-alias :source (id :field "source" "user_id")) 0]], + :order-by [[(bound-alias "v" (id :field "v" "NAME")) :asc]]} + (qp.test-util/with-everything-store + (driver/with-driver :h2 + (#'sql.qp/mbql->honeysql + ::id-swap + (data/mbql-query checkins + {:source-query {:source-table $$checkins + :fields [$id [:datetime-field $date :default] $user_id $venue_id] + :filter [:> + $date + [:absolute-datetime #inst "2015-01-01T00:00:00.000000000-00:00" :default]],}, + :aggregation [[:count]] + :order-by [[:asc [:joined-field "v" $venues.name]]] + :breakout [[:joined-field "v" $venues.name]], + :filter [:and + [:starts-with + [:joined-field "v" $venues.name] + [:value "F" {:base_type :type/Text, :special_type :type/Name, :database_type "VARCHAR"}]] + [:> [:field-literal "user_id" :type/Integer] 0]] + :joins [{:source-table $$venues + :alias "v" + :strategy :left-join + :condition [:= + $venue_id + [:joined-field "v" $venues.id]] + :fk-field-id (data/id :checkins :venue_id) + :fields :none}]}))))) + + +;; This query tests that named aggregations are handled correctly +(expect + {:select [[(id :field "PUBLIC" "VENUES" "PRICE") (id :field-alias "PRICE")] + [(hsql/call :avg (id :field "PUBLIC" "VENUES" "CATEGORY_ID")) (id :field-alias "my_average")]] + :from [(id :table "PUBLIC" "VENUES")] + :group-by [(id :field "PUBLIC" "VENUES" "PRICE")], + :order-by [[(id :field-alias "my_average") :asc]]} + (qp.test-util/with-everything-store + (metabase.driver/with-driver :h2 + (#'sql.qp/mbql->honeysql + ::id-swap + (data/mbql-query venues + {:aggregation [[:named [:avg $category_id] "my_average"]] + :breakout [$price] + :order-by [[:asc [:aggregation 0]]]}))))) diff --git a/test/metabase/driver/sql/util_test.clj b/test/metabase/driver/sql/util_test.clj new file mode 100644 index 0000000000000000000000000000000000000000..a3fc934cd52a1a79544712051cd283e1986eeed1 --- /dev/null +++ b/test/metabase/driver/sql/util_test.clj @@ -0,0 +1,33 @@ +(ns metabase.driver.sql.util-test + (:require [expectations :refer [expect]] + [metabase.driver.sql.util :as sql.u] + [metabase.util.honeysql-extensions :as hx])) + +;; `select-clause-deduplicate-aliases` should use the last component of an identifier as the alias if it does not +;; already have one +(expect + [[(hx/identifier :field "A" "B" "C" "D") (hx/identifier :field-alias "D")] + [(hx/identifier :field "F") (hx/identifier :field-alias "G")]] + (sql.u/select-clause-deduplicate-aliases + [(hx/identifier :field "A" "B" "C" "D") + [(hx/identifier :field "F") (hx/identifier :field-alias "G")]])) + +;; `select-clause-deduplicate-aliases` should append numeric suffixes to duplicate aliases +(expect + [[(hx/identifier :field "A" "B" "C" "D") (hx/identifier :field-alias "D")] + [(hx/identifier :field "E" "D") (hx/identifier :field-alias "D_2")] + [(hx/identifier :field "F") (hx/identifier :field-alias "G")]] + (sql.u/select-clause-deduplicate-aliases + [(hx/identifier :field "A" "B" "C" "D") + (hx/identifier :field "E" "D") + [(hx/identifier :field "F") (hx/identifier :field-alias "G")]])) + +;; `select-clause-deduplicate-aliases` should handle aliases that are already suffixed gracefully +(expect + [[(hx/identifier :field "A" "B" "C" "D") (hx/identifier :field-alias "D")] + [(hx/identifier :field "E" "D") (hx/identifier :field-alias "D_2")] + [(hx/identifier :field "F") (hx/identifier :field-alias "D_3")]] + (sql.u/select-clause-deduplicate-aliases + [(hx/identifier :field "A" "B" "C" "D") + (hx/identifier :field "E" "D") + [(hx/identifier :field "F") (hx/identifier :field-alias "D_2")]])) diff --git a/test/metabase/query_processor/middleware/annotate_test.clj b/test/metabase/query_processor/middleware/annotate_test.clj index 95e348fec19ffd4fce171bffce552d4e7b9b8a03..ddd22690c30519f80495c94caf3f486a323b3cad 100644 --- a/test/metabase/query_processor/middleware/annotate_test.clj +++ b/test/metabase/query_processor/middleware/annotate_test.clj @@ -10,6 +10,7 @@ [test-util :as qp.test-util]] [metabase.query-processor.middleware.annotate :as annotate] [metabase.test.data :as data] + [toucan.db :as db] [toucan.util.test :as tt])) ;;; +----------------------------------------------------------------------------------------------------------------+ @@ -37,13 +38,17 @@ ;;; | add-mbql-column-info | ;;; +----------------------------------------------------------------------------------------------------------------+ +(defn- info-for-field + ([field-id] + (db/select-one (into [Field] (disj (set qp.store/field-columns-to-fetch) :database_type)) :id field-id)) + ([table-key field-key] + (info-for-field (data/id table-key field-key)))) + ;; make sure columns are comming back the way we'd expect (expect - [(-> (Field (data/id :venues :price)) - (dissoc :database_type) - (assoc :source :fields))] - (qp.store/with-store - (qp.store/store-field! (Field (data/id :venues :price))) + [(assoc (info-for-field :venues :price) + :source :fields)] + (qp.test-util/with-everything-store (-> (#'annotate/add-mbql-column-info {:query {:fields [[:field-id (data/id :venues :price)]]}} {:columns [:price]}) @@ -55,11 +60,9 @@ ;; TODO - this can be removed, now that `fk->` forms are "sugar" and replaced with `:joined-field` clauses before the ;; query ever makes it to the 'annotate' stage (expect - [(-> (Field (data/id :categories :name)) - (dissoc :database_type) - (assoc :fk_field_id (data/id :venues :category_id), :source :fields))] - (qp.store/with-store - (qp.store/store-field! (Field (data/id :categories :name))) + [(assoc (info-for-field :categories :name) + :fk_field_id (data/id :venues :category_id), :source :fields)] + (qp.test-util/with-everything-store (-> (#'annotate/add-mbql-column-info {:query {:fields [[:fk-> [:field-id (data/id :venues :category_id)] @@ -70,28 +73,29 @@ ;; we should get `:fk_field_id` and information where possible when using `:joined-field` clauses (expect - [(-> (Field (data/id :categories :name)) - (dissoc :database_type) - (assoc :fk_field_id (data/id :venues :category_id), :source :fields))] - (qp.store/with-store - (qp.store/store-field! (Field (data/id :categories :name))) - (-> (#'annotate/add-mbql-column-info - {:query {:fields [[:joined-field "CATEGORIES__via__CATEGORY_ID" [:field-id (data/id :categories :name)]]] - :join-tables [{:join-alias "CATEGORIES__via__CATEGORY_ID" - :table-id (data/id :categories) - :fk-field-id (data/id :venues :category_id) - :pk-field-id (data/id :categories :id)}]}} - {:columns [:name]}) - :cols - vec))) + [(assoc (info-for-field :categories :name) + :fk_field_id (data/id :venues :category_id), :source :fields)] + (data/$ids venues + (qp.test-util/with-everything-store + (-> (#'annotate/add-mbql-column-info + {:query {:fields [[:joined-field "CATEGORIES__via__CATEGORY_ID" [:field-id $categories.name]]] + :joins [{:alias "CATEGORIES__via__CATEGORY_ID" + :source-table $$table + :condition [:= + [:field-id $category_id] + [:joined-field "CATEGORIES__via__CATEGORY_ID" [:field-id $categories.id]]] + :strategy :left-join + :fk-field-id $category_id}]}} + {:columns [:name]}) + :cols + vec)))) ;; when a `:datetime-field` form is used, we should add in info about the `:unit` (expect - [(-> (Field (data/id :venues :price)) - (dissoc :database_type) - (assoc :unit :month, :source :fields))] - (qp.store/with-store - (qp.store/store-field! (Field (data/id :venues :price))) + [(assoc (info-for-field :venues :price) + :unit :month + :source :fields)] + (qp.test-util/with-everything-store (-> (#'annotate/add-mbql-column-info {:query {:fields [[:datetime-field [:field-id (data/id :venues :price)] :month]]}} {:columns [:price]}) @@ -198,9 +202,8 @@ :special_type :type/Number :name "sum" :display_name "sum"} - (qp.store/with-store + (qp.test-util/with-everything-store (data/$ids venues - (qp.store/store-field! (Field $price)) (col-info-for-aggregation-clause [:sum [:+ [:field-id $price] 1]])))) ;; if a driver is kind enough to supply us with some information about the `:cols` that come back, we should include diff --git a/test/metabase/query_processor/middleware/resolve_joined_tables_test.clj b/test/metabase/query_processor/middleware/resolve_joined_tables_test.clj index 889b0f247b3c3c5c92bcdc3a33e6316e9db7b2ed..be9e4bafe1f0c4530fc6e3779384a73b55381f00 100644 --- a/test/metabase/query_processor/middleware/resolve_joined_tables_test.clj +++ b/test/metabase/query_processor/middleware/resolve_joined_tables_test.clj @@ -10,7 +10,7 @@ :type :query :query query}))) -;; make sure `:join-tables` get added automatically for `:fk->` clauses +;; make sure `:joins` get added automatically for `:fk->` clauses (expect {:database (data/id) :type :query @@ -18,17 +18,21 @@ {:source-table $$table :fields [[:field-id $name] [:joined-field "CATEGORIES__via__CATEGORY_ID" [:field-id $categories.name]]] - :join-tables [{:join-alias "CATEGORIES__via__CATEGORY_ID" - :table-id (data/id :categories) - :fk-field-id $category_id - :pk-field-id $categories.id}]})} + :joins [{:source-table (data/id :categories) + :alias "CATEGORIES__via__CATEGORY_ID" + :condition [:= + [:field-id $category_id] + [:joined-field "CATEGORIES__via__CATEGORY_ID" [:field-id $categories.id]]] + :strategy :left-join + :fields :none + :fk-field-id $category_id}]})} (resolve-joined-tables (data/$ids venues {:source-table $$table :fields [[:field-id $name] [:fk-> [:field-id $category_id] [:field-id $categories.name]]]}))) -;; For FK clauses inside nested source queries, we should add the `:join-tables` info to the nested query instead of +;; For FK clauses inside nested source queries, we should add the `:joins` info to the nested query instead of ;; at the top level (#8972) (expect {:database (data/id) @@ -38,10 +42,14 @@ {:source-table $$table :fields [[:field-id $name] [:joined-field "CATEGORIES__via__CATEGORY_ID" [:field-id $categories.name]]] - :join-tables [{:join-alias "CATEGORIES__via__CATEGORY_ID" - :table-id (data/id :categories) - :fk-field-id $category_id - :pk-field-id $categories.id}]})}} + :joins [{:source-table (data/id :categories) + :alias "CATEGORIES__via__CATEGORY_ID" + :condition [:= + [:field-id $category_id] + [:joined-field "CATEGORIES__via__CATEGORY_ID" [:field-id $categories.id]]] + :strategy :left-join + :fields :none + :fk-field-id $category_id}]})}} (resolve-joined-tables {:source-query (data/$ids venues @@ -59,10 +67,14 @@ {:source-table $$table :fields [[:field-id $name] [:joined-field "CATEGORIES__via__CATEGORY_ID" [:field-id $categories.name]]] - :join-tables [{:join-alias "CATEGORIES__via__CATEGORY_ID" - :table-id (data/id :categories) - :fk-field-id $category_id - :pk-field-id $categories.id}]})}}} + :joins [{:source-table (data/id :categories) + :alias "CATEGORIES__via__CATEGORY_ID" + :condition [:= + [:field-id $category_id] + [:joined-field "CATEGORIES__via__CATEGORY_ID" [:field-id $categories.id]]] + :strategy :left-join + :fields :none + :fk-field-id $category_id}]})}}} (resolve-joined-tables {:source-query {:source-query @@ -84,10 +96,14 @@ :aggregation [[:count]] :breakout [[:joined-field "VENUES__via__VENUE_ID" [:field-id $venues.price]]] :order-by [[:asc [:joined-field "VENUES__via__VENUE_ID" [:field-id $venues.price]]]] - :join-tables [{:join-alias "VENUES__via__VENUE_ID" - :table-id (data/id :venues) - :fk-field-id $venue_id - :pk-field-id $venues.id}]})} + :joins [{:source-table (data/id :venues) + :alias "VENUES__via__VENUE_ID" + :condition [:= + [:field-id $venue_id] + [:joined-field "VENUES__via__VENUE_ID" [:field-id $venues.id]]] + :strategy :left-join + :fields :none + :fk-field-id $venue_id}]})} (resolve-joined-tables (data/$ids [checkins {:wrap-field-ids? true}] {:source-query {:source-table $$table diff --git a/test/metabase/query_processor/test_util.clj b/test/metabase/query_processor/test_util.clj index 8de6987a1e08725e1c7fd6ad362fd15b9403f13b..603178a0ff4852d5a1002c99fb6e85626f4a7f19 100644 --- a/test/metabase/query_processor/test_util.clj +++ b/test/metabase/query_processor/test_util.clj @@ -2,20 +2,33 @@ "Utilities for writing Query Processor tests that test internal workings of the QP rather than end-to-end results, e.g. middleware tests." (:require [metabase.models + [database :refer [Database]] [field :refer [Field]] [table :refer [Table]]] [metabase.query-processor.store :as qp.store] + [metabase.test.data :as data] + [metabase.util.schema :as su] + [schema.core :as s] [toucan.db :as db])) +(s/defn ^:private everything-store-database [] + (or (:database @@#'qp.store/*store*) + (db/select-one (into [Database] qp.store/database-columns-to-fetch), :id (data/id)))) + +(s/defn ^:private everything-store-table [table-id :- (s/maybe su/IntGreaterThanZero)] + (or (get-in @@#'qp.store/*store* [:tables table-id]) + (db/select-one (into [Table] qp.store/table-columns-to-fetch), :id table-id))) + +(s/defn ^:private everything-store-field [field-id :- (s/maybe su/IntGreaterThanZero)] + (or (get-in @@#'qp.store/*store* [:fields field-id]) + (db/select-one (into [Field] qp.store/field-columns-to-fetch), :id field-id))) + (defn do-with-everything-store "Impl for `with-everything-store`." [f] - (with-redefs [qp.store/table (fn [table-id] - (or (get-in @@#'qp.store/*store* [:tables table-id]) - (db/select-one (into [Table] qp.store/table-columns-to-fetch), :id table-id))) - qp.store/field (fn [field-id] - (or (get-in @@#'qp.store/*store* [:fields field-id]) - (db/select-one (into [Field] qp.store/field-columns-to-fetch), :id field-id)))] + (with-redefs [qp.store/database everything-store-database + qp.store/table everything-store-table + qp.store/field everything-store-field] (qp.store/with-store (f)))) diff --git a/test/metabase/query_processor_test/aggregation_test.clj b/test/metabase/query_processor_test/aggregation_test.clj index 08a93db0e710dce07828ed21f3da151dc5f3fbbe..2cd1b823a2f765617a70b08ccece18caaad6f236 100644 --- a/test/metabase/query_processor_test/aggregation_test.clj +++ b/test/metabase/query_processor_test/aggregation_test.clj @@ -344,3 +344,9 @@ (qp.test/rows (data/run-mbql-query venues {:aggregation [[:sum $id] [:sum $price]]})))) + +;; !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +;; ! ! +;; ! tests for named aggregations can be found in `expression-aggregations-test` ! +;; ! ! +;; !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! diff --git a/test/metabase/query_processor_test/expressions_test.clj b/test/metabase/query_processor_test/expressions_test.clj index e515aac004f77a7b8f2ba46bb9aeff8626200603..bf09b3e839262787fd2f40a54d27a91c0b73b37d 100644 --- a/test/metabase/query_processor_test/expressions_test.clj +++ b/test/metabase/query_processor_test/expressions_test.clj @@ -112,7 +112,7 @@ {:expressions {"bird-scarcity" ~formula} :fields [[:expression "bird-scarcity"]] :filter ~filter-clause - :order-by [[:asc [:field-id $date]]] + :order-by [[:asc [:field-id ~'$date]]] :limit 10}) rows (format-rows-by [(partial u/round-to-decimals 2)])))) diff --git a/test/metabase/query_processor_test/nested_queries_test.clj b/test/metabase/query_processor_test/nested_queries_test.clj index a3627191bb518fa6dcf423828122b6365ebdf5c7..bb2b35eb21983a0224f0c1c523a0514ce8ec1a41 100644 --- a/test/metabase/query_processor_test/nested_queries_test.clj +++ b/test/metabase/query_processor_test/nested_queries_test.clj @@ -27,7 +27,7 @@ [toucan.util.test :as tt])) (defn- rows+cols - "Return the `:rows` and relevant parts of `:cols` from the RESULTS. + "Return the `:rows` and relevant parts of `:cols` from the `results`. (This is used to keep the output of various tests below focused and manageable.)" {:style/indent 0} [results] @@ -710,3 +710,18 @@ :filter [:= [:field-literal (db/select-one-field :name Field :id $date) "type/DateTime"] "2014-03-30"]}}))))) + +;; make sure filters in source queries are applied correctly! +(datasets/expect-with-drivers (qp.test/non-timeseries-drivers-with-feature :nested-queries :foreign-keys) + [["Fred 62" 1] + ["Frolic Room" 1]] + (qp.test/format-rows-by [str int] + (qp.test/rows + (qp/process-query + (data/mbql-query checkins + {:source-query {:source-table $$checkins + :filter [:> $date "2015-01-01"]} + :aggregation [:count] + :order-by [[:asc $venue_id->venues.name]] + :breakout [$venue_id->venues.name] + :filter [:starts-with $venue_id->venues.name "F"]}))))) diff --git a/test/metabase/test/data.clj b/test/metabase/test/data.clj index 7c89d3a098ec0303bd5efdac31ab29431a994d75..a055b8651296771c0060d9a0a4fea222a6a500e1 100644 --- a/test/metabase/test/data.clj +++ b/test/metabase/test/data.clj @@ -101,14 +101,23 @@ [table-name body & {:keys [wrap-field-ids?], :or {wrap-field-ids? true}}] (walk/postwalk (fn [form] - (or (when (symbol? form) - (if (= form '$$table) - `(id ~(keyword table-name)) - (let [[first-char & rest-chars] (name form)] - (when (= first-char \$) - (let [token (apply str rest-chars)] - (token->id-call wrap-field-ids? table-name token)))))) - form)) + (cond + (not (symbol? form)) + form + + (= form '$$table) + `(id ~(keyword table-name)) + + (str/starts-with? form "$$") + (let [table-name (str/replace form #"^\$\$" "")] + `(id ~(keyword table-name))) + + (str/starts-with? form "$") + (let [field-name (str/replace form #"^\$" "")] + (token->id-call wrap-field-ids? table-name field-name)) + + :else + form)) body)) (defmacro $ids @@ -123,6 +132,10 @@ $$table -> (id :venues) + You can reference other tables by using `$$` as well: + + $$categories -> (id :categories) + You can pass options by wrapping `table-name` in a vector: ($ids [venues {:wrap-field-ids? true}] @@ -136,16 +149,23 @@ (m/mapply $->id (keyword table-name) `(do ~@body) (merge {:wrap-field-ids? false} options)))) +(declare id) (defn wrap-inner-mbql-query "Wrap inner QUERY with `:database` ID and other 'outer query' kvs. DB ID is fetched by looking up the Database for the query's `:source-table`." {:style/indent 0} [query] - {:database (db/select-one-field :db_id Table, :id (:source-table query)) + {:database (id) :type :query :query query}) +(defn add-source-table-if-needed [table query] + (if (and query + (some query #{:source-table :source-query})) + query + (assoc query :source-table (id table)))) + (defmacro mbql-query "Build a query, expands symbols like `$field` into calls to `id` and wraps them in `:field-id`. See the dox for `$->id` for more information on how `$`-prefixed expansion behaves. @@ -160,8 +180,7 @@ {:style/indent 1} [table & [query]] `(wrap-inner-mbql-query - ~(merge `{:source-table (id ~(keyword table))} - ($->id table query)))) + (add-source-table-if-needed ~(keyword table) ~($->id table query)))) (defmacro run-mbql-query "Like `mbql-query`, but runs the query as well."