From ea1f25f39575343a7f02c4e18d6c6c4ab0a3b31e Mon Sep 17 00:00:00 2001 From: Cam Saul <1455846+camsaul@users.noreply.github.com> Date: Thu, 13 Jan 2022 01:18:00 -0800 Subject: [PATCH] Add alias info util namespace (#19610) --- shared/src/metabase/mbql/schema.cljc | 17 +- shared/src/metabase/mbql/util.cljc | 120 ++++- shared/test/metabase/mbql/util_test.cljc | 44 +- .../query_processor/util/add_alias_info.clj | 328 ++++++++++++ .../util/add_alias_info_test.clj | 468 ++++++++++++++++++ test/metabase/test_runner/effects.clj | 35 +- 6 files changed, 965 insertions(+), 47 deletions(-) create mode 100644 src/metabase/query_processor/util/add_alias_info.clj create mode 100644 test/metabase/query_processor/util/add_alias_info_test.clj diff --git a/shared/src/metabase/mbql/schema.cljc b/shared/src/metabase/mbql/schema.cljc index ac22da124f6..ec825e7fe96 100644 --- a/shared/src/metabase/mbql/schema.cljc +++ b/shared/src/metabase/mbql/schema.cljc @@ -258,12 +258,15 @@ ;; Expression *references* refer to a something in the `:expressions` clause, e.g. something like ;; -;; [:+ [:field 1 nil] [:field 2 nil]]` +;; [:+ [:field 1 nil] [:field 2 nil]] +;; +;; As of 0.42.0 `:expression` references can have an optional options map (defclause ^{:requires-features #{:expressions}} expression - expression-name helpers/NonBlankString) + expression-name helpers/NonBlankString + options (optional (s/pred map? "map"))) (def BinningStrategyName - "Schema for a valid value for the `strategy-name` param of a `binning-strategy` clause." + "Schema for a valid value for the `strategy-name` param of a [[field]] clause with `:binning` information." (s/enum :num-bins :bin-width :default)) (defn- validate-bin-width [schema] @@ -389,7 +392,7 @@ (def ^:private Field* (one-of expression field)) -;; TODO -- consider renaming this FieldOrExpression, +;; TODO -- consider renaming this FieldOrExpression (def Field "Schema for either a `:field` clause (reference to a Field) or an `:expression` clause (reference to an expression)." (s/recursive #'Field*)) @@ -408,7 +411,11 @@ ;; ;; TODO - it would be nice if we could check that there's actually an aggregation with the corresponding index, ;; wouldn't it -(defclause aggregation, aggregation-clause-index s/Int) +;; +;; As of 0.42.0 `:aggregation` references can have an optional options map. +(defclause aggregation + aggregation-clause-index s/Int + options (optional (s/pred map? "map"))) (def FieldOrAggregationReference "Schema for any type of valid Field clause, or for an indexed reference to an aggregation clause." diff --git a/shared/src/metabase/mbql/util.cljc b/shared/src/metabase/mbql/util.cljc index 52c998c0c14..7d72e11810b 100644 --- a/shared/src/metabase/mbql/util.cljc +++ b/shared/src/metabase/mbql/util.cljc @@ -440,7 +440,14 @@ (unique-name \"A\")]) ;; -> [\"A\" \"B\" \"A_2\"] - If idempotence is desired, the function returned by the generator also has a 2 airity version where the first argument is the object for which we are generating the name. + By default, unique aliases are generated for each unique `[id original-name]` key pair. By default, a unique `id` is + generated for every call, meaning repeated calls to [[unique-name-generator]] with the same `original-name` will + return different unique aliases. If idempotence is desired, the function returned by the generator also has a 2 + airity version with the signature + + (unique-name-fn id original-name) + + for example: (let [unique-name (unique-name-generator)] [(unique-name :x \"A\") @@ -448,21 +455,76 @@ (unique-name :x \"A\") (unique-name :y \"A\")]) ;; -> [\"A\" \"B\" \"A\" \"A_2\"] - " - [] - (let [identity-objects->aliases (atom {}) - aliases (atom {})] + + Finally, [[unique-name-generator]] accepts the following options to further customize behavior: + + ### `:name-key-fn` + + Generated aliases are unique by the value of `[id (name-key-fn original-name)]`; the default is `identity`, so by + default aliases are unique by `[id name-key-fn]`. Specify something custom here if you want to make the unique + aliases unique by some other value, for example to make them unique without regards to case: + + (let [f (unique-name-generator :name-key-fn str/lower-case)] + [(f \"x\") + (f \"X\") + (f \"X\")]) + ;; -> [\"x\" \"X_2\" \"X_3\"] + + This is useful for databases that treat column aliases as case-insensitive (see #19618 for some examples of this). + + ### `:unique-alias-fn` + + The function used to generate a potentially-unique alias given an original alias and unique suffix with the signature + + (unique-alias-fn original suffix) + + By default, combines them like `original_suffix`, but you can supply a custom function if you need to change this + behavior: + + (let [f (unique-name-generator :unique-alias-fn (fn [x y] (format \"%s~~%s\" y x)))] + [(f \"x\") + (f \"x\")]) + ;; -> [\"x\" \"2~~x\"] + + This is useful if you need to constrain the generated suffix in some way, for example by limiting its length or + escaping characters disallowed in a column alias. + + Values generated by this function are recursively checked for uniqueness, and will keep trying values a unique value + is generated; for this reason the function *must* return a unique value for every unique input. Use caution when + limiting the length of the identifier generated (consider appending a hash in cases like these)." + [& {:keys [name-key-fn unique-alias-fn] + :or {name-key-fn identity + unique-alias-fn (fn [original suffix] + (str original \_ suffix))}}] + (let [id+original->unique (atom {}) ; map of [id original-alias] -> unique-alias + original->count (atom {})] ; map of original-alias -> count (fn generate-name - ([alias] (generate-name (gensym) alias)) - ([identity-object alias] - (or (@identity-objects->aliases [identity-object alias]) - (loop [maybe-unique alias] - (let [total-count (get (swap! aliases update maybe-unique (fnil inc 0)) maybe-unique)] - (if (= total-count 1) - (do - (swap! identity-objects->aliases assoc [identity-object alias] maybe-unique) - maybe-unique) - (recur (str maybe-unique \_ total-count)))))))))) + ([alias] + (generate-name (gensym) alias)) + + ([id original] + (let [name-key (name-key-fn original)] + (or + ;; if we already have generated an alias for this key (e.g. `[id original]`), return it as-is. + (@id+original->unique [id name-key]) + ;; otherwise generate a new unique alias. + ;; see if we're the first to try to use this candidate alias. Update the usage count in `original->count` + (let [total-count (get (swap! original->count update name-key (fnil inc 0)) name-key)] + (if (= total-count 1) + ;; if we are the first to do it, record it in `id+original->unique` and return it. + (do + (swap! id+original->unique assoc [id name-key] original) + original) + ;; otherwise prefix the alias by the current total count (e.g. `id` becomes `id_2`) and recur. If `id_2` + ;; is unused, it will get returned. Otherwise we'll recursively try `id_2_2`, and so forth. + (let [candidate (unique-alias-fn original (str total-count))] + ;; double-check that `unique-alias-fn` isn't doing something silly like truncating the generated alias + ;; to aggressively or forgetting to include the `suffix` -- otherwise we could end up with an infinite + ;; loop + (assert (not= candidate original) + (str "unique-alias-fn must return a different string than its input. Input: " + (pr-str candidate))) + (recur id candidate)))))))))) (s/defn uniquify-names :- (s/constrained [s/Str] distinct? "sequence of unique strings") "Make the names in a sequence of string names unique by adding suffixes such as `_2`. @@ -591,20 +653,30 @@ :else x)) -(s/defn update-field-options :- mbql.s/field - "Like `clojure.core/update`, but for the options in a `:field` clause." - [[_ id-or-name opts] :- mbql.s/field f & args] - [:field id-or-name (remove-empty (apply f opts args))]) +(s/defn update-field-options :- mbql.s/FieldOrAggregationReference + "Like [[clojure.core/update]], but for the options in a `:field`, `:expression`, or `:aggregation` clause." + {:arglists '([field-or-ag-ref-or-expression-ref f & args])} + [[clause-type id-or-name opts] :- mbql.s/FieldOrAggregationReference f & args] + (let [opts (not-empty (remove-empty (apply f opts args)))] + ;; `:field` clauses should have a `nil` options map if there are no options. `:aggregation` and `:expression` + ;; should get the arg removed if it's `nil` or empty. (For now. In the future we may change this if we make the + ;; 3-arg versions the "official" normalized versions.) + (cond + opts [clause-type id-or-name opts] + (= clause-type :field) [clause-type id-or-name nil] + :else [clause-type id-or-name]))) (defn assoc-field-options - "Like `clojure.core/assoc`, but for the options in a `:field` clause." - [field-clause & kvs] - (apply update-field-options field-clause assoc kvs)) + "Like [[clojure.core/assoc]], but for the options in a `:field`, `:expression`, or `:aggregation` clause." + [clause & kvs] + (apply update-field-options clause assoc kvs)) (defn with-temporal-unit "Set the `:temporal-unit` of a `:field` clause to `unit`." - [field-clause unit] - (assoc-field-options field-clause :temporal-unit unit)) + [clause unit] + ;; it doesn't make sense to call this on an `:expression` or `:aggregation`. + (assert (is-clause? :field clause)) + (assoc-field-options clause :temporal-unit unit)) #?(:clj (p/import-vars diff --git a/shared/test/metabase/mbql/util_test.cljc b/shared/test/metabase/mbql/util_test.cljc index 6133af86b78..232005bdacd 100644 --- a/shared/test/metabase/mbql/util_test.cljc +++ b/shared/test/metabase/mbql/util_test.cljc @@ -1,5 +1,6 @@ (ns metabase.mbql.util-test - (:require [clojure.test :as t] + (:require [clojure.string :as str] + [clojure.test :as t] [metabase.mbql.util :as mbql.u] metabase.types)) @@ -597,7 +598,26 @@ (map (mbql.u/unique-name-generator) [:x :y :x :z] ["count" "sum" "count" "count_2"])))) (t/testing "Can the same object have multiple aliases" (t/is (= ["count" "sum" "count" "count_2"] - (map (mbql.u/unique-name-generator) [:x :y :x :x] ["count" "sum" "count" "count_2"]))))) + (map (mbql.u/unique-name-generator) [:x :y :x :x] ["count" "sum" "count" "count_2"])))) + + (t/testing "idempotence (2-arity calls to generated function)" + (let [unique-name (mbql.u/unique-name-generator)] + (t/is (= ["A" "B" "A" "A_2"] + [(unique-name :x "A") + (unique-name :x "B") + (unique-name :x "A") + (unique-name :y "A")])))) + + (t/testing "options" + (t/testing :name-key-fn + (let [f (mbql.u/unique-name-generator :name-key-fn str/lower-case)] + (t/is (= ["x" "X_2" "X_3"] + (map f ["x" "X" "X"]))))) + + (t/testing :unique-alias-fn + (let [f (mbql.u/unique-name-generator :unique-alias-fn (fn [x y] (str y "~~" x)))] + (t/is (= ["x" "2~~x"] + (map f ["x" "x"]))))))) ;;; --------------------------------------------- query->max-rows-limit ---------------------------------------------- @@ -701,4 +721,22 @@ (t/testing "Should normalize the clause" (t/is (= [:field 1 nil] - (mbql.u/update-field-options [:field 1 {:a {:b 1}}] assoc-in [:a :b] nil))))) + (mbql.u/update-field-options [:field 1 {:a {:b 1}}] assoc-in [:a :b] nil)))) + + (t/testing "Should work with `:expression` and `:aggregation` references as well" + (t/is (= [:expression "wow" {:a 1}] + (mbql.u/update-field-options [:expression "wow"] assoc :a 1))) + (t/is (= [:expression "wow" {:a 1, :b 2}] + (mbql.u/update-field-options [:expression "wow" {:b 2}] assoc :a 1))) + (t/is (= [:aggregation 0 {:a 1}] + (mbql.u/update-field-options [:aggregation 0] assoc :a 1))) + (t/is (= [:aggregation 0 {:a 1, :b 2}] + (mbql.u/update-field-options [:aggregation 0 {:b 2}] assoc :a 1))) + + ;; in the future when we make the 3-arg version the normalized/"official" version we will probably want to stop + ;; doing this. + (t/testing "Remove empty options entirely from `:expression` and `:aggregation` (for now)" + (t/is (= [:expression "wow"] + (mbql.u/update-field-options [:expression "wow" {:b 2}] dissoc :b))) + (t/is (= [:aggregation 0] + (mbql.u/update-field-options [:aggregation 0 {:b 2}] dissoc :b)))))) diff --git a/src/metabase/query_processor/util/add_alias_info.clj b/src/metabase/query_processor/util/add_alias_info.clj new file mode 100644 index 00000000000..eb747e2cf8d --- /dev/null +++ b/src/metabase/query_processor/util/add_alias_info.clj @@ -0,0 +1,328 @@ +(ns metabase.query-processor.util.add-alias-info + (:require [clojure.string :as str] + [clojure.walk :as walk] + [metabase.driver :as driver] + [metabase.mbql.util :as mbql.u] + [metabase.query-processor.error-type :as qp.error-type] + [metabase.query-processor.store :as qp.store] + [metabase.util.i18n :refer [tru]])) + +;; these methods were moved from [[metabase.driver.sql.query-processor]] in 0.42.0 + +(defmulti prefix-field-alias + "Create a Field alias by combining a `prefix` string with `field-alias` string. The default implementation just joins + the two strings with `__` -- override this if you need to do something different." + {:arglists '([driver prefix field-alias]), :added "0.38.1"} + driver/dispatch-on-initialized-driver + :hierarchy #'driver/hierarchy) + +(defmethod prefix-field-alias :default + [_driver prefix field-alias] + (str prefix "__" field-alias)) + +(defmulti ^String escape-alias + "Return the String that should be emitted in the query for the generated `alias-name`, which will follow the + equivalent of a SQL `AS` clause. This is to allow for escaping names that particular databases may not allow as + aliases for custom expressions or fields (even when quoted). + + Defaults to identity (i.e. returns `alias-name` unchanged)." + {:added "0.41.0" :arglists '([driver alias-name])} + driver/dispatch-on-initialized-driver + :hierarchy #'driver/hierarchy) + +(defn- make-unique-alias-fn + "Creates a function with the signature + + (unique-alias position original-alias) + + To return a uniquified version of `original-alias`. Memoized by `position`, so duplicate calls will result in the + same unique alias." + [] + (let [unique-name-fn (mbql.u/unique-name-generator + :name-key-fn str/lower-case + ;; TODO -- we should probably limit the length somehow like we do in + ;; [[metabase.query-processor.middleware.add-implicit-joins/join-alias]], and also update this + ;; function and that one to append a short suffix if we are limited by length. See also + ;; [[escape-alias]] above + :unique-alias-fn (fn [original suffix] + (escape-alias driver/*driver* (str original \_ suffix))))] + (fn unique-alias-fn [position original-alias] + (unique-name-fn position (escape-alias driver/*driver* original-alias))))) + +;; TODO -- this should probably limit the resulting alias, and suffix a short hash as well if it gets too long. See also +;; [[unique-alias-fn]] below. +(defmethod escape-alias :default + [_driver alias-name] + alias-name) + +(defn- remove-namespaced-options [options] + (when options + (not-empty (into {} + (remove (fn [[k _]] + (when (keyword? k) + (namespace k)))) + options)))) + +(defn normalize-clause + "Normalize a `:field`/`:expression`/`:aggregation` clause by removing extra info so it can serve as a key for + `:qp/refs`." + [clause] + (mbql.u/match-one clause + ;; optimization: don't need to rewrite a `:field` clause without any options + [:field _ nil] + &match + + [:field id-or-name opts] + ;; this doesn't use [[mbql.u/update-field-options]] because this gets called a lot and the overhead actually adds up + ;; a bit + [:field id-or-name (remove-namespaced-options (dissoc opts :source-fields))] + + ;; for `:expression` and `:aggregation` references, remove the options map if they are empty. + [:expression expression-name opts] + (if-let [opts (remove-namespaced-options opts)] + [:expression expression-name opts] + [:expression expression-name]) + + [:aggregation index opts] + (if-let [opts (remove-namespaced-options opts)] + [:aggregation index opts] + [:aggregation index]) + + _ + &match)) + +(defn- selected-clauses + "Get all the clauses that are returned by this level of the query as a map of normalized-clause -> index of that + column in the results." + [{:keys [fields breakout aggregation], :as query}] + ;; this is cached for the duration of the QP run because it's a little expensive to calculate and caching this speeds + ;; up this namespace A LOT + (qp.store/cached (select-keys query [:fields :breakout :aggregation]) + (into + {} + (comp cat + (map-indexed + (fn [i clause] + [(normalize-clause clause) i]))) + [breakout + (map-indexed + (fn [i ag] + (mbql.u/replace ag + [:aggregation-options wrapped opts] + [:aggregation i] + + ;; aggregation clause should be preprocessed into an `:aggregation-options` clause by now. + _ + (throw (ex-info (tru "Expected :aggregation-options clause, got {0}" (pr-str ag)) + {:type qp.error-type/qp, :clause ag})))) + aggregation) + fields]))) + +(defn- clause->position + "Get the position (i.e., column index) `clause` is returned as, if it is returned (i.e. if it is in `:breakout`, + `:aggregation`, or `:fields`). Not all clauses are returned." + [inner-query clause] + ((selected-clauses inner-query) (normalize-clause clause))) + +(defn- this-level-join-aliases [{:keys [joins]}] + (into #{} (map :alias) joins)) + +(defn- field-is-from-join-in-this-level? [inner-query [_ _ {:keys [join-alias]}]] + (when join-alias + ((this-level-join-aliases inner-query) join-alias))) + +(defn- field-instance + {:arglists '([field-clause])} + [[_ id-or-name]] + (when (integer? id-or-name) + (qp.store/field id-or-name))) + +(defn- field-table-id [field-clause] + (:table_id (field-instance field-clause))) + +(defn- field-source-table-alias + "Determine the appropriate `::source-table` alias for a `field-clause`." + {:arglists '([inner-query field-clause])} + [{:keys [source-table], :as inner-query} [_ _id-or-name {:keys [join-alias]}, :as field-clause]] + (let [table-id (field-table-id field-clause) + join-is-this-level? (field-is-from-join-in-this-level? inner-query field-clause)] + (cond + join-is-this-level? join-alias + (and table-id (= table-id source-table)) table-id + :else ::source))) + +(defn- exports [query] + (into #{} (mbql.u/match (dissoc query :source-query :source-metadata :joins) + [(_ :guard #{:field :expression :aggregation}) _ (_ :guard (every-pred map? ::position))]))) + +(defn- join-with-alias [{:keys [joins]} join-alias] + (some (fn [join] + (when (= (:alias join) join-alias) + join)) + joins)) + +(defn- matching-field-in-join-at-this-level + "If `field-clause` is the result of a join *at this level* with a `:source-query`, return the 'source' `:field` clause + from that source query." + [inner-query [_ _ {:keys [join-alias]} :as field-clause]] + (when join-alias + (when-let [matching-join-source-query (:source-query (join-with-alias inner-query join-alias))] + (let [normalized (mbql.u/update-field-options (normalize-clause field-clause) dissoc :join-alias)] + (some (fn [a-clause] + (when (and (mbql.u/is-clause? :field a-clause) + (= (mbql.u/update-field-options (normalize-clause a-clause) dissoc :join-alias) + normalized)) + a-clause)) + (exports matching-join-source-query)))))) + +(defn- field-alias-in-join-at-this-level + "If `field-clause` is the result of a join at this level, return the `::desired-alias` from that join (where the Field is + introduced). This is the appropriate `::source-alias` for such a Field." + [inner-query field-clause] + (when-let [[_ _ {::keys [desired-alias]}] (matching-field-in-join-at-this-level inner-query field-clause)] + desired-alias)) + +(defn- matching-field-in-source-query + [{:keys [source-query], :as inner-query} [_ _ {:keys [join-alias]}, :as field-clause]] + (when (= (field-source-table-alias inner-query field-clause) ::source) + (let [normalized (normalize-clause field-clause)] + (some (fn [a-clause] + (when (and (mbql.u/is-clause? :field a-clause) + (= (normalize-clause a-clause) + normalized)) + a-clause)) + (exports source-query))))) + +(defn- field-alias-in-source-query + [inner-query field-clause] + (when-let [[_ _ {::keys [desired-alias]}] (matching-field-in-source-query inner-query field-clause)] + desired-alias)) + +(defn- field-name + "*Actual* name of a `:field` from the database or source query (for Field literals)." + [_inner-query [_ id-or-name :as field-clause]] + (or (:name (field-instance field-clause)) + (when (string? id-or-name) + id-or-name))) + +(defn- expensive-field-info + "Calculate extra stuff about `field-clause` that's a little expensive to calculate. This is done once so we can pass + it around instead of recalculating it a bunch of times." + [inner-query field-clause] + {:field-name (field-name inner-query field-clause) + :join-is-this-level? (field-is-from-join-in-this-level? inner-query field-clause) + :alias-from-join (field-alias-in-join-at-this-level inner-query field-clause) + :alias-from-source-query (field-alias-in-source-query inner-query field-clause)}) + +(defn- field-source-alias + "Determine the appropriate `::source-alias` for a `field-clause`." + {:arglists '([inner-query field-clause expensive-field-info])} + [{:keys [source-table], :as inner-query} + [_ _id-or-name {:keys [join-alias]}, :as field-clause] + {:keys [field-name join-is-this-level? alias-from-join alias-from-source-query]}] + (cond + (and join-alias (not join-is-this-level?)) (prefix-field-alias driver/*driver* join-alias field-name) + (and join-is-this-level? alias-from-join) alias-from-join + alias-from-source-query alias-from-source-query + :else field-name)) + +(defn- field-desired-alias + "Determine the appropriate `::desired-alias` for a `field-clause`." + {:arglists '([inner-query field-clause expensive-field-info])} + [inner-query + [_ _id-or-name {:keys [join-alias]} :as field-clause] + {:keys [field-name alias-from-join alias-from-source-query]}] + (cond + join-alias (prefix-field-alias driver/*driver* join-alias (or alias-from-join field-name)) + alias-from-source-query alias-from-source-query + :else field-name)) + +(defmulti ^:private clause-alias-info + {:arglists '([inner-query unique-alias-fn clause])} + (fn [_ _ [clause-type]] + clause-type)) + +(defmethod clause-alias-info :field + [inner-query unique-alias-fn field-clause] + (let [expensive-info (expensive-field-info inner-query field-clause)] + (merge {::source-table (field-source-table-alias inner-query field-clause) + ::source-alias (field-source-alias inner-query field-clause expensive-info)} + (when-let [position (clause->position inner-query field-clause)] + {::desired-alias (unique-alias-fn position (field-desired-alias inner-query field-clause expensive-info)) + ::position position})))) + +(defmethod clause-alias-info :aggregation + [{aggregations :aggregation, :as inner-query} unique-alias-fn [_ index opts :as ag-ref-clause]] + (let [position (clause->position inner-query ag-ref-clause)] + ;; an aggregation is ALWAYS returned, so it HAS to have a `position`. If it does not, the aggregation reference + ;; is busted. + (when-not position + (throw (ex-info (tru "Aggregation does not exist at index {0}" index) + {:type qp.error-type/invalid-query + :clause ag-ref-clause + :query inner-query}))) + (let [[_ ag-name _ :as matching-ag] (nth aggregations index)] + ;; make sure we have an `:aggregation-options` clause like we expect. This is mostly a precondition check + ;; since we should never be running this code on not-preprocessed queries, so it's not i18n'ed + (when-not (mbql.u/is-clause? :aggregation-options matching-ag) + (throw (ex-info (format "Expected :aggregation-options, got %s. (Query must be fully preprocessed.)" + (pr-str matching-ag)) + {:clause ag-ref-clause, :query inner-query}))) + {::desired-alias (unique-alias-fn position ag-name) + ::position position}))) + +(defmethod clause-alias-info :expression + [inner-query unique-alias-fn [_ expression-name :as expression-ref-clause]] + (when-let [position (clause->position inner-query expression-ref-clause)] + {::desired-alias (unique-alias-fn position expression-name) + ::position position})) + +(defn- add-alias-info* [inner-query] + (assert (not (:strategy inner-query)) "add-alias-info* should not be called on a join") ; not user-facing + (let [unique-alias-fn (make-unique-alias-fn)] + (mbql.u/replace inner-query + ;; don't rewrite anything inside any source queries or source metadata. + (_ :guard (constantly (some (partial contains? (set &parents)) + [:source-query :source-metadata]))) + &match + + #{:field :aggregation :expression} + (mbql.u/update-field-options &match merge (clause-alias-info inner-query unique-alias-fn &match))))) + +(defn add-alias-info + "Add extra info to `:field` clauses, `:expression` references, and `:aggregation` references in `query`. `query` must + be fully preprocessed. + + Adds some or all of the following keys: + + ### `::source-table` + + String name, integer Table ID, or the keyword `::source`. Use this alias to qualify the clause during compilation. + String names are aliases for joins. `::source` means this clause comes from the `:source-query`; the alias to use is + theoretically driver-specific but in practice is + `source` (see [[metabase.driver.sql.query-processor/source-query-alias]]). An integer Table ID means this comes from + the `:source-table` (either directly or indirectly via one or more `:source-query`s; use the Table's schema and name + to qualify the clause. + + ### `::source-alias` + + String name to use to refer to this clause during compilation. + + ### `::desired-alias` + + If this clause is 'selected' (i.e., appears in `:fields`, `:aggregation`, or `:breakout`), select the clause `AS` + this alias. This alias is guaranteed to be unique. + + ### `::position` + + If this clause is 'selected', this is the position the clause will appear in the results (i.e. the corresponding + column index)." + [query-or-inner-query] + (walk/postwalk + (fn [form] + (if (and (map? form) + ((some-fn :source-query :source-table) form) + (not (:strategy form))) + (vary-meta (add-alias-info* form) assoc ::transformed true) + form)) + query-or-inner-query)) diff --git a/test/metabase/query_processor/util/add_alias_info_test.clj b/test/metabase/query_processor/util/add_alias_info_test.clj new file mode 100644 index 00000000000..933af78c529 --- /dev/null +++ b/test/metabase/query_processor/util/add_alias_info_test.clj @@ -0,0 +1,468 @@ +(ns metabase.query-processor.util.add-alias-info-test + (:require [clojure.test :refer :all] + [clojure.walk :as walk] + [metabase.driver :as driver] + [metabase.driver.h2 :as h2] + [metabase.models.field :refer [Field]] + [metabase.query-processor :as qp] + [metabase.query-processor.middleware.fix-bad-references :as fix-bad-refs] + [metabase.query-processor.util.add-alias-info :as add] + [metabase.test :as mt])) + +(comment h2/keep-me) + +(deftest normalize-clause-test + (is (= [:expression "wow"] + (#'add/normalize-clause [:expression "wow"]) + (#'add/normalize-clause [:expression "wow" nil]) + (#'add/normalize-clause [:expression "wow" {}]) + (#'add/normalize-clause [:expression "wow" {::amazing true}]))) + (is (= [:field 1 nil] + (#'add/normalize-clause [:field 1 nil]) + (#'add/normalize-clause [:field 1 {}]) + (#'add/normalize-clause [:field 1 {::amazing true}])))) + +;; TODO -- this is duplicated with [[metabase.query-processor.util.nest-query-test/remove-source-metadata]] +(defn- remove-source-metadata [x] + (walk/postwalk + (fn [x] + (if ((every-pred map? :source-metadata) x) + (dissoc x :source-metadata) + x)) + x)) + +(defn- add-alias-info [query] + (mt/with-everything-store + (driver/with-driver (or driver/*driver* :h2) + (-> query qp/query->preprocessed add/add-alias-info remove-source-metadata (dissoc :middleware))))) + +(deftest join-in-source-query-test + (is (query= (mt/mbql-query venues + {:source-query {:source-table $$venues + :joins [{:strategy :left-join + :source-table $$categories + :alias "Cat" + :condition [:= + [:field %category_id {::add/source-table $$venues + ::add/source-alias "CATEGORY_ID"}] + [:field %categories.id {:join-alias "Cat" + ::add/source-table "Cat" + ::add/source-alias "ID"}]]}] + :fields [[:field %id {::add/source-table $$venues + ::add/source-alias "ID" + ::add/desired-alias "ID" + ::add/position 0}] + [:field %categories.name {:join-alias "Cat" + ::add/source-table "Cat" + ::add/source-alias "NAME" + ::add/desired-alias "Cat__NAME" + ::add/position 1}]]} + :breakout [[:field %categories.name {:join-alias "Cat" + ::add/source-table ::add/source + ::add/source-alias "Cat__NAME" + ::add/desired-alias "Cat__NAME" + ::add/position 0}]] + :order-by [[:asc [:field %categories.name {:join-alias "Cat" + ::add/source-table ::add/source + ::add/source-alias "Cat__NAME" + ::add/desired-alias "Cat__NAME" + ::add/position 0}]]] + :limit 1}) + (add-alias-info + (mt/mbql-query venues + {:source-query {:source-table $$venues + :joins [{:strategy :left-join + :source-table $$categories + :alias "Cat" + :condition [:= $category_id &Cat.categories.id]}] + :fields [$id + &Cat.categories.name]} + :breakout [&Cat.categories.name] + :limit 1}))))) + +(deftest multiple-joins-test + (mt/dataset sample-dataset + (is (query= (mt/mbql-query orders + {:source-query {:source-table $$orders + :joins [{:source-table $$products + :alias "P1" + :condition [:= + [:field %product_id {::add/source-alias "PRODUCT_ID" + ::add/source-table $$orders}] + [:field %products.id {:join-alias "P1" + ::add/source-alias "ID" + ::add/source-table "P1"}]] + :strategy :left-join}] + :fields [[:field %products.category {:join-alias "P1" + ::add/desired-alias "P1__CATEGORY" + ::add/position 0 + ::add/source-alias "CATEGORY" + ::add/source-table "P1"}]]} + :joins [{:source-query {:source-table $$reviews + :joins [{:source-table $$products + :alias "P2" + :condition [:= + [:field + %reviews.product_id + {::add/source-alias "PRODUCT_ID" + ::add/source-table $$reviews}] + [:field + %products.id + {:join-alias "P2" + ::add/source-alias "ID" + ::add/source-table "P2"}]] + :strategy :left-join}] + :fields [[:field + %products.category + {:join-alias "P2" + ::add/desired-alias "P2__CATEGORY" + ::add/position 0 + ::add/source-alias "CATEGORY" + ::add/source-table "P2"}]]} + :alias "Q2" + :condition [:= + [:field %products.category {:join-alias "P1" + ::add/desired-alias "P1__CATEGORY" + ::add/position 0 + ::add/source-alias "P1__CATEGORY" + ::add/source-table ::add/source}] + [:field %products.category {:join-alias "Q2" + ::add/source-alias "P2__CATEGORY" + ::add/source-table "Q2"}]] + :strategy :left-join}] + :fields [[:field %products.category {:join-alias "P1" + ::add/desired-alias "P1__CATEGORY" + ::add/position 0 + ::add/source-alias "P1__CATEGORY" + ::add/source-table ::add/source}]] + :limit 1}) + (add-alias-info + (mt/mbql-query orders + {:fields [&P1.products.category] + :source-query {:source-table $$orders + :fields [&P1.products.category] + :joins [{:strategy :left-join + :source-table $$products + :condition [:= $product_id &P1.products.id] + :alias "P1"}]} + :joins [{:strategy :left-join + :condition [:= &P1.products.category &Q2.products.category] + :alias "Q2" + :source-query {:source-table $$reviews + :fields [&P2.products.category] + :joins [{:strategy :left-join + :source-table $$products + :condition [:= $reviews.product_id &P2.products.id] + :alias "P2"}]}}] + :limit 1})))))) + +(deftest uniquify-aliases-test + (mt/dataset sample-dataset + (is (query= (mt/mbql-query products + {:source-table $$products + :expressions {:CATEGORY [:concat + [:field %category + {::add/source-table $$products + ::add/source-alias "CATEGORY" + ::add/desired-alias "CATEGORY" + ::add/position 0}] + "2"]} + :fields [[:field %category {::add/source-table $$products + ::add/source-alias "CATEGORY" + ::add/desired-alias "CATEGORY" + ::add/position 0}] + [:expression "CATEGORY" {::add/desired-alias "CATEGORY_2" + ::add/position 1}]] + :limit 1}) + (add-alias-info + (mt/mbql-query products + {:expressions {:CATEGORY [:concat [:field %category nil] "2"]} + :fields [[:field %category nil] + [:expression "CATEGORY"]] + :limit 1})))))) + +(deftest not-null-test + (is (query= (mt/mbql-query checkins + {:aggregation [[:aggregation-options [:count] {:name "count"}]] + :filter [:!= + [:field %date {:temporal-unit :default + ::add/source-table $$checkins + ::add/source-alias "DATE"}] + [:value nil {:base_type :type/Date + :effective_type :type/Date + :coercion_strategy nil + :semantic_type nil + :database_type "DATE" + :name "DATE" + :unit :default}]]}) + (add-alias-info + (mt/mbql-query checkins + {:aggregation [[:count]] + :filter [:not-null $date]}))))) + +(deftest duplicate-aggregations-test + (is (query= (mt/mbql-query venues + {:source-query {:source-table $$venues + :aggregation [[:aggregation-options [:count] {:name "count"}] + [:aggregation-options [:count] {:name "count_2"}] + [:aggregation-options [:count] {:name "count_3"}]]} + :fields [[:field "count" {:base-type :type/BigInteger + ::add/source-table ::add/source + ::add/source-alias "count" + ::add/desired-alias "count" + ::add/position 0}] + [:field "count_2" {:base-type :type/BigInteger + ::add/source-table ::add/source + ::add/source-alias "count_2" + ::add/desired-alias "count_2" + ::add/position 1}] + [:field "count_3" {:base-type :type/BigInteger + ::add/source-table ::add/source + ::add/source-alias "count_3" + ::add/desired-alias "count_3" + ::add/position 2}]] + :limit 1}) + (add-alias-info + (mt/mbql-query venues + {:source-query {:source-table $$venues + :aggregation [[:aggregation-options [:count] {:name "count"}] + [:count] + [:count]]} + :limit 1}))))) + +(deftest multiple-expressions-test + (mt/with-everything-store + (is (query= (mt/$ids venues + {$price 0 + [:expression "big_price"] 1 + [:expression "price_divided_by_big_price"] 2}) + (#'add/selected-clauses + (mt/$ids venues + {:expressions {:big_price [:+ $price 2] + :price_divided_by_big_price [:/ $price [:expression "big_price"]]} + :fields [$price + [:expression "big_price"] + [:expression "price_divided_by_big_price"]] + :limit 1}))))) + + (is (query= (mt/$ids venues + (let [price [:field %price {::add/position 0 + ::add/source-table $$venues + ::add/source-alias "PRICE" + ::add/desired-alias "PRICE"}] + big-price [:expression + "big_price" + {::add/position 1 + ::add/desired-alias "big_price"}] + price-divided-by-big-price [:expression + "price_divided_by_big_price" + {::add/position 2 + ::add/desired-alias "price_divided_by_big_price"}]] + {:source-table $$venues + :expressions {:big_price [:+ price 2] + :price_divided_by_big_price [:/ price big-price]} + :fields [price + big-price + price-divided-by-big-price] + :limit 1})) + (:query + (add-alias-info + (mt/mbql-query venues + {:expressions {:big_price [:+ $price 2] + :price_divided_by_big_price [:/ $price [:expression "big_price"]]} + :fields [$price + [:expression "big_price"] + [:expression "price_divided_by_big_price"]] + :limit 1})))))) + +(deftest join-source-query-join-test + (with-redefs [fix-bad-refs/fix-bad-references identity] + (mt/dataset sample-dataset + (is (query= (mt/mbql-query orders + {:joins [{:source-query {:source-table $$reviews + :aggregation [[:aggregation-options + [:avg [:field %reviews.rating {::add/source-table $$reviews + ::add/source-alias "RATING"}]] + {:name "avg"}]] + :breakout [[:field %products.category {:join-alias "P2" + ::add/source-table "P2" + ::add/source-alias "CATEGORY" + ::add/desired-alias "P2__CATEGORY" + ::add/position 0}]] + :joins [{:strategy :left-join + :source-table $$products + :condition [:= + [:field %reviews.product_id {::add/source-table $$reviews + ::add/source-alias "PRODUCT_ID"}] + [:field %products.id {:join-alias "P2" + ::add/source-table "P2" + ::add/source-alias "ID"}]] + :alias "P2"}]} + :alias "Q2" + :strategy :left-join + :condition [:= + [:field %products.category {::add/source-table ::add/source + ::add/source-alias "CATEGORY"}] + [:field %products.category {:join-alias "Q2" + ::add/source-table "Q2" + ::add/source-alias "P2__CATEGORY" + ::add/desired-alias "Q2__P2__CATEGORY" + ::add/position 1}]]}] + :fields [[:field "count" {:base-type :type/BigInteger + ::add/source-table ::add/source + ::add/source-alias "count" + ::add/desired-alias "count" + ::add/position 0}] + [:field %products.category {:join-alias "Q2" + ::add/source-table "Q2" + ::add/source-alias "P2__CATEGORY" + ::add/desired-alias "Q2__P2__CATEGORY" + ::add/position 1}] + [:field "avg" {:base-type :type/Integer + :join-alias "Q2" + ::add/source-table "Q2" + ::add/source-alias "avg" + ::add/desired-alias "Q2__avg" + ::add/position 2}]] + :limit 2}) + (add-alias-info + (mt/mbql-query orders + {:fields [[:field "count" {:base-type :type/BigInteger}] + &Q2.products.category + [:field "avg" {:base-type :type/Integer, :join-alias "Q2"}]] + :joins [{:strategy :left-join + :condition [:= $products.category &Q2.products.category] + :alias "Q2" + :source-query {:source-table $$reviews + :aggregation [[:aggregation-options [:avg $reviews.rating] {:name "avg"}]] + :breakout [&P2.products.category] + :joins [{:strategy :left-join + :source-table $$products + :condition [:= $reviews.product_id &P2.products.id] + :alias "P2"}]}}] + :limit 2}))))))) + +(driver/register! ::custom-prefix-style :parent :h2) + +(defmethod add/prefix-field-alias ::custom-prefix-style + [_driver prefix field-alias] + (format "%s~~%s" prefix field-alias)) + +(deftest custom-prefix-style-test + (let [db (mt/db)] + (driver/with-driver ::custom-prefix-style + (mt/with-db db + (is (query= (mt/$ids venues + {:source-table $$venues + :fields [[:field %price {::add/source-table $$venues + ::add/source-alias "PRICE" + ::add/desired-alias "PRICE" + ::add/position 0}] + [:field %categories.name {:join-alias "Cat" + ::add/source-table "Cat" + ::add/source-alias "NAME" + ::add/desired-alias "Cat~~NAME" + ::add/position 1}]] + :joins [{:source-table $$categories + :alias "Cat" + :condition [:= + [:field %category_id {::add/source-table $$venues + ::add/source-alias "CATEGORY_ID"}] + [:field %categories.id {:join-alias "Cat" + ::add/source-table "Cat" + ::add/source-alias "ID"}]] + :strategy :left-join}] + :limit 1}) + (-> (mt/mbql-query venues + {:fields [$price] + :joins [{:source-table $$categories + :fields [&Cat.categories.name] + :alias "Cat" + :condition [:= $category_id &Cat.categories.id]}] + :limit 1}) + add-alias-info + :query))))))) + +(driver/register! ::custom-escape :parent :h2) + +(defmethod add/escape-alias ::custom-escape + [_driver field-alias] + (str "COOL." field-alias)) + +(deftest custom-escape-alias-test + (let [db (mt/db)] + (driver/with-driver ::custom-escape + (mt/with-db db + (is (query= (mt/$ids venues + (merge + {:source-query (let [price [:field %price {::add/source-table $$venues + ::add/source-alias "PRICE" + ::add/desired-alias "COOL.PRICE" + ::add/position 0}]] + {:source-table $$venues + :expressions {:double_price [:* price 2]} + :fields [price + [:expression "double_price" {::add/desired-alias "COOL.double_price" + ::add/position 1}]] + :limit 1})} + (let [double-price [:field + "double_price" + {:base-type :type/Integer + ::add/source-table ::add/source + ;; TODO -- these don't agree with the source query (maybe they should + ;; both be prefixed by another `COOL.` I think) although I'm not sure it + ;; makes sense to try to assume this stuff either. Arguably the field + ;; clause should be `[:field "COOL.double_price" ...]` or something + ::add/source-alias "double_price" + ::add/desired-alias "COOL.double_price" + ::add/position 0}]] + {:aggregation [[:aggregation-options [:count] {:name "count"}]] + :breakout [double-price] + :order-by [[:asc double-price]]}))) + (-> (mt/mbql-query venues + {:source-query {:source-table $$venues + :expressions {:double_price [:* $price 2]} + :fields [$price + [:expression "double_price"]] + :limit 1} + :aggregation [[:count]] + :breakout [[:field "double_price" {:base-type :type/Integer}]]}) + add-alias-info + :query))))))) + +(deftest use-source-unique-aliases-test + (testing "Make sure uniquified aliases in the source query end up getting used for `::add/source-alias`" + ;; keep track of the IDs so we don't accidentally fetch the wrong ones after we switch the name of `price` + (let [name-id (mt/id :venues :name) + price-id (mt/id :venues :price)] + ;; create a condition where we'd have duplicate column names for one reason or another to make sure we end up using + ;; the correct unique alias from the source query + (mt/with-temp-vals-in-db Field price-id {:name "Name"} + (is (query= (mt/$ids venues + {:source-query {:source-table $$venues + :fields [[:field name-id {::add/source-table $$venues + ::add/source-alias "NAME" + ::add/desired-alias "NAME" + ::add/position 0}] + [:field price-id {::add/source-table $$venues + ::add/source-alias "Name" + ::add/desired-alias "Name_2" + ::add/position 1}]]} + :fields [[:field name-id {::add/source-table ::add/source + ::add/source-alias "NAME" + ::add/desired-alias "NAME" + ::add/position 0}] + [:field price-id {::add/source-table ::add/source + ::add/source-alias "Name_2" + ::add/desired-alias "Name_2" + ::add/position 1}]] + :limit 1}) + (-> (mt/mbql-query venues + {:source-query {:source-table $$venues + :fields [[:field name-id nil] + [:field price-id nil]]} + :fields [[:field name-id nil] + [:field price-id nil]] + :limit 1}) + add-alias-info + :query))))))) diff --git a/test/metabase/test_runner/effects.clj b/test/metabase/test_runner/effects.clj index 67d1cf1d069..3283f91ba34 100644 --- a/test/metabase/test_runner/effects.clj +++ b/test/metabase/test_runner/effects.clj @@ -10,7 +10,6 @@ " (:require [clojure.data :as data] [clojure.test :as t] - dev.debug-qp [metabase.util.date-2 :as date-2] [metabase.util.i18n.impl :as i18n.impl] [schema.core :as s])) @@ -46,20 +45,26 @@ :diffs (when-not pass?# [[actual# [(s/check schema# actual#) nil]]])}))) +(defn query=-report + "Impl for [[t/assert-expr]] `query=`." + [message expected actual] + (let [pass? (= expected actual)] + (merge + {:type (if pass? :pass :fail) + :message message + :expected expected + :actual actual} + ;; don't bother adding names unless the test actually failed + (when-not pass? + (let [add-names (requiring-resolve 'dev.debug-qp/add-names)] + {:expected (add-names expected) + :actual (add-names actual) + :diffs (let [[only-in-actual only-in-expected] (data/diff actual expected)] + [[(add-names actual) [(add-names only-in-expected) (add-names only-in-actual)]]])}))))) + ;; basically the same as normal `=` but will add comment forms to MBQL queries for Field clauses and source tables ;; telling you the name of the referenced Fields/Tables (defmethod t/assert-expr 'query= - [message [_ expected actual :as args]] - `(let [expected# ~expected - actual# ~actual - pass?# (= expected# actual#) - expected# (dev.debug-qp/add-names expected#) - actual# (dev.debug-qp/add-names actual#)] - (t/do-report - {:type (if pass?# :pass :fail) - :message ~message - :expected expected# - :actual actual# - :diffs (when-not pass?# - (let [[only-in-actual# only-in-expected#] (data/diff actual# expected#)] - [[actual# [only-in-expected# only-in-actual#]]]))}))) + [message [_ expected actual]] + `(t/do-report + (query=-report ~message ~expected ~actual))) -- GitLab