diff --git a/project.clj b/project.clj index be31a81289277cf838477cd131f052d6395ce3ee..454c25021e4123996b01d5778314403736ff66a1 100644 --- a/project.clj +++ b/project.clj @@ -132,15 +132,14 @@ ;; and re-enable them if we can get them to work #_:unused-fn-args #_:unused-locals] - :exclude-linters [#_:constant-test ; gives us false positives with forms like (when config/is-test? ...) - :deprecations]} ; Turn this off temporarily until we finish removing self-deprecated functions & macros + :exclude-linters [:deprecations]} ; Turn this off temporarily until we finish removing self-deprecated functions & macros :docstring-checker {:include [#"^metabase"] :exclude [#"test" #"^metabase\.http-client$"]} :profiles {:dev {:dependencies [[expectations "2.2.0-beta2"] ; unit tests [ring/ring-mock "0.3.0"]] ; Library to create mock Ring requests for unit tests :plugins [[docstring-checker "1.0.2"] ; Check that all public vars have docstrings. Run with 'lein docstring-checker' - [jonase/eastwood "0.2.6" + [jonase/eastwood "0.3.1" :exclusions [org.clojure/clojure]] ; Linting [lein-bikeshed "0.4.1"] ; Linting [lein-expectations "0.0.8"] ; run unit tests with 'lein expectations' diff --git a/src/metabase/mbql/normalize.clj b/src/metabase/mbql/normalize.clj index b8af46de4651ccf353cdbc0ff4b1964286ed8da5..97659f887571ad4e03a9ab530ce6ae5aad01b7da 100644 --- a/src/metabase/mbql/normalize.clj +++ b/src/metabase/mbql/normalize.clj @@ -285,59 +285,58 @@ (defn- canonicalize-aggregation-subclause "Remove `:rows` type aggregation (long-since deprecated; simpliy means no aggregation) if present, and wrap `:field-ids` where appropriate." - [[ag-type :as ag-subclause]] - (cond - (= ag-type :rows) + [ag-subclause] + (mbql.u/replace ag-subclause + seq? (recur (vec &match)) + + [:rows & _] nil ;; For named aggregations (`[:named <ag> <name>]`) we want to leave as-is an just canonicalize the ag it names - (= ag-type :named) - (let [[_ ag ag-name] ag-subclause] - [:named (canonicalize-aggregation-subclause ag) ag-name]) - - (#{:+ :- :* :/} ag-type) - (vec - (cons - ag-type - ;; if args are also ag subclauses normalize those, but things like numbers are allowed too so leave them as-is - (for [arg (rest ag-subclause)] - (cond-> arg - (mbql-clause? arg) canonicalize-aggregation-subclause)))) + [:named ag ag-name] + [:named (canonicalize-aggregation-subclause ag) ag-name] + + [(ag-type :guard #{:+ :- :* :/}) & args] + (apply + vector + ag-type + ;; if args are also ag subclauses normalize those, but things like numbers are allowed too so leave them as-is + (for [arg args] + (cond-> arg + (mbql-clause? arg) canonicalize-aggregation-subclause))) ;; for metric macros (e.g. [:metric <metric-id>]) do not wrap the metric in a :field-id clause - (= :metric ag-type) - ag-subclause + [:metric _] + &match ;; something with an arg like [:sum [:field-id 41]] - (second ag-subclause) - (let [[_ field] ag-subclause] - [ag-type (wrap-implicit-field-id field)]) - - :else - ag-subclause)) + [ag-type field] + [ag-type (wrap-implicit-field-id field)])) (defn- wrap-single-aggregations "Convert old MBQL 95 single-aggregations like `{:aggregation :count}` or `{:aggregation [:count]}` to MBQL 98 multiple-aggregation syntax (e.g. `{:aggregation [[:count]]}`)." [aggregations] - (cond + (mbql.u/replace aggregations + seq? (recur (vec &match)) + ;; something like {:aggregations :count} -- MBQL 95 single aggregation - (keyword? aggregations) - [[aggregations]] + keyword? + [[&match]] ;; special-case: MBQL 98 multiple aggregations using unwrapped :count or :rows ;; e.g. {:aggregations [:count [:sum 10]]} or {:aggregations [:count :count]} - (and (mbql-clause? aggregations) - (aggregation-subclause? (second aggregations)) - (not (is-clause? #{:named :+ :- :* :/} aggregations))) - (reduce concat (map wrap-single-aggregations aggregations)) + [(_ :guard (every-pred keyword? (complement #{:named :+ :- :* :/}))) + (_ :guard aggregation-subclause?) + & _] + (vec (reduce concat (map wrap-single-aggregations aggregations))) ;; something like {:aggregations [:sum 10]} -- MBQL 95 single aggregation - (mbql-clause? aggregations) - [(vec aggregations)] + [(_ :guard keyword?) & _] + [&match] - :else - (vec aggregations))) + _ + &match)) (defn- canonicalize-aggregations "Canonicalize subclauses (see above) and make sure `:aggregation` is a sequence of clauses instead of a single @@ -347,10 +346,12 @@ (map canonicalize-aggregation-subclause) (filterv identity))) -(defn- canonicalize-filter [[filter-name & args, :as filter-clause]] - (cond - ;; for other `and`/`or`/`not` compound filters, recurse on the arg(s), then simplify the whole thing - (#{:and :or :not} filter-name) +(defn- canonicalize-filter [filter-clause] + (mbql.u/replace filter-clause + seq? (recur (vec &match)) + + ;; for `and`/`or`/`not` compound filters, recurse on the arg(s), then simplify the whole thing + [(filter-name :guard #{:and :or :not}) & args] (mbql.u/simplify-compound-filter (vec (cons filter-name ;; we need to canonicalize any other mbql clauses that might show up in @@ -359,47 +360,51 @@ (map (comp canonicalize-mbql-clauses canonicalize-filter) args)))) - ;; string filters should get the string implict filter options added if not specified explicitly - (#{:starts-with :ends-with :contains :does-not-contain} filter-name) - (let [[field arg options] args] - (cond-> [filter-name (wrap-implicit-field-id field) arg] - (seq options) (conj options))) + [(filter-name :guard #{:starts-with :ends-with :contains :does-not-contain}) field arg options] + [filter-name (wrap-implicit-field-id field) arg options] - (= :inside filter-name) - (let [[field-1 field-2 & coordinates] args] - (vec - (concat - [:inside (wrap-implicit-field-id field-1) (wrap-implicit-field-id field-2)] - coordinates))) + [(filter-name :guard #{:starts-with :ends-with :contains :does-not-contain}) field arg] + [filter-name (wrap-implicit-field-id field) arg] + + [:inside field-1 field-2 & coordinates] + (vec + (concat + [:inside (wrap-implicit-field-id field-1) (wrap-implicit-field-id field-2)] + coordinates)) + + ;; if you put a `:datetime-field` inside a `:time-interval` we should fix it for you + [:time-interval [:datetime-field field _] & args] + (recur (apply vector :time-interval field args)) ;; all the other filter types have an implict field ID for the first arg ;; (e.g. [:= 10 20] gets canonicalized to [:= [:field-id 10] 20] - (#{:= :!= :< :<= :> :>= :is-null :not-null :between :inside :time-interval} filter-name) - (apply vector filter-name (wrap-implicit-field-id (first args)) (rest args)) + [(filter-name :guard #{:= :!= :< :<= :> :>= :is-null :not-null :between :inside :time-interval}) field & more] + (apply vector filter-name (wrap-implicit-field-id field) more) ;; don't wrap segment IDs in `:field-id` - (= filter-name :segment) - filter-clause - - :else + [:segment _] + &match + _ (throw (IllegalArgumentException. (str (tru "Illegal filter clause: {0}" filter-clause)))))) (defn- canonicalize-order-by "Make sure order by clauses like `[:asc 10]` get `:field-id` added where appropriate, e.g. `[:asc [:field-id 10]]`" - [order-by-clause] - (vec (for [subclause order-by-clause - :let [[direction field-id] (if (#{:asc :desc :ascending :descending} (first subclause)) - ;; normal [<direction> <field>] clause - subclause - ;; MBQL 95 reversed [<field> <direction>] clause - (reverse subclause))]] - [(case direction - :asc :asc - :desc :desc - ;; old MBQL 95 names - :ascending :asc - :descending :desc) - (wrap-implicit-field-id field-id)]))) + [clauses] + (mbql.u/replace clauses + seq? (recur (vec &match)) + + ;; MBQL 95 reversed [<field> <direction>] clause + [field :asc] (recur [:asc field]) + [field :desc] (recur [:desc field]) + [field :ascending] (recur [:asc field]) + [field :descending] (recur [:desc field]) + + ;; MBQL 95 names but MBQL 98+ reversed syntax + [:ascending field] (recur [:asc field]) + [:descending field] (recur [:desc field]) + + [:asc field] [:asc (wrap-implicit-field-id field)] + [:desc field] [:desc (wrap-implicit-field-id field)])) (declare canonicalize-inner-mbql-query) @@ -457,8 +462,8 @@ (fn [clause] (if-not (mbql-clause? clause) clause - (let [[clause-name & args] clause - f (mbql-clause->canonicalization-fn clause-name)] + (let [[clause-name & _] clause + f (mbql-clause->canonicalization-fn clause-name)] (if f (apply f clause) clause)))) @@ -480,8 +485,6 @@ ;;; | REMOVING EMPTY CLAUSES | ;;; +----------------------------------------------------------------------------------------------------------------+ -;; TODO - can't we combine these functions into a single fn? - (defn- non-empty-value? "Is this 'value' in a query map considered non-empty (e.g., should we refrain from removing that key entirely?) e.g.: @@ -512,8 +515,6 @@ (walk/postwalk (fn [x] (cond - ;; TODO - we can remove this part once we take out the `expand` namespace. This is here just to prevent - ;; double-expansion from barfing (record? x) x @@ -543,6 +544,7 @@ (normalize-fragment [:query :filter] [\"=\" 100 200]) ;;-> [:= [:field-id 100] 200]" + {:style/indent 1} [path x] (if-not (seq path) (normalize x) diff --git a/src/metabase/mbql/util.clj b/src/metabase/mbql/util.clj index f8129cd37aff5f9bb673f7c3ecf939cbd3b42de0..ba31f6f9098f2d9e7d702f181455dc22746e7b2b 100644 --- a/src/metabase/mbql/util.clj +++ b/src/metabase/mbql/util.clj @@ -1,11 +1,16 @@ (ns metabase.mbql.util "Utilitiy functions for working with MBQL queries." + (:refer-clojure :exclude [replace]) (:require [clojure [string :as str] [walk :as walk]] [metabase.mbql.schema :as mbql.s] + [metabase.mbql.util.match :as mbql.match] [metabase.util :as u] - [metabase.util.schema :as su] + [metabase.util + [date :as du] + [i18n :refer [tru]] + [schema :as su]] [schema.core :as s])) (s/defn normalize-token :- s/Keyword @@ -36,30 +41,133 @@ ((set k-or-ks) (first x)) (= k-or-ks (first x))))) -(defn clause-instances - "Return a sequence of all the instances of clause(s) in `x`. Like `is-clause?`, you can either look for instances of a - single clause by passing a single keyword or for instances of multiple clauses by passing a set of keywords. Returns - `nil` if no instances were found. - ;; look for :field-id clauses - (clause-instances :field-id {:query {:filter [:= [:field-id 10] 20]}}) - ;;-> [[:field-id 10]] +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | Match & Replace | +;;; +----------------------------------------------------------------------------------------------------------------+ + +(defmacro match + "Return a sequence of things that match a `pattern` or `patterns` inside `x`, presumably a query, returning `nil` if + there are no matches. Recurses through maps and sequences. `pattern` can be one of several things: + + * Keyword name of an MBQL clause + * Set of keyword names of MBQL clauses. Matches any clauses with those names + * A `core.match` pattern + * A symbol naming a class. + * A symbol naming a predicate function + * `_`, which will match anything + + Examples: + + ;; keyword pattern + (match {:fields [[:field-id 10]]} :field-id) ; -> [[:field-id 10]] + + ;; set of keywords + (match some-query #{:field-id :fk->}) ; -> [[:field-id 10], [:fk-> [:field-id 10] [:field-id 20]], ...] + + ;; `core.match` patterns: + ;; match any `:field-id` clause with one arg (which should be all of them) + (match some-query [:field-id _]) + (match some-query [:field-id (_ :guard #(> % 100))]) ; -> [[:field-id 200], ...] + + ;; symbol naming a Class + ;; match anything that is an instance of that class + (match some-query java.util.Date) ; -> [[#inst \"2018-10-08\", ...] + + ;; symbol naming a predicate function + ;; match anything that satisfies that predicate + (match some-query (every-pred integer? even?)) ; -> [2 4 6 8] + + ;; match anything with `_` + (match 100 `_`) ; -> 100 + + + ### Using `core.match` patterns + + See [`core.match` documentation](`https://github.com/clojure/core.match/wiki/Overview`) for more details. - ;; look for :+ or :- clauses - (clause-instances #{:+ :-} ...) + Pattern-matching works almost exactly the way it does when using `core.match/match` directly, with a few + differences: - By default, this will not include subclauses of any clauses it finds, but you can toggle this behavior with the - `include-subclauses?` option: + * `mbql.util/match` returns a sequence of everything that matches, rather than the first match it finds + + * patterns are automatically wrapped in vectors for you when appropriate + + * things like keywords and classes are automatically converted to appropriate patterns for you + + * this macro automatically recurses through sequences and maps as a final `:else` clause. If you don't want to + automatically recurse, use a catch-all pattern (such as `_`). Our macro implementation will optimize out this + `:else` clause if the last pattern is `_` + + ### Returing something other than the exact match with result body + + By default, `match` returns whatever matches the pattern you pass in. But what if you only want to return part of + the match? You can, using `core.match` binding facilities. Bind relevant things in your pattern and pass in the + optional result body. Whatever result body returns will be returned by `match`: + + ;; just return the IDs of Field ID clauses + (match some-query [:field-id id] id) ; -> [1 2 3] + + You can also use result body to filter results; any `nil` values will be skipped: + + (match some-query [:field-id id] + (when (even? id) + id)) + ;; -> [2 4 6 8] + + Of course, it's more efficient to let `core.match` compile an efficient matching function, so prefer using + patterns with `:guard` where possible. + + You can also call `recur` inside result bodies, to use the same matching logic against a different value. + 0 + ### `&match` and `&parents` anaphors + + For more advanced matches, like finding `:field-id` clauses nested anywhere inside `:datetime-field` clauses, + `match` binds a pair of anaphors inside the result body for your convenience. `&match` is bound to the entire + match, regardless of how you may have destructured it; `&parents` is bound to a sequence of keywords naming the + parent top-level keys and clauses of the match. + + (mbql.u/match {:fields [[:datetime-field [:fk-> [:field-id 1] [:field-id 2]] :day]]} :field-id + ;; &parents will be [:fields :datetime-field :fk->] + (when (contains? (set &parents) :datetime-field) + &match)) + ;; -> [[:field-id 1] [:field-id 2]]" + {:style/indent 1} + [x & patterns-and-results] + ;; Actual implementation of these macros is in `mbql.util.match`. They're in a seperate namespace because they have + ;; lots of other functions and macros they use for their implementation (which means they have to be public) that we + ;; would like to discourage you from using directly. + `(mbql.match/match ~x ~patterns-and-results)) + +(defmacro match-one + "Like `match` but returns a single match rather than a sequence of matches." + {:style/indent 1} + [x & patterns-and-results] + `(first (mbql.match/match ~x ~patterns-and-results))) - (clause-instances #{:field-id :fk->} [[:field-id 1] [:fk-> [:field-id 2] [:field-id 3]]]) - ;; -> [[:field-id 1] - [:fk-> [:field-id 2] [:field-id 3]]] - (clause-instances #{:field-id :fk->} [[:field-id 1] [:fk-> [:field-id 2] [:field-id 3]]], :include-subclauses? true) - ;; -> [[:field-id 1] - [:fk-> [:field-id 2] [:field-id 3]] - [:field-id 2] - [:field-id 3]]" +(defmacro replace + "Like `match`, but replace matches in `x` with the results of result body. The same pattern options are supported, + and `&parents` and `&match` anaphors are available in the same way. (`&match` is particularly useful here if you + want to use keywords or sets of keywords as patterns.)" + {:style/indent 1} + [x & patterns-and-results] + ;; as with `match` actual impl is in `match` namespace to discourage you from using the constituent functions and + ;; macros that power this macro directly + `(mbql.match/replace ~x ~patterns-and-results)) + +(defmacro replace-in + "Like `replace`, but only replaces things in the part of `x` in the keypath `ks` (i.e. the way to `update-in` works.)" + {:style/indent 2} + [x ks & patterns-and-results] + `(let [form# ~x + ks# ~ks] + (if-not (seq (get-in form# ks#)) + form# + (update-in form# ks# #(mbql.match/replace % ~patterns-and-results))))) + +(defn ^:deprecated clause-instances + "DEPRECATED: use `match` instead." {:style/indent 1} [k-or-ks x & {:keys [include-subclauses?], :or {include-subclauses? false}}] (let [instances (atom [])] @@ -73,12 +181,8 @@ x) (seq @instances))) -(defn replace-clauses - "Walk a query looking for clauses named by keyword or set of keywords `k-or-ks` and replace them the results of a call - to `(f clause)`. - - (replace-clauses {:filter [:= [:field-id 10] 100]} :field-id (constantly 200)) - ;; -> {:filter [:= 200 100]}" +(defn ^:deprecated replace-clauses + "DEPRECATED: use `replace` instead." {:style/indent 2} [query k-or-ks f] (walk/postwalk @@ -88,12 +192,8 @@ clause)) query)) -(defn replace-clauses-in - "Replace clauses only in a subset of `query`, defined by `keypath`. - - (replace-clauses-in {:filter [:= [:field-id 10] 100], :breakout [:field-id 100]} [:filter] :field-id - (constantly 200)) - ;; -> {:filter [:= 200 100], :breakout [:field-id 100]}" +(defn ^:deprecated replace-clauses-in + "DEPRECATED: use `replace-in` instead!" {:style/indent 3} [query keypath k-or-ks f] (update-in query keypath #(replace-clauses % k-or-ks f))) @@ -105,49 +205,60 @@ ;; TODO - I think we actually should move this stuff into a `mbql.helpers` namespace so we can use the util functions ;; above in the `schema.helpers` namespace instead of duplicating them +(defn- combine-compound-filters-of-type [compound-type subclauses] + + (mapcat #(match-one % + [(_ :guard (partial = compound-type)) & args] + args + + _ + [&match]) + subclauses)) -(s/defn simplify-compound-filter :- mbql.s/Filter +(s/defn simplify-compound-filter :- (s/maybe mbql.s/Filter) "Simplify compound `:and`, `:or`, and `:not` compound filters, combining or eliminating them where possible. This - also fixes theoretically disallowed compound filters like `:and` with only a single subclause." - [[filter-name & args :as filter-clause]] - (cond + also fixes theoretically disallowed compound filters like `:and` with only a single subclause, and eliminates `nils` + from the clauses." + [filter-clause] + (replace filter-clause + seq? (recur (vec &match)) + + ;; if this an an empty filter, toss it + nil nil + [] nil + [(:or :and :or)] nil + + ;; if the clause contains any nils, toss them + [& (args :guard (partial some nil?))] + (recur (filterv some? args)) + ;; for `and` or `not` compound filters with only one subclase, just unnest the subclause - (and (#{:and :or} filter-name) - (= (count args) 1)) - (recur (first args)) + [(:or :and :or) arg] (recur arg) ;; for `and` and `not` compound filters with subclauses of the same type pull up any compounds of the same type ;; e.g. [:and :a [:and b c]] ; -> [:and a b c] - (and (#{:and :or} filter-name) - (some (partial is-clause? filter-name) args)) - (recur - (vec (cons filter-name (mapcat (fn [subclause] - (if (is-clause? filter-name subclause) - (rest subclause) - [subclause])) - args)))) + [:and & (args :guard (partial some (partial is-clause? :and)))] + (recur (apply vector :and (combine-compound-filters-of-type :and args))) + + [:or & (args :guard (partial some (partial is-clause? :or)))] + (recur (apply vector :or (combine-compound-filters-of-type :or args))) ;; for `and` or `or` clauses with duplicate args, remove the duplicates and recur - (and (#{:and :or} filter-name) - (not= (count args) (count (distinct args)))) - (recur (vec (cons filter-name (distinct args)))) + [(clause :guard #{:and :or}) & (args :guard #(not (apply distinct? %)))] + (recur (apply vector clause (distinct args))) ;; for `not` that wraps another `not`, eliminate both - (and (= :not filter-name) - (is-clause? :not (first args))) - (recur (second (first args))) + [:not [:not arg]] + (recur arg) :else filter-clause)) -;; TODO - we should validate the query against the Query schema and the output as well. Flip that on once the schema -;; is locked-in 100% - (s/defn combine-filter-clauses :- mbql.s/Filter "Combine two filter clauses into a single clause in a way that minimizes slapping a bunch of `:and`s together if possible." [filter-clause & more-filter-clauses] - (simplify-compound-filter (vec (cons :and (filter identity (cons filter-clause more-filter-clauses)))))) + (simplify-compound-filter (cons :and (cons filter-clause more-filter-clauses)))) (s/defn add-filter-clause :- mbql.s/Query "Add an additional filter clause to an `outer-query`. If `new-clause` is `nil` this is a no-op." @@ -157,8 +268,9 @@ (update-in outer-query [:query :filter] combine-filter-clauses new-clause))) -(defn query->source-table-id - "Return the source Table ID associated with `query`, if applicable; handles nested queries as well." +(s/defn query->source-table-id :- (s/maybe su/IntGreaterThanZero) + "Return the source Table ID associated with `query`, if applicable; handles nested queries as well. If `query` is + `nil`, returns `nil`." {:argslists '([outer-query])} [{{source-table-id :source-table, source-query :source-query} :query, query-type :type, :as query}] (cond @@ -174,12 +286,40 @@ (and (nil? source-table-id) source-query) (recur (assoc query :query source-query)) + ;; if ID is a `card__id` form that can only mean we haven't preprocessed the query and resolved the source query. + ;; This is almost certainly an accident, so throw an Exception so we can make the proper fixes + ((every-pred string? (partial re-matches mbql.s/source-table-card-id-regex)) source-table-id) + (throw + (Exception. + (str + (tru "Error: query's source query has not been resolved. You probably need to `preprocess` the query first.")))) + ;; otherwise resolve the source Table :else source-table-id)) +(s/defn unwrap-field-clause :- (s/if (partial is-clause? :field-id) + mbql.s/field-id + mbql.s/field-literal) + "Un-wrap a `Field` clause and return the lowest-level clause it wraps, either a `:field-id` or `:field-literal`." + [[clause-name x y, :as clause] :- mbql.s/Field] + ;; TODO - could use `match` to do this + (case clause-name + :field-id clause + :fk-> (recur y) + :field-literal clause + :datetime-field (recur x) + :binning-strategy (recur x))) + +(defn maybe-unwrap-field-clause + "Unwrap a Field `clause`, if it's something that can be unwrapped (i.e. something that is, or wraps, a `:field-id` or + `:field-literal`). Otherwise return `clause` as-is." + [clause] + (if (is-clause? #{:field-id :fk-> :field-literal :datetime-field :binning-strategy} clause) + (unwrap-field-clause clause) + clause)) -(defn field-clause->id-or-literal +(s/defn field-clause->id-or-literal :- (s/cond-pre su/IntGreaterThanZero su/NonBlankString) "Get the actual Field ID or literal name this clause is referring to. Useful for seeing if two Field clauses are referring to the same thing, e.g. @@ -188,24 +328,92 @@ For expressions (or any other clauses) this returns the clause as-is, so as to facilitate the primary use case of comparing Field clauses." - [[clause-name x y, :as clause]] - (case clause-name - :field-id x - :fk-> (recur y) - :field-literal x - :datetime-field (recur x) - :binning-strategy (recur x) - ;; for anything else, including expressions and ag clause references, just return the clause as-is - clause)) + [clause :- mbql.s/Field] + (second (unwrap-field-clause clause))) (s/defn add-order-by-clause :- mbql.s/Query "Add a new `:order-by` clause to an MBQL query. If the new order-by clause references a Field that is already being used in another order-by clause, this function does nothing." - [outer-query :- mbql.s/Query, order-by-clause :- mbql.s/OrderBy] - (let [existing-clauses (set (map (comp field-clause->id-or-literal second) - (-> outer-query :query :order-by)))] - (if (existing-clauses (field-clause->id-or-literal (second order-by-clause))) + [outer-query :- mbql.s/Query, [_ field, :as order-by-clause] :- mbql.s/OrderBy] + (let [existing-fields (set (for [[_ existing-field] (-> outer-query :query :order-by)] + (maybe-unwrap-field-clause existing-field)))] + (if (existing-fields (maybe-unwrap-field-clause field)) ;; Field already referenced, nothing to do outer-query ;; otherwise add new clause at the end (update-in outer-query [:query :order-by] (comp vec conj) order-by-clause)))) + + +(s/defn add-datetime-units :- mbql.s/DateTimeValue + "Return a `relative-datetime` clause with `n` units added to it." + [absolute-or-relative-datetime :- mbql.s/DateTimeValue + n :- s/Num] + (if (is-clause? :relative-datetime absolute-or-relative-datetime) + (let [[_ original-n unit] absolute-or-relative-datetime] + [:relative-datetime (+ n original-n) unit]) + (let [[_ timestamp unit] absolute-or-relative-datetime] + (du/relative-date unit n timestamp)))) + + +(defn dispatch-by-clause-name-or-class + "Dispatch function perfect for use with multimethods that dispatch off elements of an MBQL query. If `x` is an MBQL + clause, dispatches off the clause name; otherwise dispatches off `x`'s class." + [x] + (if (mbql-clause? x) + (first x) + (class x))) + + +(s/defn fk-clause->join-info :- (s/maybe mbql.s/JoinInfo) + "Return the matching info about the JOINed for the 'destination' Field in an `fk->` clause. + + (fk-clause->join-alias [:fk-> [:field-id 1] [:field-id 2]]) + ;; -> \"orders__via__order_id\"" + [query :- mbql.s/Query, [_ source-field-clause] :- mbql.s/fk->] + (let [source-field-id (field-clause->id-or-literal source-field-clause)] + (some (fn [{:keys [fk-field-id], :as info}] + (when (= fk-field-id source-field-id) + info)) + (-> query :query :join-tables)))) + + +(s/defn expression-with-name :- mbql.s/ExpressionDef + "Return the `Expression` referenced by a given `expression-name`." + [query :- mbql.s/Query, expression-name :- su/NonBlankString] + (or (get-in query, [:query :expressions (keyword expression-name)]) + (throw (Exception. (str (tru "No expression named ''{0}''" (name expression-name))))))) + + +(s/defn aggregation-at-index :- mbql.s/Aggregation + "Fetch the aggregation at index. This is intended to power aggregate field references (e.g. [:aggregation 0]). + This also handles nested queries, which could be potentially ambiguous if multiple levels had aggregations. To + support nested queries, you'll need to keep tract of how many `:source-query`s deep you've traveled; pass in this + number to as optional arg `nesting-level` to make sure you reference aggregations at the right level of nesting." + ([query index] + (aggregation-at-index query index 0)) + ([query :- mbql.s/Query, index :- su/NonNegativeInt, nesting-level :- su/NonNegativeInt] + (if (zero? nesting-level) + (or (nth (get-in query [:query :aggregation]) index) + (throw (Exception. (str (tru "No aggregation at index: {0}" index))))) + ;; keep recursing deeper into the query until we get to the same level the aggregation reference was defined at + (recur {:query (get-in query [:query :source-query])} index (dec nesting-level))))) + +(defn ga-id? + "Is this ID (presumably of a Metric or Segment) a GA one?" + [id] + (boolean + (when ((some-fn string? keyword?) id) + (re-find #"^ga(id)?:" (name id))))) + +(defn ga-metric-or-segment? + "Is this metric or segment clause not a Metabase Metric or Segment, but rather a GA one? E.g. something like `[:metric + ga:users]`. We want to ignore those because they're not the same thing at all as MB Metrics/Segments and don't + correspond to objects in our application DB." + [[_ id]] + (ga-id? id)) + +(defn datetime-field? + "Does `field` have a base type or special type that derives from `:type/DateTime`?" + [field] + (or (isa? (:base_type field) :type/DateTime) + (isa? (:special_type field) :type/DateTime))) diff --git a/src/metabase/mbql/util/match.clj b/src/metabase/mbql/util/match.clj new file mode 100644 index 0000000000000000000000000000000000000000..a8597da860fe49f19d1e4a0f14b027feb9fc63cf --- /dev/null +++ b/src/metabase/mbql/util/match.clj @@ -0,0 +1,152 @@ +(ns metabase.mbql.util.match + "Internal implementation of the MBQL `match` and `replace` macros. Don't use these directly." + (:refer-clojure :exclude [replace]) + (:require [clojure.core.match :as match] + [clojure.walk :as walk])) + +;; TODO - I'm not 100% sure we actually need to keep the `&parents` anaphor around, because nobody is actually using +;; it, which makes it dead weight + +;; have to do this at runtime because we don't know if a symbol is a class or pred or whatever when we compile the macro +(defn match-with-pred-or-class + "Return a function to use for pattern matching via `core.match`'s `:guard` functionality based on the value of a + `pred-or-class` passed in as a pattern to `match` or `replace`." + [pred-or-class] + (cond + (class? pred-or-class) + (partial instance? pred-or-class) + + (fn? pred-or-class) + pred-or-class + + :else + ;; this is dev-specific so we don't need to localize it + (throw (IllegalArgumentException. "Invalid pattern: don't know how to handle symbol.")))) + +(defn- generate-pattern + "Generate a single approprate pattern for use with core.match based on the `pattern` input passed into `match` or + `replace`. " + [pattern] + (cond + (keyword? pattern) + [[pattern '& '_]] + + (and (set? pattern) (every? keyword? pattern)) + [[`(:or ~@pattern) '& '_]] + + ;; special case for `_`, we'll let you match anything with that + (= pattern '_) + [pattern] + + (symbol? pattern) + `[(~'_ :guard (match-with-pred-or-class ~pattern))] + + :else + [pattern])) + +(defn- recur-form? [form] + (and (seq? form) + (= 'recur (first form)))) + +(defn- rewrite-recurs + "Replace any `recur` forms with ones that include the implicit `&parents` arg." + [fn-name result-form] + (walk/postwalk + (fn [form] + (if (recur-form? form) + ;; we *could* use plain `recur` here, but `core.match` cannot apply code size optimizations if a `recur` form + ;; is present. Instead, just do a non-tail-call-optimized call to the pattern fn so `core.match` can generate + ;; efficient code. + ;; + ;; (recur [:new-clause ...]) ; -> (match-123456 &parents [:new-clause ...]) + `(~fn-name ~'&parents ~@(rest form)) + form)) + result-form)) + +(defn- generate-patterns-and-results + "Generate the `core.match` patterns and results given the input to our macros. + + `wrap-result-forms?` will wrap the results parts of the pairs in a vector, so we do something like `(reduce concat)` + on all of the results to return a sequence of matches for `match`." + {:style/indent 1} + [fn-name patterns-and-results & {:keys [wrap-result-forms?]}] + (reduce + concat + (for [[pattern result] (partition 2 2 ['&match] patterns-and-results)] + [(generate-pattern pattern) (let [result (rewrite-recurs fn-name result)] + (if (or (not wrap-result-forms?) + (and (seq? result) + (= fn-name (first result)))) + result + [result]))]))) + + +;;; --------------------------------------------------- match-impl --------------------------------------------------- + +(defn match-in-collection + "Internal impl for `match`. If `form` is a collection, call `match-fn` to recursively look for matches in it." + [match-fn clause-parents form] + {:pre [(fn? match-fn) (vector? clause-parents)]} + (cond + (map? form) + (reduce concat (for [[k v] form] + (match-fn (conj clause-parents k) v))) + + (sequential? form) + (mapcat (partial match-fn (if (keyword? (first form)) + (conj clause-parents (first form)) + clause-parents)) + form))) + +(defn- skip-else-clause? + "If the last pattern passed in was `_`, we can skip generating the default `:else` clause, because it will never + match." + ;; TODO - why don't we just let people pass their own `:else` clause instead? + [patterns-and-results] + (= '_ (second (reverse patterns-and-results)))) + +(defmacro match + "Internal impl for `match`. Generate a pattern-matching function using `core.match`, and call it with `form`." + [form patterns-and-results] + (let [match-fn-symb (gensym "match-")] + `(seq + (filter + some? + ((fn ~match-fn-symb [~'&parents ~'&match] + (match/match [~'&match] + ~@(generate-patterns-and-results match-fn-symb patterns-and-results, :wrap-result-forms? true) + ~@(when-not (skip-else-clause? patterns-and-results) + [:else `(match-in-collection ~match-fn-symb ~'&parents ~'&match)]))) + [] + ~form))))) + + +;;; -------------------------------------------------- replace impl -------------------------------------------------- + +(defn replace-in-collection + "Inernal impl for `replace`. Recursively replace values in a collection using a `replace-fn`." + [replace-fn clause-parents form] + (cond + (map? form) + (into form (for [[k v] form] + [k (replace-fn (conj clause-parents k) v)])) + + (sequential? form) + (mapv (partial replace-fn (if (keyword? (first form)) + (conj clause-parents (first form)) + clause-parents)) + form) + :else form)) + +(defmacro replace + "Internal implementation for `replace`. Generate a pattern-matching function with `core.match`, and use it to replace + matching values in `form`." + [form patterns-and-results] + (let [replace-fn-symb (gensym "replace-")] + `((fn ~replace-fn-symb [~'&parents ~'&match] + (match/match [~'&match] + ~@(generate-patterns-and-results replace-fn-symb patterns-and-results, :wrap-result-forms? false) + ~@(when-not (skip-else-clause? patterns-and-results) + [:else `(replace-in-collection ~replace-fn-symb ~'&parents ~'&match)]))) + [] + ~form))) diff --git a/test/metabase/mbql/normalize_test.clj b/test/metabase/mbql/normalize_test.clj index 9a648a1b41c45fca1aca688fc41d24ef3eac1033..c53327469a1cf3dc04e5d2a0c4dfeb5d5cfcd7b1 100644 --- a/test/metabase/mbql/normalize_test.clj +++ b/test/metabase/mbql/normalize_test.clj @@ -22,7 +22,49 @@ (#'normalize/normalize-tokens {:native {:query {:NAME "FAKE_QUERY" :description "Theoretical fake query in a JSON-based query lang"}}})) -;; do aggregations get normalized? +;; METRICS shouldn't get normalized in some kind of wacky way +(expect + {:aggregation [:+ [:metric 10] 1]} + (#'normalize/normalize-tokens {:aggregation ["+" ["METRIC" 10] 1]})) + +;; Nor should SEGMENTS +(expect + {:filter [:= [:+ [:segment 10] 1] 10]} + (#'normalize/normalize-tokens {:filter ["=" ["+" ["SEGMENT" 10] 1] 10]})) + +;; are expression names exempt from lisp-casing/lower-casing? +(expect + {:query {:expressions {:sales_tax [:- [:field-id 10] [:field-id 20]]}}} + (#'normalize/normalize-tokens {"query" {"expressions" {:sales_tax ["-" ["field-id" 10] ["field-id" 20]]}}})) + +;; expression names should always be keywords +(expect + {:query {:expressions {:sales_tax [:- [:field-id 10] [:field-id 20]]}}} + (#'normalize/normalize-tokens {"query" {"expressions" {:sales_tax ["-" ["field-id" 10] ["field-id" 20]]}}})) + +;; expression references should be exempt too +(expect + {:order-by [[:desc [:expression "SALES_TAX"]]]} + (#'normalize/normalize-tokens {:order-by [[:desc [:expression "SALES_TAX"]]]}) ) + +;; ... but they should be converted to strings if passed in as a KW for some reason. Make sure we preserve namespace! +(expect + {:order-by [[:desc [:expression "SALES/TAX"]]]} + (#'normalize/normalize-tokens {:order-by [[:desc ["expression" :SALES/TAX]]]})) + +;; field literals should be exempt too +(expect + {:order-by [[:desc [:field-literal "SALES_TAX" :type/Number]]]} + (#'normalize/normalize-tokens {:order-by [[:desc [:field-literal "SALES_TAX" :type/Number]]]}) ) + +;; ... but they should be converted to strings if passed in as a KW for some reason +(expect + {:order-by [[:desc [:field-literal "SALES/TAX" :type/Number]]]} + (#'normalize/normalize-tokens {:order-by [[:desc ["field_literal" :SALES/TAX "type/Number"]]]})) + + +;;; -------------------------------------------------- aggregation --------------------------------------------------- + (expect {:query {:aggregation :rows}} (#'normalize/normalize-tokens {:query {"AGGREGATION" "ROWS"}})) @@ -83,45 +125,8 @@ {:query {:aggregation [:+ [:sum 10] [:sum 20] [:sum 30]]}} (#'normalize/normalize-tokens {:query {:aggregation ["+" ["sum" 10] ["SUM" 20] ["sum" 30]]}})) -;; METRICS shouldn't get normalized in some kind of wacky way -(expect - {:aggregation [:+ [:metric 10] 1]} - (#'normalize/normalize-tokens {:aggregation ["+" ["METRIC" 10] 1]})) - -;; Nor should SEGMENTS -(expect - {:filter [:= [:+ [:segment 10] 1] 10]} - (#'normalize/normalize-tokens {:filter ["=" ["+" ["SEGMENT" 10] 1] 10]})) - -;; are expression names exempt from lisp-casing/lower-casing? -(expect - {:query {:expressions {:sales_tax [:- [:field-id 10] [:field-id 20]]}}} - (#'normalize/normalize-tokens {"query" {"expressions" {:sales_tax ["-" ["field-id" 10] ["field-id" 20]]}}})) - -;; expression names should always be keywords -(expect - {:query {:expressions {:sales_tax [:- [:field-id 10] [:field-id 20]]}}} - (#'normalize/normalize-tokens {"query" {"expressions" {:sales_tax ["-" ["field-id" 10] ["field-id" 20]]}}})) - -;; expression references should be exempt too -(expect - {:order-by [[:desc [:expression "SALES_TAX"]]]} - (#'normalize/normalize-tokens {:order-by [[:desc [:expression "SALES_TAX"]]]}) ) - -;; ... but they should be converted to strings if passed in as a KW for some reason. Make sure we preserve namespace! -(expect - {:order-by [[:desc [:expression "SALES/TAX"]]]} - (#'normalize/normalize-tokens {:order-by [[:desc ["expression" :SALES/TAX]]]})) - -;; field literals should be exempt too -(expect - {:order-by [[:desc [:field-literal "SALES_TAX" :type/Number]]]} - (#'normalize/normalize-tokens {:order-by [[:desc [:field-literal "SALES_TAX" :type/Number]]]}) ) -;; ... but they should be converted to strings if passed in as a KW for some reason -(expect - {:order-by [[:desc [:field-literal "SALES/TAX" :type/Number]]]} - (#'normalize/normalize-tokens {:order-by [[:desc ["field_literal" :SALES/TAX "type/Number"]]]})) +;;; ---------------------------------------------------- order-by ---------------------------------------------------- ;; does order-by get properly normalized? (expect @@ -140,6 +145,9 @@ {:query {:order-by [[:desc [:field-id 10]]]}} (#'normalize/normalize-tokens {:query {"ORDER_BY" [["DESC" ["field_id" 10]]]}})) + +;;; ----------------------------------------------------- filter ----------------------------------------------------- + ;; the unit & amount in time interval clauses should get normalized (expect {:query {:filter [:time-interval 10 :current :day]}} @@ -180,6 +188,9 @@ {:query {:filter [:starts-with 10 "ABC" {:case-sensitive true}]}} (#'normalize/normalize-tokens {:query {"FILTER" ["starts_with" 10 "ABC" {"case_sensitive" true}]}})) + +;;; --------------------------------------------------- parameters --------------------------------------------------- + ;; make sure we're not running around trying to normalize the type in native query params (expect {:type :native @@ -245,6 +256,8 @@ :target ["dimension" ["template-tag" "names_list"]] :value ["=" 10 20]}]})) +;;; ------------------------------------------------- source queries ------------------------------------------------- + ;; Make sure token normalization works correctly on source queries (expect {:database 4 @@ -274,6 +287,9 @@ :type :query :query {"source_query" {"source_table" 1, "aggregation" "rows"}}})) + +;;; ----------------------------------------------------- other ------------------------------------------------------ + ;; Does the QueryExecution context get normalized? (expect {:context :json-download} @@ -298,19 +314,36 @@ [:field-id 10] (#'normalize/wrap-implicit-field-id [:field-id 10])) +;; make sure `binning-strategy` wraps implicit Field IDs +(expect + {:query {:breakout [[:binning-strategy [:field-id 10] :bin-width 2000]]}} + (#'normalize/canonicalize {:query {:breakout [[:binning-strategy 10 :bin-width 2000]]}})) + + +;;; -------------------------------------------------- aggregation --------------------------------------------------- + ;; Do aggregations get canonicalized properly? +;; field ID should get wrapped in field-id and ags should be converted to multiple ag syntax (expect {:query {:aggregation [[:count [:field-id 10]]]}} (#'normalize/canonicalize {:query {:aggregation [:count 10]}})) +;; ag with no Field ID (expect {:query {:aggregation [[:count]]}} (#'normalize/canonicalize {:query {:aggregation [:count]}})) +;; if already wrapped in field-id it's ok (expect {:query {:aggregation [[:count [:field-id 1000]]]}} (#'normalize/canonicalize {:query {:aggregation [:count [:field-id 1000]]}})) +;; ags in the canonicalized format should pass thru ok +(expect + {:query {:aggregation [[:metric "ga:sessions"] [:metric "ga:1dayUsers"]]}} + (#'normalize/canonicalize + {:query {:aggregation [[:metric "ga:sessions"] [:metric "ga:1dayUsers"]]}})) + ;; :rows aggregation type, being deprecated since FOREVER, should just get removed (expect {:query {:aggregation []}} @@ -389,6 +422,20 @@ {:query {:aggregation [[:cum-count [:field-id 10]]]}} (#'normalize/canonicalize {:query {:aggregation [:cum-count 10]}})) +;; should handle seqs without a problem +(expect + {:query {:aggregation [[:min [:field-id 1]] [:min [:field-id 2]]]}} + (#'normalize/canonicalize {:query {:aggregation '([:min 1] [:min 2])}})) + +;; make sure canonicalization can handle aggregations with expressions where the Field normally goes +(expect + {:query {:aggregation [[:sum [:* [:field-id 4] [:field-id 1]]]]}} + (#'normalize/canonicalize + {:query {:aggregation [[:sum [:* [:field-id 4] [:field-id 1]]]]}})) + + +;;; ---------------------------------------------------- breakout ---------------------------------------------------- + ;; implicit Field IDs should get wrapped in [:field-id] in :breakout (expect {:query {:breakout [[:field-id 10]]}} @@ -398,10 +445,18 @@ {:query {:breakout [[:field-id 10] [:field-id 20]]}} (#'normalize/canonicalize {:query {:breakout [10 20]}})) +;; should handle seqs +(expect + {:query {:breakout [[:field-id 10] [:field-id 20]]}} + (#'normalize/canonicalize {:query {:breakout '(10 20)}})) + (expect {:query {:breakout [[:field-id 1000]]}} (#'normalize/canonicalize {:query {:breakout [[:field-id 1000]]}})) + +;;; ----------------------------------------------------- fields ----------------------------------------------------- + (expect {:query {:fields [[:field-id 10]]}} (#'normalize/canonicalize {:query {:fields [10]}})) @@ -415,6 +470,14 @@ {:query {:fields [[:field-id 1000]]}} (#'normalize/canonicalize {:query {:fields [[:field-id 1000]]}})) +;; should handle seqs +(expect + {:query {:fields [[:field-id 10] [:field-id 20]]}} + (#'normalize/canonicalize {:query {:fields '(10 20)}})) + + +;;; ----------------------------------------------------- filter ----------------------------------------------------- + ;; implicit Field IDs should get wrapped in [:field-id] in filters (expect {:query {:filter [:= [:field-id 10] 20]}} @@ -534,6 +597,20 @@ [:segment "gaid:-11"] [:time-interval [:field-id 6851] -365 :day {}]]}})) +;; should handle seqs +(expect + {:query {:filter [:and [:= [:field-id 100] 1] [:= [:field-id 200] 2]]}} + (#'normalize/canonicalize {:query {:filter '(:and + [:= 100 1] + [:= 200 2])}})) + +;; if you put a `:datetime-field` inside a `:time-interval` we should fix it for you +(expect + {:query {:filter [:time-interval [:field-id 8] -30 :day]}} + (#'normalize/canonicalize {:query {:filter [:time-interval [:datetime-field [:field-id 8] :month] -30 :day]}})) + +;;; ---------------------------------------------------- order-by ---------------------------------------------------- + ;; ORDER BY: MBQL 95 [field direction] should get translated to MBQL 98 [direction field] (expect {:query {:order-by [[:asc [:field-id 10]]]}} @@ -578,11 +655,13 @@ {:query {:filter [:= [:field-id 1] 10]}} (#'normalize/canonicalize {:query {:filter [:= [:field-id [:field-id 1]] 10]}})) -;; make sure `binning-strategy` wraps implicit Field IDs +;; we should handle seqs no prob (expect - {:query {:breakout [[:binning-strategy [:field-id 10] :bin-width 2000]]}} - (#'normalize/canonicalize {:query {:breakout [[:binning-strategy 10 :bin-width 2000]]}})) + {:query {:filter [:= [:field-id 1] 10]}} + (#'normalize/canonicalize {:query {:filter '(:= 1 10)}})) + +;;; ------------------------------------------------- source queries ------------------------------------------------- ;; Make sure canonicalization works correctly on source queries (expect @@ -605,21 +684,16 @@ :required true :default "Widget"}}}}})) +;; make sure we recursively canonicalize source queries (expect - {:database 4, - :type :query, + {:database 4 + :type :query :query {:source-query {:source-table 1, :aggregation []}}} (#'normalize/canonicalize {:database 4 :type :query :query {:source-query {:source-table 1, :aggregation :rows}}})) -;; make sure canonicalization can handle aggregations with expressions where the Field normally goes -(expect - {:query {:aggregation [[:sum [:* [:field-id 4] [:field-id 1]]]]}} - (#'normalize/canonicalize - {:query {:aggregation [[:sum [:* [:field-id 4] [:field-id 1]]]]}})) - ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | REMOVE EMPTY CLAUSES | diff --git a/test/metabase/mbql/util_test.clj b/test/metabase/mbql/util_test.clj index 8c5368649b1c3a459c51496c9a5dd7b00f35b11f..75665fb9d9e9f979fc6549a70b555cd323a829c4 100644 --- a/test/metabase/mbql/util_test.clj +++ b/test/metabase/mbql/util_test.clj @@ -1,38 +1,325 @@ (ns metabase.mbql.util-test - (:require [expectations :refer :all] + (:require [expectations :refer [expect]] [metabase.mbql.util :as mbql.u])) -;; can we use `clause-instances` to find the instances of a clause? +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | match | +;;; +----------------------------------------------------------------------------------------------------------------+ + +;; can we use `match` to find the instances of a clause? (expect [[:field-id 10] [:field-id 20]] - (mbql.u/clause-instances :field-id {:query {:filter [:= - [:field-id 10] - [:field-id 20]]}})) + (mbql.u/match {:query {:filter [:= + [:field-id 10] + [:field-id 20]]}} + [:field-id & _])) -;; clause-instances shouldn't include subclauses of certain clauses if we don't want them +;; is `match` nice enought to automatically wrap raw keywords in appropriate patterns for us? (expect [[:field-id 1] - [:fk-> [:field-id 2] [:field-id 3]]] - (mbql.u/clause-instances #{:field-id :fk->} [[:field-id 1] [:fk-> [:field-id 2] [:field-id 3]]])) + [:field-id 2] + [:field-id 3]] + (mbql.u/match {:fields [[:field-id 1] [:fk-> [:field-id 2] [:field-id 3]]]} + :field-id)) -;; ...but we should be able to ask for them +;; if we pass a set of keywords, will that generate an appropriate pattern to match multiple clauses as well? (expect [[:field-id 1] - [:fk-> [:field-id 2] [:field-id 3]] [:field-id 2] - [:field-id 3]] - (mbql.u/clause-instances #{:field-id :fk->} - [[:field-id 1] [:fk-> [:field-id 2] [:field-id 3]]] - :include-subclauses? true)) + [:field-id 3] + [:datetime-field [:field-id 4]]] + (mbql.u/match {:fields [[:field-id 1] + [:fk-> [:field-id 2] [:field-id 3]] + [:datetime-field [:field-id 4]]]} + #{:field-id :datetime-field})) + +;; `match` shouldn't include subclauses of matches +(expect + [[:field-id 1] + [:fk-> [:field-id 2] [:field-id 3]]] + (mbql.u/match [[:field-id 1] [:fk-> [:field-id 2] [:field-id 3]]] + [(:or :field-id :fk->) & _])) (expect [[:field-id 10] [:field-id 20]] - (mbql.u/clause-instances #{:field-id :+ :-} - {:query {:filter [:= - [:field-id 10] - [:field-id 20]]}})) + (mbql.u/match {:query {:filter [:= + [:field-id 10] + [:field-id 20]]}} + [(:or :field-id :+ :-) & _])) + +;; can we use some of the cool features of pattern matching? +(def ^:private a-query + {:breakout [[:field-id 10] + [:field-id 20] + [:field-literal "Wow"]] + :fields [[:fk-> + [:field-id 30] + [:field-id 40]]]}) + +;; can we use the optional `result` parameter to find return something other than the whole clause? +(expect + [41] + ;; return just the dest IDs of Fields in a fk-> clause + (mbql.u/match a-query + [:fk-> _ [:field-id dest-id]] (inc dest-id))) + +(expect + [10 20] + (mbql.u/match (:breakout a-query) [:field-id id] id)) + +;; match should return `nil` if there are no matches so you don't need to call `seq` +(expect + nil + (mbql.u/match {} [:datetime-field _ unit] unit)) + +;; if pattern is just a raw keyword `match` should be kind enough to turn it into a pattern for you +(expect + [[:field-id 1] + [:field-id 2]] + (mbql.u/match {:fields [[:field-id 1] [:datetime-field [:field-id 2] :day]]} + :field-id)) + +;; can we `:guard` a pattern? +(expect + [[:field-id 2]] + (let [a-field-id 2] + (mbql.u/match {:fields [[:field-id 1] [:field-id 2]]} + [:field-id (id :guard (partial = a-field-id))]))) + +;; ok, if for some reason we can't use `:guard` in the pattern will `match` filter out nil results? +(expect + [2] + (mbql.u/match {:fields [[:field-id 1] [:field-id 2]]} + [:field-id id] + (when (= id 2) + id))) + +;; Ok, if we want to use predicates but still return the whole match, can we use the anaphoric `&match` symbol to +;; return the whole thing? +(def ^:private another-query + {:fields [[:field-id 1] + [:datetime-field [:field-id 2] :day] + [:datetime-field [:fk-> [:field-id 3] [:field-id 4]] :month]]}) + +(expect + [[:field-id 1] + [:field-id 2] + [:field-id 3] + [:field-id 4]] + (let [some-pred? (constantly true)] + (mbql.u/match another-query + :field-id + (when some-pred? + &match)))) + +;; can we use the anaphoric `&parents` symbol to examine the parents of the collection? let's see if we can match +;; `:field-id` clauses that are inside `:datetime-field` clauses, regardless of whether something else wraps them +(expect + [[:field-id 2] + [:field-id 3] + [:field-id 4]] + (mbql.u/match another-query + :field-id + (when (contains? (set &parents) :datetime-field) + &match))) + +;; can we match using a CLASS? +(expect + [#inst "2018-10-08T00:00:00.000-00:00"] + (mbql.u/match [[:field-id 1] + [:field-id 2] + #inst "2018-10-08" + 4000] + java.util.Date)) + +;; can we match using a PREDICATE? +(expect + [4000 5000] + ;; find the integer args to `:=` clauses that are not inside `:field-id` clauses + (mbql.u/match {:filter [:and + [:= [:field-id 1] 4000] + [:= [:field-id 2] 5000]]} + integer? + (when (= := (last &parents)) + &match))) + +;; how can we use predicates not named by a symbol? +(expect + [1 4000 2 5000] + (mbql.u/match {:filter [:and + [:= [:field-id 1] 4000] + [:= [:field-id 2] 5000]]} + (&match :guard #(integer? %)))) + +;; can we use `recur` inside a pattern? +(expect + [[0 :month]] + (mbql.u/match {:filter [:time-interval [:field-id 1] :current :month]} + [:time-interval field :current unit] (recur [:time-interval field 0 unit]) + [:time-interval _ n unit] [n unit])) + +;; can we short-circut a match to prevent recursive matching? +(expect + [10] + (mbql.u/match [[:field-id 10] + [:datetime-field [:field-id 20] :day]] + [:field-id id] id + [_ [:field-id & _] & _] nil)) + +;; can we use a list with a :guard clause? +(expect + [10 20] + (mbql.u/match {:query {:filter [:= + [:field-id 10] + [:field-id 20]]}} + (id :guard int?) id)) + + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | replace | +;;; +----------------------------------------------------------------------------------------------------------------+ + +;; can we use `replace` to replace a specific clause? +(expect + {:breakout [[:datetime-field [:field-id 10] :day] + [:datetime-field [:field-id 20] :day] + [:field-literal "Wow"]] + :fields [[:fk-> + [:datetime-field [:field-id 30] :day] + [:datetime-field [:field-id 40] :day]]]} + (mbql.u/replace a-query [:field-id id] + [:datetime-field [:field-id id] :day])) + +;; can we wrap the pattern in a map to restrict what gets replaced? +(expect + {:breakout [[:datetime-field [:field-id 10] :day] + [:datetime-field [:field-id 20] :day] + [:field-literal "Wow"]] + :fields [[:fk-> [:field-id 30] [:field-id 40]]]} + (mbql.u/replace-in a-query [:breakout] [:field-id id] + [:datetime-field [:field-id id] :day])) + +;; can we use multiple patterns at the same time?! +(expect + {:breakout [[:field-id 10] [:field-id 20] {:name "Wow"}], :fields [30]} + (mbql.u/replace a-query + [:fk-> [:field-id field-id] _] field-id + [:field-literal field-name] {:name field-name})) + +;; can we use `replace` to replace the ID of the dest Field in fk-> clauses? +(expect + {:breakout [[:field-id 10] + [:field-id 20] + [:field-literal "Wow"]] + :fields [[:fk-> [:field-id 30] [:field-id 100]]]} + (mbql.u/replace a-query [:fk-> source [:field-id 40]] + [:fk-> source [:field-id 100]])) + +;; can we use `replace` to fix `fk->` clauses where both args are unwrapped IDs? +(expect + {:query {:fields [[:fk-> [:field-id 1] [:field-id 2]] + [:fk-> [:field-id 3] [:field-id 4]]]}} + (mbql.u/replace-in + {:query {:fields [[:fk-> 1 2] + [:fk-> [:field-id 3] [:field-id 4]]]}} + [:query :fields] + [:fk-> (source :guard integer?) (dest :guard integer?)] + [:fk-> [:field-id source] [:field-id dest]])) + +;; does `replace` accept a raw keyword as the pattern the way `match` does? +(expect + {:fields ["WOW" + [:datetime-field "WOW" :day] + [:datetime-field [:fk-> "WOW" "WOW"] :month]]} + (mbql.u/replace another-query :field-id "WOW")) + +;; does `replace` accept a set of keywords the way `match` does? +(expect + {:fields ["WOW" "WOW" "WOW"]} + (mbql.u/replace another-query #{:datetime-field :field-id} "WOW")) + +;; can we use the anaphor `&match` to look at the entire match? +(expect + {:fields [[:field-id 1] + [:magical-field + [:datetime-field [:field-id 2] :day]] + [:magical-field + [:datetime-field [:fk-> [:field-id 3] [:field-id 4]] :month]]]} + (mbql.u/replace another-query :datetime-field [:magical-field &match])) + +;; can we use the anaphor `&parents` to look at the parents of the match? +(expect + {:fields + [[:field-id 1] + [:datetime-field "WOW" :day] + [:datetime-field [:fk-> "WOW" "WOW"] :month]]} + ;; replace field ID clauses that are inside a datetime-field clause + (mbql.u/replace another-query :field-id + (if (contains? (set &parents) :datetime-field) + "WOW" + &match))) + +;; can we replace using a CLASS? +(expect + [[:field-id 1] + [:field-id 2] + [:timestamp #inst "2018-10-08T00:00:00.000-00:00"] + 4000] + (mbql.u/replace [[:field-id 1] + [:field-id 2] + #inst "2018-10-08" + 4000] + java.util.Date + [:timestamp &match])) + +;; can we replace using a PREDICATE? +(expect + {:filter [:and [:= [:field-id nil] 4000.0] [:= [:field-id nil] 5000.0]]} + ;; find the integer args to `:=` clauses that are not inside `:field-id` clauses and make them FLOATS + (mbql.u/replace {:filter [:and + [:= [:field-id 1] 4000] + [:= [:field-id 2] 5000]]} + integer? + (when (= := (last &parents)) + (float &match)))) + +;; can we do fancy stuff like remove all the filters that use datetime fields from a query? +;; +;; (NOTE: this example doesn't take into account the fact that [:binning-strategy ...] can wrap a `:datetime-field`, +;; so it's only appropriate for drivers that don't support binning (e.g. GA). Also the driver QP will need to be +;; written to handle the nils in a filter clause appropriately.) +(expect + [:and nil [:= [:field-id 100] 20]] + (mbql.u/replace [:and + [:= + [:datetime-field [:field-literal "ga:date"] :day] + [:absolute-datetime #inst "2016-11-08T00:00:00.000-00:00" :day]] + [:= [:field-id 100] 20]] + [_ [:datetime-field & _] & _] nil)) + +;; can we use short-circuting patterns to do something tricky like only replace `:field-id` clauses that aren't +;; wrapped by other clauses? +(expect + [[:datetime-field [:field-id 10] :day] + [:datetime-field [:field-id 20] :month] + [:field-id 30]] + (let [id-is-datetime-field? #{10}] + (mbql.u/replace [[:field-id 10] + [:datetime-field [:field-id 20] :month] + [:field-id 30]] + ;; don't replace anything that's already wrapping a `field-id` + [_ [:field-id & _] & _] + &match + + [:field-id (_ :guard id-is-datetime-field?)] + [:datetime-field &match :day]))) + + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | Other Fns | +;;; +----------------------------------------------------------------------------------------------------------------+ ;; can `simplify-compound-filter` fix `and` or `or` with only one arg? (expect @@ -62,6 +349,54 @@ [:= [:field-id 1] 2] (mbql.u/simplify-compound-filter [:not [:not [:= [:field-id 1] 2]]])) +;; does `simplify-compound-filter` return `nil` for empty filter clauses? +(expect + nil + (mbql.u/simplify-compound-filter nil)) + +(expect + nil + (mbql.u/simplify-compound-filter [])) + +(expect + nil + (mbql.u/simplify-compound-filter [nil nil nil])) + +(expect + nil + (mbql.u/simplify-compound-filter [:and nil nil])) + +(expect + nil + (mbql.u/simplify-compound-filter [:and nil [:and nil nil nil] nil])) + +;; can `simplify-compound-filter` eliminate `nil` inside compound filters? +(expect + [:= [:field-id 1] 2] + (mbql.u/simplify-compound-filter [:and nil [:and nil [:= [:field-id 1] 2] nil] nil])) + +(expect + [:and + [:= [:field-id 1] 2] + [:= [:field-id 3] 4] + [:= [:field-id 5] 6] + [:= [:field-id 7] 8] + [:= [:field-id 9] 10]] + (mbql.u/simplify-compound-filter [:and + nil + [:= [:field-id 1] 2] + [:and + [:= [:field-id 3] 4]] + nil + [:and + [:and + [:and + [:= [:field-id 5] 6] + nil + nil] + [:= [:field-id 7] 8] + [:= [:field-id 9] 10]]]])) + ;; can we add an order-by clause to a query? (expect {:database 1, :type :query, :query {:source-table 1, :order-by [[:asc [:field-id 10]]]}} @@ -113,3 +448,35 @@ :query {:source-table 1 :order-by [[:asc [:field-id 10]]]}} [:asc [:datetime-field [:field-id 10] :day]])) + + +;;; ---------------------------------------------- aggregation-at-index ---------------------------------------------- + +(def ^:private query-with-some-nesting + {:database 1 + :type :query + :query {:source-query {:source-table 1 + :aggregation [[:stddev [:field-id 1]] + [:min [:field-id 1]]]} + :aggregation [[:avg [:field-id 1]] + [:max [:field-id 1]]]}}) + +(expect + [:avg [:field-id 1]] + (mbql.u/aggregation-at-index query-with-some-nesting 0)) + +(expect + [:max [:field-id 1]] + (mbql.u/aggregation-at-index query-with-some-nesting 1)) + +(expect + [:avg [:field-id 1]] + (mbql.u/aggregation-at-index query-with-some-nesting 0 0)) + +(expect + [:stddev [:field-id 1]] + (mbql.u/aggregation-at-index query-with-some-nesting 0 1)) + +(expect + [:min [:field-id 1]] + (mbql.u/aggregation-at-index query-with-some-nesting 1 1))