Skip to content
Snippets Groups Projects
Unverified Commit 3ba61fe9 authored by Cam Saul's avatar Cam Saul
Browse files

Add new auto-bucket-datetime-breakouts middleware; test fixes [ci drivers]

parent 3c2e4380
No related merge requests found
Showing with 218 additions and 94 deletions
......@@ -9,9 +9,7 @@
encode-base64-json metric-name source-name]]
[filters :as filters]
[populate :as populate]]
[metabase.mbql
[normalize :as normalize]
[util :as mbql.u]]
[metabase.mbql.normalize :as normalize]
[metabase.models.table :refer [Table]]
[metabase.query-processor.util :as qp.util]
[puppetlabs.i18n.core :as i18n :refer [tru]]))
......@@ -43,14 +41,22 @@
(def ^:private ^{:arglists '([card])} display-type
(comp qp.util/normalize-token :display))
(defn- add-filter-clauses
"Add `new-filter-clauses` to a query. There is actually an `mbql.u/add-filter-clause` function we should be using
instead, but that validates its input and output, and the queries that come in here aren't always valid (for
example, missing `:database`). If we can, it would be nice to use that instead of reinventing the wheel here."
[{{existing-filter-clause :filter} :query, :as query}, new-filter-clauses]
(let [clauses (filter identity (cons existing-filter-clause new-filter-clauses))
new-filter-clause (when (seq clauses)
(normalize/normalize-fragment [:query :filter] (cons :and clauses)))]
(cond-> query
(seq new-filter-clause) (assoc-in [:query :filter] new-filter-clause))))
(defn- inject-filter
"Inject filter clause into card."
[{:keys [query-filter cell-query] :as root} card]
(-> card
(update :dataset_query #(reduce mbql.u/add-filter-clause
(normalize/normalize %)
(map (partial normalize/normalize-fragment [:query :filter])
[query-filter cell-query])))
(update :dataset_query #(add-filter-clauses % [query-filter cell-query]))
(update :series (partial map (partial inject-filter root)))))
(defn- multiseries?
......
(ns metabase.driver.googleanalytics.query-processor
"The Query Processor is responsible for translating the Metabase Query Language into Google Analytics request format.
See https://developers.google.com/analytics/devguides/reporting/core/v3"
(:require [clojure.string :as s]
(:require [clojure.string :as str]
[clojure.tools.reader.edn :as edn]
[medley.core :as m]
[metabase.query-processor
......@@ -51,8 +51,8 @@
(into {} (for [c chars-to-escape]
{c (str "\\" c)})))
(def ^:private ^{:arglists '([s])} escape-for-regex (u/rpartial s/escape (char-escape-map ".\\+*?[^]$(){}=!<>|:-")))
(def ^:private ^{:arglists '([s])} escape-for-filter-clause (u/rpartial s/escape (char-escape-map ",;\\")))
(def ^:private ^{:arglists '([s])} escape-for-regex (u/rpartial str/escape (char-escape-map ".\\+*?[^]$(){}=!<>|:-")))
(def ^:private ^{:arglists '([s])} escape-for-filter-clause (u/rpartial str/escape (char-escape-map ",;\\")))
(defn- ga-filter ^String [& parts]
(escape-for-filter-clause (apply str parts)))
......@@ -90,10 +90,10 @@
(defn- handle-breakout [{breakout-clause :breakout}]
{:dimensions (if-not breakout-clause
""
(s/join "," (for [breakout-field breakout-clause]
(if (instance? DateTimeField breakout-field)
(unit->ga-dimension (:unit breakout-field))
(->rvalue breakout-field)))))})
(str/join "," (for [breakout-field breakout-clause]
(if (instance? DateTimeField breakout-field)
(unit->ga-dimension (:unit breakout-field))
(->rvalue breakout-field)))))})
;;; ### filter
......@@ -125,15 +125,15 @@
(defn- parse-filter-clause:filters ^String [{:keys [compound-type subclause subclauses], :as clause}]
(case compound-type
:and (s/join ";" (remove nil? (map parse-filter-clause:filters subclauses)))
:or (s/join "," (remove nil? (map parse-filter-clause:filters subclauses)))
:and (str/join ";" (remove nil? (map parse-filter-clause:filters subclauses)))
:or (str/join "," (remove nil? (map parse-filter-clause:filters subclauses)))
:not (parse-filter-subclause:filters subclause :negate)
nil (parse-filter-subclause:filters clause)))
(defn- handle-filter:filters [{filter-clause :filter}]
(when filter-clause
(let [filter (parse-filter-clause:filters filter-clause)]
(when-not (s/blank? filter)
(when-not (str/blank? filter)
{:filters filter}))))
(defn- parse-filter-subclause:interval [{:keys [filter-type field value], :as filter} & [negate?]]
......@@ -173,7 +173,7 @@
(defn- handle-order-by [{:keys [order-by], :as query}]
(when order-by
{:sort (s/join "," (for [{:keys [field direction]} order-by]
{:sort (str/join "," (for [{:keys [field direction]} order-by]
(str (case direction
:ascending ""
:descending "-")
......@@ -205,7 +205,7 @@
:mbql? true})
(defn- parse-number [s]
(edn/read-string (s/replace s #"^0+(.+)$" "$1")))
(edn/read-string (str/replace s #"^0+(.+)$" "$1")))
(def ^:private ga-dimension->date-format-fn
{"ga:minute" parse-number
......@@ -259,7 +259,7 @@
(defn- built-in-metrics
[{query :query}]
(when-let [ags (seq (aggregations query))]
(s/join "," (for [[aggregation-type metric-name] ags
(str/join "," (for [[aggregation-type metric-name] ags
:when (and aggregation-type
(= :metric (qputil/normalize-token aggregation-type))
(string? metric-name))]
......@@ -312,10 +312,12 @@
(when (> (count filter-clause) 1)
(vec filter-clause)))))
(defn- handle-built-in-segments [query]
(-> query
(assoc-in [:ga :segment] (built-in-segments query))
(update-in [:query :filter] remove-built-in-segments)))
(defn- handle-built-in-segments [{{filters :filter} :query, :as query}]
(let [query (assoc-in query [:ga :segment] (built-in-segments query))
filters (remove-built-in-segments filters)]
(if (seq filters)
(assoc-in query [:query :filter] filters)
(m/dissoc-in query [:query :filter]))))
;;; public
......
......@@ -325,7 +325,10 @@
{:query s/Any
(s/optional-key :template-tags) {su/NonBlankString TemplateTag}
;; collection (table) this query should run against. Needed for MongoDB
(s/optional-key :collection) (s/maybe su/NonBlankString)})
(s/optional-key :collection) (s/maybe su/NonBlankString)
;; other stuff gets added in my different bits of QP middleware to record bits of state or pass info around.
;; Everyone else can ignore them.
s/Keyword s/Any})
;;; ----------------------------------------------- MBQL [Inner] Query -----------------------------------------------
......@@ -355,16 +358,9 @@
(s/optional-key :order-by) (su/non-empty [OrderBy])
(s/optional-key :page) {:page su/IntGreaterThanOrEqualToZero
:items su/IntGreaterThanZero}
;;
;; INTERNAL KEYS
;;
;; These keys are added by various bits of QP middleware to track how it was processed. Don't add them
;; yourself unless you like breaking things. Go ahead an ignore them.
;;
;; `fields-is-implict` tracks whether the `:fields` clause was added implicitly, for queries with no aggregations.
;; This is used to determine post-processing column sort order. TODO - I think we can remove this key entirely
;; once we start doing sorting in pre-processing
(s/optional-key :fields-is-implicit) s/Bool}
;; Various bits of middleware add additonal keys, such as `fields-is-implicit?`, to record bits of state or pass
;; info to other pieces of middleware. Everyone else can ignore them.
s/Keyword s/Any}
(fn [query]
(core/= 1 (core/count (select-keys query [:source-query :source-table]))))
"Query must specify either `:source-table` or `:source-query`, but not both."))
......@@ -382,8 +378,10 @@
(def ^:private Settings
"Options that tweak the behavior of the query processor."
;; The timezone the query should be ran in, overriding the default report timezone for the instance.
{(s/optional-key :report-timezone) su/NonBlankString})
;; TODO - should we add s/Any keys here? What if someone wants to add custom middleware!
{(s/optional-key :report-timezone) su/NonBlankString
;; other Settings might be used somewhere, but I don't know about them. Add them if you come across them for
;; documentation purposes
s/Keyword s/Any})
(def ^:private Constraints
"Additional constraints added to a query limiting the maximum number of rows that can be returned. Mostly useful
......@@ -392,7 +390,10 @@
{;; maximum number of results to allow for a query with aggregations
(s/optional-key :max-results) su/IntGreaterThanOrEqualToZero
;; maximum number of results to allow for a query with no aggregations
(s/optional-key :max-results-bare-rows) su/IntGreaterThanOrEqualToZero})
(s/optional-key :max-results-bare-rows) su/IntGreaterThanOrEqualToZero
;; other Constraints might be used somewhere, but I don't know about them. Add them if you come across them for
;; documentation purposes
s/Keyword s/Any})
(def ^:private MiddlewareOptions
"Additional options that can be used to toggle middleware on or off."
......@@ -401,7 +402,10 @@
(s/optional-key :skip-results-metadata?) s/Bool
;; should we skip converting datetime types to ISO-8601 strings with appropriate timezone when post-processing
;; results? Used by `metabase.query-processor.middleware.format-rows`; default `false`
(s/optional-key :format-rows?) s/Bool})
(s/optional-key :format-rows?) s/Bool
;; other middleware options might be used somewhere, but I don't know about them. Add them if you come across them
;; for documentation purposes
s/Keyword s/Any})
;;; ------------------------------------------------------ Info ------------------------------------------------------
......@@ -454,43 +458,30 @@
`Card.dataset_query`."
(s/constrained
;; TODO - move database/virtual-id into this namespace so we don't have to use the magic number here
{:database (s/cond-pre (s/eq -1337) su/IntGreaterThanZero)
{:database (s/cond-pre (s/eq -1337) su/IntGreaterThanZero)
;; Type of query. `:query` = MBQL; `:native` = native. TODO - consider normalizing `:query` to `:mbql`
:type (s/enum :query :native)
(s/optional-key :native) NativeQuery
(s/optional-key :query) MBQLQuery
(s/optional-key :parameters) [Parameter]
:type (s/enum :query :native)
(s/optional-key :native) NativeQuery
(s/optional-key :query) MBQLQuery
(s/optional-key :parameters) [Parameter]
;;
;; OPTIONS
;;
;; These keys are used to tweak behavior of the Query Processor.
;; TODO - can we combine these all into a single `:options` map?
;;
(s/optional-key :settings) (s/maybe Settings)
(s/optional-key :constraints) (s/maybe Constraints)
(s/optional-key :middleware) (s/maybe MiddlewareOptions)
;; The maximum time, in seconds, to return cached results for this query rather than running a new query. This is
;; added automatically when running queries belonging to Cards when caching is enabled. Caching is handled by the
;; automatically by caching middleware.
(s/optional-key :cache-ttl) (s/maybe su/IntGreaterThanZero)
(s/optional-key :settings) (s/maybe Settings)
(s/optional-key :constraints) (s/maybe Constraints)
(s/optional-key :middleware) (s/maybe MiddlewareOptions)
;;
;; INFO
;;
(s/optional-key :info) (s/maybe Info)
;;
;; INTERNAL KEYS
;;
;; Don't try to specify these yourself, unless you want to make the query fail. These are added in along the way
;; for internal usage only.
;;
;; This gets added in for reasons I don't fully understand when we resolve source queries. Not sure I like it
;; being here.
(s/optional-key :result_metadata) (s/maybe [su/Map])
;;
;; `:driver` gets added to the query even though we have middleware that is supposed to resolve driver, and we
;; also have the `*driver*` dynamic var where we should probably be stashing the resolved driver anyway. It might
;; make sense to take this out in the future.
(s/optional-key :driver) {}}
;; Used when recording info about this run in the QueryExecution log; things like context query was ran in and
;; User who ran it
(s/optional-key :info) (s/maybe Info)
;; Other various keys get stuck in the query dictionary at some point or another by various pieces of QP
;; middleware to record bits of state. Everyone else can ignore them.
s/Keyword s/Any}
(fn [{native :native, mbql :query, query-type :type}]
(case query-type
:native (core/and native (core/not mbql))
......
......@@ -49,7 +49,9 @@
(let [[symb-name clause-name] (if (vector? clause-name)
clause-name
[clause-name clause-name])]
`(def ~(vary-meta symb-name assoc :private true, :clause-name (keyword clause-name))
`(def ~(vary-meta symb-name assoc
:clause-name (keyword clause-name)
:doc (format "Schema for a valid %s clause." clause-name))
(clause ~(keyword clause-name) ~@(stringify-names arg-names-and-schemas)))))
......
......@@ -15,6 +15,7 @@
[add-row-count-and-status :as row-count-and-status]
[add-settings :as add-settings]
[annotate-and-sort :as annotate-and-sort]
[auto-bucket-datetime-breakouts :as bucket-datetime]
[bind-effective-timezone :as bind-timezone]
[binning :as binning]
[cache :as cache]
......@@ -105,6 +106,7 @@
add-dim/add-remapping
expand/expand-middleware ; ▲▲▲ QUERY EXPANSION POINT ▲▲▲ All functions *above* will see EXPANDED query during PRE-PROCESSING
implicit-clauses/add-implicit-clauses
bucket-datetime/auto-bucket-datetime-breakouts
source-table/resolve-source-table-middleware
row-count-and-status/add-row-count-and-status ; ▼▼▼ RESULTS WRAPPING POINT ▼▼▼ All functions *below* will see results WRAPPED in `:data` during POST-PROCESSING
parameters/substitute-parameters
......
(ns metabase.query-processor.interface
(ns ^:deprecated metabase.query-processor.interface
"Definitions of `Field`, `Value`, and other record types present in an expanded query.
This namespace should just contain definitions ^:deprecated of various protocols and record types; associated logic
should go in `metabase.query-processor.middleware.expand`."
(:require [metabase.models
(:require [metabase.config :as config]
[metabase.models
[dimension :as dim]
[field :as field]]
[metabase.sync.interface :as i]
......@@ -579,3 +580,10 @@
source-query
native-source-query))))
"Query must specify either `:source-table` or `:source-query`, but not both."))
;; Go ahead and mark all the `->Record` and `map->Record` functions as deprecated too! Just so they show up in red in
;; Emacs
(when config/is-dev?
(doseq [[_ varr] (ns-publics *ns*)
:when (fn? (var-get varr))]
(alter-meta! varr assoc :deprecated true)))
(ns metabase.query-processor.middleware.auto-bucket-datetime-breakouts
"Middleware for automatically bucketing unbucketed `:type/DateTime` breakout Fields with `:day` bucketing."
(:require [metabase.mbql
[schema :as mbql.s]
[util :as mbql.u]]
[metabase.models.field :refer [Field]]
[metabase.util :as u]
[metabase.util.schema :as su]
[schema.core :as s]
[toucan.db :as db]))
(def ^:private FieldTypeInfo
{:id su/IntGreaterThanZero
:base_type (s/maybe su/FieldType)
:special_type (s/maybe su/FieldType)})
(s/defn ^:private is-datetime-field?
[{base-type :base_type, special-type :special_type} :- (s/maybe FieldTypeInfo)]
(or (isa? base-type :type/DateTime)
(isa? special-type :type/DateTime)))
(s/defn ^:private unbucketed-breakouts->field-id->type-info :- {su/IntGreaterThanZero (s/maybe FieldTypeInfo)}
"Fetch a map of Field ID -> type information for the Fields referred to by the `unbucketed-breakouts`."
[unbucketed-breakouts :- (su/non-empty [mbql.s/field-id])]
(u/key-by :id (db/select [Field :id :base_type :special_type]
:id [:in (set (map second unbucketed-breakouts))])))
(s/defn ^:private wrap-unbucketed-datetime-breakouts :- [mbql.s/Field]
"Wrap each breakout in `breakouts` in a `:datetime-field` clause if appropriate; look at corresponing type
information in `field-id->type-inf` to see if we should do so."
[breakouts :- [mbql.s/Field], field-id->type-info :- {su/IntGreaterThanZero (s/maybe FieldTypeInfo)}]
(for [breakout breakouts]
(if (and (mbql.u/is-clause? :field-id breakout)
(is-datetime-field? (field-id->type-info (second breakout))))
[:datetime-field breakout :day]
breakout)))
(s/defn ^:private auto-bucket-datetime-breakouts* :- mbql.s/Query
[{{breakouts :breakout} :query, :as query} :- mbql.s/Query]
;; find any breakouts in the query that are just plain `[:field-id ...]` clauses
(if-let [unbucketed-breakouts (seq (filter (partial mbql.u/is-clause? :field-id) breakouts))]
;; if we found some unbuketed breakouts, fetch the Fields & type info that are referred to by those breakouts...
(let [field-id->type-info (unbucketed-breakouts->field-id->type-info unbucketed-breakouts)]
;; ...and then update each breakout by wrapping it if appropriate
(update-in query [:query :breakout] #(wrap-unbucketed-datetime-breakouts % field-id->type-info)))
;; otherwise if there are no unbuketed breakouts return the query as-is
query))
(defn auto-bucket-datetime-breakouts
"Middleware that automatically wraps breakout `:field-id` clauses in `[:datetime-field ... :day]` if the Field they
refer to has a type that derives from `:type/DateTime`. (This is done for historic reasons, before datetime
bucketing was added to MBQL; datetime Fields defaulted to breaking out by day. We might want to revisit this
behavior in the future.)"
[qp]
(comp qp auto-bucket-datetime-breakouts*))
......@@ -3,7 +3,7 @@
[medley.core :as m]
[metabase.models.database :refer [Database]]
[metabase.query-processor :as qp]
[metabase.test.data :refer :all]
[metabase.test.data :as data]
[toucan.db :as db]))
(def ^:private col-defaults
......@@ -20,7 +20,7 @@
:native_form {:query "SELECT ID FROM VENUES ORDER BY ID DESC LIMIT 2", :params []}}}
(-> (qp/process-query {:native {:query "SELECT ID FROM VENUES ORDER BY ID DESC LIMIT 2"}
:type :native
:database (id)})
:database (data/id)})
(m/dissoc-in [:data :results_metadata])))
;; Check that column ordering is maintained
......@@ -37,7 +37,7 @@
:native_form {:query "SELECT ID, NAME, CATEGORY_ID FROM VENUES ORDER BY ID DESC LIMIT 2", :params []}}}
(-> (qp/process-query {:native {:query "SELECT ID, NAME, CATEGORY_ID FROM VENUES ORDER BY ID DESC LIMIT 2"}
:type :native
:database (id)})
:database (data/id)})
(m/dissoc-in [:data :results_metadata])))
;; Check that we get proper error responses for malformed SQL
......@@ -46,7 +46,7 @@
:error "Column \"ZID\" not found"}
(dissoc (qp/process-query {:native {:query "SELECT ZID FROM CHECKINS LIMIT 2"} ; make sure people know it's to be expected
:type :native
:database (id)})
:database (data/id)})
:stacktrace
:query
:expanded-query))
......
......@@ -22,23 +22,15 @@
;; check that a built-in Metric gets removed from the query and put in `:ga`
(expect
{:query {:filter nil}
:ga {:segment nil, :metrics "ga:users"}}
(qp/transform-query {:query {:aggregation [:metric "ga:users"]}}))
{:ga {:segment nil, :metrics "ga:users"}}
(qp/transform-query {:query {:aggregation [[:metric "ga:users"]]}}))
;; check that a built-in segment gets removed from the query and put in `:ga`
(expect
{:query {:filter nil}
:ga {:segment "gaid::-4", :metrics nil}}
{:ga {:segment "gaid::-4", :metrics nil}}
(qp/transform-query {:query {:filter [:segment "gaid::-4"]}}))
;; check that it still works if wrapped in an `:and`
(expect
{:query {:filter nil}
:ga {:segment "gaid::-4", :metrics nil}}
(qp/transform-query {:query {:filter [:and [:segment "gaid::-4"]]}}))
;; check that other things stay in the order-by clause
(expect
{:query {:filter [:< 100 200]}
......
(ns metabase.query-processor.middleware.auto-bucket-datetime-breakouts-test
(:require [expectations :refer [expect]]
[metabase.models.field :refer [Field]]
[metabase.query-processor.middleware.auto-bucket-datetime-breakouts :as auto-bucket-datetime-breakouts]
[metabase.util :as u]
[toucan.util.test :as tt]))
(defn- auto-bucket [query]
((auto-bucket-datetime-breakouts/auto-bucket-datetime-breakouts identity)
query))
(defn- auto-bucket-mbql [mbql-query]
(-> (auto-bucket {:database 1, :type :query, :query mbql-query})
:query))
;; does a :type/DateTime Field get auto-bucketed when present in a breakout clause?
(tt/expect-with-temp [Field [field {:base_type :type/DateTime, :special_type nil}]]
{:source-table 1
:breakout [[:datetime-field [:field-id (u/get-id field)] :day]]}
(auto-bucket-mbql
{:source-table 1
:breakout [[:field-id (u/get-id field)]]}))
;; should be considered to be :type/DateTime based on `special_type` as well
(tt/expect-with-temp [Field [field {:base_type :type/Integer, :special_type :type/DateTime}]]
{:source-table 1
:breakout [[:datetime-field [:field-id (u/get-id field)] :day]]}
(auto-bucket-mbql
{:source-table 1
:breakout [[:field-id (u/get-id field)]]}))
;; do native queries pass thru unchanged?
(let [native-query {:database 1, :type :native, :native {:query "SELECT COUNT(cans) FROM birds;"}}]
(expect
native-query
(auto-bucket native-query)))
;; do MBQL queries w/o breakouts pass thru unchanged?
(expect
{:source-table 1}
(auto-bucket-mbql
{:source-table 1}))
;; does a breakout Field that isn't :type/DateTime pass thru unchnaged?
(tt/expect-with-temp [Field [field {:base_type :type/Integer, :special_type nil}]]
{:source-table 1
:breakout [[:field-id (u/get-id field)]]}
(auto-bucket-mbql
{:source-table 1
:breakout [[:field-id (u/get-id field)]]}))
;; does a :type/DateTime breakout Field that is already bucketed pass thru unchanged?
(tt/expect-with-temp [Field [field {:base_type :type/DateTime, :special_type nil}]]
{:source-table 1
:breakout [[:datetime-field [:field-id (u/get-id field)] :month]]}
(auto-bucket-mbql
{:source-table 1
:breakout [[:datetime-field [:field-id (u/get-id field)] :month]]}))
;; does the middleware avoid barfing if for some reason the Field could not be resolved in the DB? (That is the job of
;; the resolve middleware to worry about that stuff.)
(expect
{:source-table 1
:breakout [[:field-id Integer/MAX_VALUE]]}
(auto-bucket-mbql
{:source-table 1
:breakout [[:field-id Integer/MAX_VALUE]]}))
......@@ -19,8 +19,8 @@
(expect
{:database (data/id)
:type :query
:query {:aggregation [:count]
:breakout [[:field-literal :price :type/Integer]]
:query {:aggregation [[:count]]
:breakout [[:field-literal "price" :type/Integer]]
:source-query {:source-table (data/id :venues)}}}
(tt/with-temp Card [card {:dataset_query {:database (data/id)
:type :query
......@@ -28,25 +28,24 @@
(fetch-source-query {:database database/virtual-id
:type :query
:query {:source-table (str "card__" (u/get-id card))
:aggregation [:count]
:breakout [[:field-literal :price :type/Integer]]}})))
:aggregation [[:count]]
:breakout [[:field-literal "price" :type/Integer]]}})))
;; make sure that the `fetch-source-query` middleware correctly resolves native queries
(expect
{:database (data/id)
:type :query
:query {:aggregation [:count]
:breakout [[:field-literal :price :type/Integer]]
:source-query {:native (format "SELECT * FROM %s" (data/format-name "venues"))
:template-tags nil}}}
:query {:aggregation [[:count]]
:breakout [[:field-literal "price" :type/Integer]]
:source-query {:native (format "SELECT * FROM %s" (data/format-name "venues"))}}}
(tt/with-temp Card [card {:dataset_query {:database (data/id)
:type :native
:native {:query (format "SELECT * FROM %s" (data/format-name "venues"))}}}]
(fetch-source-query {:database database/virtual-id
:type :query
:query {:source-table (str "card__" (u/get-id card))
:aggregation [:count]
:breakout [[:field-literal :price :type/Integer]]}})))
:aggregation [[:count]]
:breakout [[:field-literal "price" :type/Integer]]}})))
(defn- expand-and-scrub [query-map]
(-> query-map
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment