diff --git a/.dir-locals.el b/.dir-locals.el index 9619e4c333449fa95c3d874d5f29d2cf64f10570..d7348fc7057d8f2e9f3ed7fb5dedec3f67dffc57 100644 --- a/.dir-locals.el +++ b/.dir-locals.el @@ -36,6 +36,7 @@ (expect-with-dataset 1) (expect-with-datasets 1) (format-color 2) + (if-questionable-timezone-support 0) (if-sqlserver 0) (ins 1) (let-400 1) @@ -47,6 +48,7 @@ (matche 1) (matchu 1) (macrolet 1) + (mongo-let 1) (org-perms-case 1) (pdoseq 1) (post-insert 1) diff --git a/circle.yml b/circle.yml index 1d764718a5669f6dd1e5afe5d612606a03b860b8..22afd4a0aeb0903124d4b77173b8e2ceb807b78f 100644 --- a/circle.yml +++ b/circle.yml @@ -8,6 +8,12 @@ machine: version: 2.7.3 dependencies: override: + - sudo apt-get purge mongodb-org* + - sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 7F0CEB10 + - echo "deb http://repo.mongodb.org/apt/ubuntu precise/mongodb-org/3.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.0.list + - sudo apt-get update + - sudo apt-get install -y mongodb-org + - sudo service mongod restart - lein deps - pip install awscli==1.7.3 database: diff --git a/src/metabase/api/common.clj b/src/metabase/api/common.clj index f89bcd8023ae15193ab9038011f89ccac6b1c7b2..0424ec71c25af9037c0289077eb275761f9150f2 100644 --- a/src/metabase/api/common.clj +++ b/src/metabase/api/common.clj @@ -298,7 +298,7 @@ "Parse param string as an [ISO 8601 date](http://en.wikipedia.org/wiki/ISO_8601), e.g. `2015-03-24T06:57:23+00:00`" [symb value :nillable] - (try (u/parse-iso8601 value) + (try (u/->Timestamp value) (catch Throwable _ (throw (invalid-param-exception (name symb) (format "'%s' is not a valid date." value)))))) diff --git a/src/metabase/db/metadata_queries.clj b/src/metabase/db/metadata_queries.clj index 5ba4501207f81e62873cdd8aec3ca71df6c32f6f..df601b419617e7aa7a74680f20fc90e003161d61 100644 --- a/src/metabase/db/metadata_queries.clj +++ b/src/metabase/db/metadata_queries.clj @@ -1,37 +1,35 @@ (ns metabase.db.metadata-queries "Predefined QP queries for getting metadata about an external database." (:require [metabase.driver :as driver] + [metabase.driver.sync :as sync] [metabase.util :as u])) -;; TODO - These queries have to be evaluated by the query processor and macroexpanded at runtime every time they're ran. -;; It would be more efficient if we could let the QP could macroexpand normally for predefined queries like these - (defn- field-query [field query] - (->> (driver/process-query - {:type :query - :database ((u/deref-> field :table :db) :id) - :query (assoc query - :source_table ((u/deref-> field :table) :id))}) - :data - :rows)) + (-> (driver/process-query + {:type :query + :database ((u/deref-> field :table :db) :id) + :query (assoc query + :source_table ((u/deref-> field :table) :id))}) + :data + :rows)) (defn field-distinct-values - "Return the distinct values of FIELD." + "Return the distinct values of FIELD. + This is used to create a `FieldValues` object for `:category` Fields." [{field-id :id :as field}] - (->> (field-query field {:aggregation ["rows"] ; should we add a limit here? In case someone is dumb and tries to get millions of distinct values? - :breakout [field-id]}) ; or should we let them do it - (map first))) + (mapv first (field-query field {:breakout [field-id] + :limit sync/low-cardinality-threshold}))) (defn field-distinct-count "Return the distinct count of FIELD." 
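;; Illustrative sketch, not part of the patch: with the change above, `field-distinct-values`
;; issues a plain breakout query capped at `sync/low-cardinality-threshold` (40) instead of an
;; unbounded "rows" aggregation. The database/table/field IDs below are made up for the example.
(comment
  (field-query field {:breakout [field-id]
                      :limit    sync/low-cardinality-threshold})
  ;; roughly expands to this query dict before it is handed to `driver/process-query`:
  {:type     :query
   :database 1                     ; ((u/deref-> field :table :db) :id)
   :query    {:breakout     [10]   ; field-id
              :limit        40     ; sync/low-cardinality-threshold
              :source_table 2}})   ; ((u/deref-> field :table) :id)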
[{field-id :id :as field}] - (->> (field-query field {:aggregation ["distinct" field-id]}) - first - first)) + (-> (field-query field {:aggregation ["distinct" field-id]}) + first + first)) (defn field-count "Return the count of FIELD." [{field-id :id :as field}] - (->> (field-query field {:aggregation ["count" field-id]}) - first - first)) + (-> (field-query field {:aggregation ["count" field-id]}) + first + first)) diff --git a/src/metabase/driver.clj b/src/metabase/driver.clj index 255036bc1035524be6c4ad8492b95caf91ad13be..869398c1910d7903411a9ea663ab064bc0558bc5 100644 --- a/src/metabase/driver.clj +++ b/src/metabase/driver.clj @@ -116,6 +116,10 @@ (assert (fn? (f driver)) (format "Not a fn: %s" f))))) +(def ^:const driver-defaults + "Default implementations of methods for drivers." + {:date-interval u/relative-date}) + (defmacro defdriver "Define and validate a new Metabase DB driver. @@ -252,7 +256,8 @@ As with the other Field syncing functions in `metabase.driver.sync`, this method should return the modified FIELD, if any, or `nil`." [driver-name driver-map] `(def ~(vary-meta driver-name assoc :metabase.driver/driver (keyword driver-name)) - (let [m# ~driver-map] + (let [m# (merge driver-defaults + ~driver-map)] (verify-driver m#) m#))) diff --git a/src/metabase/driver/mongo.clj b/src/metabase/driver/mongo.clj index bb8a0f28f81f5f99e178298aa8d4a7c2e059a1ec..91bdb6db21eeaaada147a1e1fc09b5141621793d 100644 --- a/src/metabase/driver/mongo.clj +++ b/src/metabase/driver/mongo.clj @@ -154,5 +154,6 @@ :process-query process-query :process-query-in-context process-query-in-context :sync-in-context sync-in-context + :date-interval u/relative-date :humanize-connection-error-message humanize-connection-error-message :active-nested-field-name->type active-nested-field-name->type}) diff --git a/src/metabase/driver/mongo/query_processor.clj b/src/metabase/driver/mongo/query_processor.clj index e26addf607cbb36aa153333956cf178383720598..7f684cd8ccbe0e4213eeb2404dd57c8700d99d9c 100644 --- a/src/metabase/driver/mongo/query_processor.clj +++ b/src/metabase/driver/mongo/query_processor.clj @@ -11,52 +11,56 @@ [db :as mdb] [operators :refer :all] [query :refer :all]) - [metabase.db :refer :all] + (metabase [config :as config] + [db :refer :all]) [metabase.driver.query-processor :as qp] - [metabase.driver.query-processor.interface :refer [qualified-name-components]] + (metabase.driver.query-processor [annotate :as annotate] + [interface :refer [qualified-name-components map->DateTimeField map->DateTimeValue]]) [metabase.driver.mongo.util :refer [with-mongo-connection *mongo-connection* values->base-type]] [metabase.models.field :as field] [metabase.util :as u]) - (:import (com.mongodb CommandResult + (:import java.sql.Timestamp + java.util.Date + (com.mongodb CommandResult DB) - (clojure.lang PersistentArrayMap) - (org.bson.types ObjectId) + clojure.lang.PersistentArrayMap + org.bson.types.ObjectId (metabase.driver.query_processor.interface DateTimeField DateTimeValue Field OrderByAggregateField + RelativeDateTimeValue Value))) -(declare apply-clause - eval-raw-command - process-structured +(declare process-and-run-native process-and-run-structured) +(def ^:private ^:const $subtract :$subtract) + ;; # DRIVER QP INTERFACE (def ^:dynamic ^:private *query* nil) +(defn- log-monger-form [form] + (when-not qp/*disable-qp-logging* + (log/debug (u/format-color 'blue "\nMONGO AGGREGATION PIPELINE:\n%s\n" + (->> form + (walk/postwalk #(if (symbol? 
%) (symbol (name %)) %)) ; strip namespace qualifiers from Monger form + u/pprint-to-str) "\n")))) + (defn process-and-run "Process and run a MongoDB QUERY." [{query-type :type, :as query}] - (binding [*query* query] - (case (keyword query-type) - :query (let [generated-query (process-structured (:query query))] - (when-not qp/*disable-qp-logging* - (log/debug (u/format-color 'green "\nMONGER FORM:\n%s\n" - (->> generated-query - (walk/postwalk #(if (symbol? %) (symbol (name %)) %)) ; strip namespace qualifiers from Monger form - u/pprint-to-str) "\n"))) ; so it's easier to read - (eval generated-query)) - :native (let [results (eval-raw-command (:query (:native query)))] - (if (sequential? results) results - [results]))))) + {:pre [query-type]} + (case (keyword query-type) + :query (process-and-run-structured query) + :native (process-and-run-native query))) ;; # NATIVE QUERY PROCESSOR -(defn eval-raw-command +(defn- eval-raw-command "Evaluate raw MongoDB javascript code. This must be ran insided the body of a `with-mongo-connection`. (with-mongo-connection [_ \"mongodb://localhost/test\"] @@ -70,145 +74,16 @@ (let [{result "retval"} (PersistentArrayMap/create (.toMap result))] result))) +(defn- process-and-run-native [query] + (let [results (eval-raw-command (:query (:native query)))] + (if (sequential? results) results + [results]))) -;; # STRUCTURED QUERY PROCESSOR - -;; ## AGGREGATION IMPLEMENTATIONS - -(def ^:dynamic *collection-name* - "String name of the collection (i.e., `Table`) that we're currently querying against." - nil) -(def ^:dynamic *constraints* - "Monger clauses generated from query dict `filter` clauses; bound dynamically so we can insert these as appropriate for various types of aggregations." - nil) - -(defn aggregate - "Generate a Monger `aggregate` form." - [& forms] - `(mc/aggregate ^DB *mongo-connection* ~*collection-name* [~@(when *constraints* - [{$match *constraints*}]) - ~@(filter identity forms)])) - -;; Return qualified string name of FIELD, e.g. `venue` or `venue.address`. -(defmulti field->name (fn - (^String [this] (class this)) - (^String [this separator] (class this)))) - -(defmethod field->name Field - ([this] - (field->name this ".")) - ([this separator] - (apply str (interpose separator (rest (qualified-name-components this)))))) - -(defmethod field->name OrderByAggregateField - ([this] - (field->name this nil)) - ([this _] - (let [{:keys [aggregation-type]} (:aggregation (:query *query*))] - (case aggregation-type - :avg "avg" - :count "count" - :distinct "count" - :stddev "stddev" - :sum "sum")))) - -(defmethod field->name DateTimeField - ([this] - (field->name (:field this))) - ([this separator] - (field->name (:field this) separator))) - -(defn- field->$str - "Given a FIELD, return a `$`-qualified field name for use in a Mongo aggregate query, e.g. `\"$user_id\"`." 
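;; Illustrative sketch, not part of the patch: the reworked `process-and-run` above simply
;; dispatches on :type. A :native query still goes through `eval-raw-command`, and a scalar
;; return value is wrapped in a vector so the rest of the QP always sees a sequence. The
;; connection string and result value below are made up for the example.
(comment
  (with-mongo-connection [_ "mongodb://localhost/test"]
    (process-and-run {:type   :native
                      :native {:query "db.zips.count()"}}))
  ;; => e.g. [29353.0]
  )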
- [field] - (format "$%s" (field->name field))) - -(defn- aggregation:rows [] - `(doall (with-collection ^DB *mongo-connection* ~*collection-name* - ~@(when *constraints* [`(find ~*constraints*)]) - ~@(mapcat apply-clause (dissoc (:query *query*) :filter))))) - -(defn- aggregation:count - ([] - `[{:count (mc/count ^DB *mongo-connection* ~*collection-name* - ~*constraints*)}]) - ([field] - `[{:count (mc/count ^DB *mongo-connection* ~*collection-name* - (merge ~*constraints* - {~(field->name field) {$exists true}}))}])) - -(defn- aggregation:avg [field] - (aggregate {$group {"_id" nil - "avg" {$avg (field->$str field)}}} - {$project {"_id" false, "avg" true}})) - -(defn- aggregation:distinct [field] - ;; Unfortunately trying to do a MongoDB distinct aggregation runs out of memory if there are more than a few thousand values - ;; because Monger currently doesn't expose any way to enable allowDiskUse in aggregations - ;; (see https://groups.google.com/forum/#!searchin/clojure-mongodb/$2BallowDiskUse/clojure-mongodb/3qT34rZSFwQ/tYCxj5coo8gJ). - ;; - ;; We also can't effectively limit the number of values considered in the aggregation meaning simple things like determining categories - ;; in sync (which only needs to know if distinct count is < 40, meaning it can theoretically stop as soon as it sees the 40th value) - ;; will still barf on large columns. - ;; - ;; It's faster and better-behaved to just implement this logic in Clojure-land for the time being. - ;; Since it's lazy we can handle large data sets (I've ran this successfully over 500,000+ document collections w/o issue). - [{:count (let [values (transient (set [])) - limit (:limit (:query *query*)) - keep-taking? (if limit (fn [_] - (< (count values) limit)) - (constantly true)) - field-id (or (:field-id field) ; Field - (:field-id (:field field)))] ; DateTimeField - (->> (@(resolve 'metabase.driver.mongo/field-values-lazy-seq) (sel :one field/Field :id field-id)) ; resolve driver at runtime to avoid circular deps - (filter identity) - (map hash) - (map #(conj! values %)) - (take-while keep-taking?) - dorun) - (count values))}]) - -(defn- aggregation:sum [field] - (aggregate {$group {"_id" nil ; TODO - I don't think this works for _id - "sum" {$sum (field->$str field)}}} - {$project {"_id" false, "sum" true}})) - -(defn- match-aggregation [{:keys [aggregation-type field]}] - (if-not field - ;; aggregations with no Field - (case aggregation-type - :rows (aggregation:rows) - :count (aggregation:count)) - ;; aggregations with a field - ((case aggregation-type - :avg aggregation:avg - :count aggregation:count - :distinct aggregation:distinct - :sum aggregation:sum) ; TODO -- stddev isn't implemented for mongo - field))) - - -;; ## BREAKOUT -;; This is similar to the aggregation stuff but has to be implemented separately since Mongo doesn't really have -;; GROUP BY functionality the same way SQL does. -;; This is annoying, since it effectively duplicates logic we have in the aggregation definitions above and the -;; clause definitions below, but the query we need to generate is different enough that I haven't found a cleaner -;; way of doing this yet. -(defn- breakout-aggregation->field-name+expression - "Match AGGREGATION clause of a structured query that contains a `breakout` clause, and return - a pair containing `[field-name aggregation-expression]`, which are used to generate the Mongo aggregate query." 
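;; Illustrative sketch, not part of the patch: the lazy-seq distinct-count workaround removed
;; here is replaced further down by a pure-pipeline version -- `$addToSet` collects the distinct
;; values into a "count" column and the final `$project` takes `{$size "$count"}`. Combined with
;; `:allow-disk-use true` when the pipeline is run, this avoids the memory problems described in
;; the old comment. The field name is made up; the two key stages look roughly like:
(comment
  {:$group   {"_id" nil, "count" {:$addToSet "$venue_id"}}}
  {:$project {"_id" false, "count" {:$size "$count"}}})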
- [{:keys [aggregation-type field]}] - ;; AFAIK these are the only aggregation types that make sense in combination with a breakout clause or are we missing something? - ;; At any rate these seem to be the most common use cases, so we can add more here if and when they're needed. - (if-not field - (case aggregation-type - :rows nil - :count ["count" {$sum 1}]) - (case aggregation-type - :avg ["avg" {$avg (field->$str field)}] - :sum ["sum" {$sum (field->$str field)}]))) -;;; BREAKOUT FIELD NAME ESCAPING FOR $GROUP +;;; # STRUCTURED QUERY PROCESSOR + +;;; ## FORMATTING + ;; We're not allowed to use field names that contain a period in the Mongo aggregation $group stage. ;; Not OK: ;; {"$group" {"source.username" {"$first" {"$source.username"}, "_id" "$source.username"}}, ...} @@ -217,121 +92,168 @@ ;; Escaped: ;; {"$group" {"source___username" {"$first" {"$source.username"}, "_id" "$source.username"}}, ...} -(defn ag-unescape-nested-field-names - "Restore the original, unescaped nested Field names in the keys of RESULTS. - E.g. `:source___service` becomes `:source.service`" - [results] - ;; Build a map of escaped key -> unescaped key by looking at the keys in the first result - ;; e.g. {:source___username :source.username} - (let [replacements (into {} (for [k (keys (first results))] - (let [k-str (name k) - unescaped (s/replace k-str #"___" ".")] - (when-not (= k-str unescaped) - {k (keyword unescaped)}))))] - ;; If the map is non-empty then map set/rename-keys over the results with it - (if-not (seq replacements) - results - (for [row results] - (set/rename-keys row replacements))))) - -(defn- do-breakout - "Generate a Monger query from a structured QUERY dictionary that contains a `breakout` clause. - Since the Monger query we generate looks very different from ones we generate when no `breakout` clause - is present, this is essentialy a separate implementation :/" - [{aggregation :aggregation, breakout-fields :breakout, order-by :order-by, limit :limit, :as query}] - (let [;; Shadow the top-level definition of field->name with one that will use "___" as the separator instead of "." - field->escaped-name (u/rpartial field->name "___") - [ag-field ag-clause] (breakout-aggregation->field-name+expression aggregation) - fields (map field->escaped-name breakout-fields) - $fields (map field->$str breakout-fields) - fields->$fields (zipmap fields $fields)] - `(ag-unescape-nested-field-names - ~(aggregate {$group (merge {"_id" (if (= (count fields) 1) (first $fields) - fields->$fields)} - (when (and ag-field ag-clause) - {ag-field ag-clause}) - (into {} (for [[field $field] fields->$fields] - (when-not (= field "_id") - {field {$first $field}}))))} - {$sort (->> order-by - (mapcat (fn [{:keys [field direction]}] - [(field->escaped-name field) (case direction - :ascending 1 - :descending -1)])) - (apply sorted-map))} - {$project (merge {"_id" false} - (when ag-field - {ag-field true}) - (zipmap fields (repeat true)))} - (when limit - {$limit limit}))))) - -;; ## PROCESS-STRUCTURED - -(defn process-structured - "Process a structured MongoDB QUERY. - This establishes some bindings, then: - - * queries that contain `breakout` clauses are handled by `do-breakout` - * other queries are handled by `match-aggregation`, which hands off to the - appropriate fn defined by a `defaggregation`." 
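;; Illustrative sketch, not part of the patch: the "___" escaping described above (plus a
;; "~~~<unit>" suffix for date-bucketed columns) is applied by the new `->lvalue` below and
;; undone by `unescape-names` once results come back. Field and unit names are made up.
(comment
  ;; a nested field source.username bucketed by :day gets this column name inside the pipeline:
  "source___username~~~day"
  ;; ... and unescape-names renames the corresponding result key back to:
  :source.username)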
- [{:keys [source-table aggregation breakout] :as query}] - (binding [*collection-name* (:name source-table) - *constraints* (when-let [filter-clause (:filter query)] - (apply-clause [:filter filter-clause]))] - (if (seq breakout) (do-breakout query) - (match-aggregation aggregation)))) - - -;; ## CLAUSE APPLICATION 2.0 - -(def ^:private clauses - "Used by `defclause` to store the clause definitions generated by it." - (atom '())) - -(defmacro ^:private defclause - "Generate a new clause definition that will be called inside of a `match` statement - whenever CLAUSE matches MATCH-BINDING. - - In general, these should emit a vector of forms to be included in the generated Monger query; - however, `filter` is handled a little differently (see below)." - [clause match-binding & body] - `(swap! clauses concat '[[~clause ~match-binding] (try - ~@body - (catch Throwable e# - (log/error (color/red ~(format "Failed to process '%s' clause:" (name clause)) - (.getMessage e#)))))])) - -;; ### CLAUSE DEFINITIONS - -;; ### fields -(defclause :fields fields - `[(fields ~(mapv field->name fields))]) - - -;; ### filter - -(defmulti format-value class) - -(defmethod format-value Value [{value :value, {:keys [field-name base-type]} :field}] - (if (and (= field-name "_id") - (= base-type :UnknownField)) - `(ObjectId. ~value) - value)) - -(defmethod format-value DateTimeValue [{^java.sql.Timestamp value :value}] - (java.util.Date. (.getTime value))) +(defprotocol IRValue + (->rvalue [this] + "Format this `Field` or `Value` for use as the right hand value of an expression, e.g. by adding `$` to a `Field`'s name")) + +(defprotocol IField + (->lvalue ^String [this] + "Return an escaped name that can be used as the name of a given Field.") + (->initial-rvalue [this] + "Return the rvalue that should be used in the *initial* projection for this `Field`.")) + + +(defn- field->name + "Return a single string name for FIELD. For nested fields, this creates a combined qualified name." + ^String [^Field field, ^String separator] + (apply str (interpose separator (rest (qualified-name-components field))))) + +(defmacro ^:private mongo-let [[field value] & body] + {:$let {:vars {(keyword field) value} + :in `(let [~field ~(keyword (str "$$" (name field)))] + ~@body)}}) + +(extend-protocol IField + Field + (->lvalue [this] + (field->name this "___")) + + (->initial-rvalue [this] + (str \$ (field->name this "."))) + + OrderByAggregateField + (->lvalue [_] + (let [{:keys [aggregation-type]} (:aggregation (:query *query*))] + (case aggregation-type + :avg "avg" + :count "count" + :distinct "count" + :sum "sum"))) + + DateTimeField + (->lvalue [{unit :unit, ^Field field :field}] + (str (->lvalue field) "~~~" (name unit))) + + (->initial-rvalue [{unit :unit, {:keys [special-type], :as ^Field field} :field}] + (mongo-let [field (as-> field <> + (->initial-rvalue <>) + (cond + (= special-type :timestamp_milliseconds) + {$add [(java.util.Date. 0) <>]} + + (= special-type :timestamp_seconds) + {$add [(java.util.Date. 
0) {$multiply [<> 1000]}]} + + :else <>))] + (let [stringify (fn stringify + ([format-string] + (stringify format-string field)) + ([format-string fld] + {:___date {:$dateToString {:format format-string + :date fld}}}))] + (case unit + :default field + :minute (stringify "%Y-%m-%dT%H:%M:00") + :minute-of-hour {$minute field} + :hour (stringify "%Y-%m-%dT%H:00:00") + :hour-of-day {$hour field} + :day (stringify "%Y-%m-%d") + :day-of-week {$dayOfWeek field} + :day-of-month {$dayOfMonth field} + :day-of-year {$dayOfYear field} + :week (stringify "%Y-%m-%d" {$subtract [field + {$multiply [{$subtract [{$dayOfWeek field} + 1]} + (* 24 60 60 1000)]}]}) + :week-of-year {$add [{$week field} + 1]} + :month (stringify "%Y-%m") + :month-of-year {$month field} + ;; For quarter we'll just subtract enough days from the current date to put it in the correct month and stringify it as yyyy-MM + ;; Subtracting (($dayOfYear(field) % 91) - 3) days will put you in correct month. Trust me. + :quarter (stringify "%Y-%m" {$subtract [field + {$multiply [{$subtract [{$mod [{$dayOfYear field} + 91]} + 3]} + (* 24 60 60 1000)]}]}) + :quarter-of-year (mongo-let [month {$month field}] + {$divide [{$subtract [{$add [month 2]} + {$mod [{$add [month 2]} + 3]}]} + 3]}) + :year {$year field}))))) + +(extend-protocol IRValue + Field + (->rvalue [this] + (str \$ (->lvalue this))) + + DateTimeField + (->rvalue [this] + (str \$ (->lvalue this))) + + Value + (->rvalue [{value :value, {:keys [field-name base-type]} :field}] + (if (and (= field-name "_id") + (= base-type :UnknownField)) + `(ObjectId. ~value) + value)) + + DateTimeValue + (->rvalue [{^java.sql.Timestamp value :value, {:keys [unit]} :field}] + (let [stringify (fn stringify + ([format-string] + (stringify format-string value)) + ([format-string v] + {:___date (u/format-date format-string v)})) + extract (u/rpartial u/date-extract value)] + (case (or unit :default) + :default (u/->Date value) + :minute (stringify "yyyy-MM-dd'T'HH:mm:00") + :minute-of-hour (extract :minute) + :hour (stringify "yyyy-MM-dd'T'HH:00:00") + :hour-of-day (extract :hour) + :day (stringify "yyyy-MM-dd") + :day-of-week (extract :day-of-week) + :day-of-month (extract :day-of-month) + :day-of-year (extract :day-of-year) + :week (stringify "yyyy-MM-dd" (u/date-trunc :week value)) + :week-of-year (extract :week-of-year) + :month (stringify "yyyy-MM") + :month-of-year (extract :month) + :quarter (stringify "yyyy-MM" (u/date-trunc :quarter value)) + :quarter-of-year (extract :quarter-of-year) + :year (extract :year)))) + + RelativeDateTimeValue + (->rvalue [{:keys [amount unit field], :as this}] + (->rvalue (map->DateTimeValue {:value (u/relative-date (or unit :day) amount) + :field field})))) + + +;;; ## CLAUSE APPLICATION + +;;; ### initial projection + +(defn- add-initial-projection [query pipeline] + (let [all-fields (distinct (annotate/collect-fields query :keep-date-time-fields))] + (when (seq all-fields) + {$project (into (array-map) (for [field all-fields] + {(->lvalue field) (->initial-rvalue field)}))}))) + + +;;; ### filter (defn- parse-filter-subclause [{:keys [filter-type field value] :as filter}] - (let [field (when field (field->name field)) - value (when value (format-value value))] + (let [field (when field (->lvalue field)) + value (when value (->rvalue value))] (case filter-type :inside (let [lat (:lat filter) lon (:lon filter)] - {$and [{(field->name (:field lat)) {$gte (format-value (:min lat)), $lte (format-value (:max lat))}} - {(field->name (:field lon)) {$gte (format-value 
(:min lon)), $lte (format-value (:max lon))}}]}) - :between {field {$gte (format-value (:min-val filter)) - $lte (format-value (:max-val filter))}} + {$and [{(->lvalue (:field lat)) {$gte (->rvalue (:min lat)), $lte (->rvalue (:max lat))}} + {(->lvalue (:field lon)) {$gte (->rvalue (:min lon)), $lte (->rvalue (:max lon))}}]}) + :between {field {$gte (->rvalue (:min-val filter)) + $lte (->rvalue (:max-val filter))}} :is-null {field {$exists false}} :not-null {field {$exists true}} :contains {field (re-pattern value)} @@ -350,44 +272,154 @@ (= compound-type :or) {$or (mapv parse-filter-clause subclauses)} :else (parse-filter-subclause clause))) +(defn- handle-filter [{filter-clause :filter} pipeline] + (when filter-clause + {$match (parse-filter-clause filter-clause)})) -(defclause :filter filter-clause - (parse-filter-clause filter-clause)) +;;; ### aggregation -;; ### limit +(def ^:private ^:const ag-type->field-name + {:avg "avg" + :count "count" + :distinct "count" + :sum "sum"}) -(defclause :limit value - `[(limit ~value)]) - -;; ### order_by -(defclause :order-by subclauses - (let [sort-options (mapcat (fn [{:keys [field direction]}] - [(field->name field) (case direction - :ascending 1 - :descending -1)]) - subclauses)] - (when (seq sort-options) - `[(sort (array-map ~@sort-options))]))) - -;; ### page -(defclause :page page-clause - (let [{page-num :page items-per-page :items} page-clause - num-to-skip (* (dec page-num) items-per-page)] - `[(skip ~num-to-skip) - (limit ~items-per-page)])) - - -;; ### APPLY-CLAUSE +(defn- aggregation->rvalue [{:keys [aggregation-type field]}] + (if-not field + (case aggregation-type + :count {$sum 1}) + (case aggregation-type + :avg {$avg (->rvalue field)} + :count {$sum {$cond {:if (->rvalue field) + :then 1 + :else 0}}} + :distinct {$addToSet (->rvalue field)} + :sum {$sum (->rvalue field)}))) + +(defn- handle-breakout+aggregation [{breakout-fields :breakout, {ag-type :aggregation-type, :as aggregation} :aggregation} pipeline] + (let [aggregation? (and ag-type + (not= ag-type :rows)) + breakout? (seq breakout-fields)] + (when (or aggregation? breakout?) + (let [ag-field-name (ag-type->field-name ag-type)] + (filter identity + [ ;; create a totally sweet made-up column called __group to store the fields we'd like to group by + (when breakout? + {$project {"_id" "$_id" + "___group" (into {} (for [field breakout-fields] + {(->lvalue field) (->rvalue field)}))}}) + ;; Now project onto the __group and the aggregation rvalue + {$group (merge {"_id" (when breakout? + "$___group")} + (when aggregation + {ag-field-name (aggregation->rvalue aggregation)}))} + ;; Sort by _id (___group) + {$sort {"_id" 1}} + ;; now project back to the fields we expect + {$project (merge {"_id" false} + (when aggregation? 
+ {ag-field-name (if (= ag-type :distinct) + {$size "$count"} ; HACK + true)}) + (into {} (for [field breakout-fields] + {(->lvalue field) (format "$_id.%s" (->lvalue field))})))}]))))) + + +;;; ### order-by + +(defn- handle-order-by [{:keys [order-by]} pipeline] + (when (seq order-by) + {$sort (into (array-map) (for [{:keys [field direction]} order-by] + {(->lvalue field) (case direction + :ascending 1 + :descending -1)}))})) + + +;;; ### fields + +(defn- handle-fields [{:keys [fields]} pipeline] + (when (seq fields) + ;; add project _id = false to keep _id from getting automatically returned unless explicitly specified + {$project (into (array-map "_id" false) + (for [field fields] + {(->lvalue field) (->rvalue field)}))})) + + +;;; ### limit + +(defn- handle-limit [{:keys [limit]} pipeline] + (when limit + {$limit limit})) + + +;;; ### page + +(defn- handle-page [{{page-num :page items-per-page :items, :as page-clause} :page} pipeline] + (when page-clause + [{$skip (* items-per-page (dec page-num))} + {$limit items-per-page}])) + + +;;; # process + run + +(defn- generate-aggregation-pipeline [query] + (loop [pipeline [], [f & more] [add-initial-projection + handle-filter + handle-breakout+aggregation + handle-order-by + handle-fields + handle-limit + handle-page]] + (let [out (f query pipeline) + pipeline (cond + (nil? out) pipeline + (map? out) (conj pipeline out) + (sequential? out) (vec (concat pipeline out)))] + (if-not (seq more) + pipeline + (recur pipeline more))))) + +(defn- unescape-names + "Restore the original, unescaped nested Field names in the keys of RESULTS. + E.g. `:source___service` becomes `:source.service`" + [results] + ;; Build a map of escaped key -> unescaped key by looking at the keys in the first result + ;; e.g. {:source___username :source.username} + (let [replacements (into {} (for [k (keys (first results))] + (let [k-str (name k) + unescaped (-> k-str + (s/replace #"___" ".") + (s/replace #"~~~(.+)$" ""))] + (when-not (= k-str unescaped) + {k (keyword unescaped)}))))] + ;; If the map is non-empty then map set/rename-keys over the results with it + (if-not (seq replacements) + results + (do (log/debug "Unescaping fields:" (u/pprint-to-str 'green replacements)) + (for [row results] + (set/rename-keys row replacements)))))) -(defmacro match-clause - "Generate a `match` form against all the clauses defined by `defclause`." - [clause] - `(match ~clause - ~@@clauses - ~'_ nil)) -(defn apply-clause - "Match CLAUSE against a clause defined by `defclause`." - [clause] - (match-clause clause)) +(defn- unstringify-dates + "Convert string dates, which we wrap in dictionaries like `{:___date <str>}`, back to `Timestamps`. + This can't be done within the Mongo aggregation framework itself." + [results] + (for [row results] + (into {} (for [[k v] row] + {k (if (and (map? v) + (:___date v)) + (u/->Timestamp (:___date v)) + v)})))) + +(defn- process-and-run-structured [{database :database, {{source-table-name :name} :source-table} :query, :as query}] + {:pre [(map? database) + (string? 
source-table-name)]} + (binding [*query* query] + (let [generated-pipeline (generate-aggregation-pipeline (:query query))] + (log-monger-form generated-pipeline) + (->> (with-mongo-connection [_ database] + (mc/aggregate *mongo-connection* source-table-name generated-pipeline + :allow-disk-use true)) + unescape-names + unstringify-dates)))) diff --git a/src/metabase/driver/query_processor.clj b/src/metabase/driver/query_processor.clj index e652ba3c1b35448a88081a8a31d215e4babaec02..ba46c351e759b4374b26a2006a03a663a5167b1b 100644 --- a/src/metabase/driver/query_processor.clj +++ b/src/metabase/driver/query_processor.clj @@ -82,11 +82,19 @@ (fn [{{:keys [source-table], {source-table-id :id} :source-table} :query, :as query}] (qp (if-not (should-add-implicit-fields? query) query - (let [fields (->> (sel :many :fields [Field :name :display_name :base_type :special_type :preview_display :display_name :table_id :id :position :description], :table_id source-table-id, - :active true, :field_type [not= "sensitive"], :parent_id nil, (k/order :position :asc), (k/order :id :desc)) - (map resolve/rename-mb-field-keys) - (map map->Field) - (map #(resolve/resolve-table % {source-table-id source-table})))] + (let [fields (for [field (sel :many :fields [Field :name :display_name :base_type :special_type :preview_display :display_name :table_id :id :position :description] + :table_id source-table-id + :active true + :field_type [not= "sensitive"] + :parent_id nil + (k/order :position :asc) (k/order :id :desc))] + (let [field (-> (resolve/rename-mb-field-keys field) + map->Field + (resolve/resolve-table {source-table-id source-table}))] + (if (or (contains? #{:DateField :DateTimeField} (:base-type field)) + (contains? #{:timestamp_seconds :timestamp_milliseconds} (:special-type field))) + (map->DateTimeField {:field field, :unit :day}) + field)))] (if-not (seq fields) (do (log/warn (format "Table '%s' has no Fields associated with it." (:name source-table))) query) diff --git a/src/metabase/driver/query_processor/annotate.clj b/src/metabase/driver/query_processor/annotate.clj index e0e55ea2a2a684108987e06cd40919bc34ef9746..b368c248694f4c5a74d525780cc74de21f544572 100644 --- a/src/metabase/driver/query_processor/annotate.clj +++ b/src/metabase/driver/query_processor/annotate.clj @@ -34,21 +34,28 @@ ;;; ## Field Resolution -(defn- collect-fields +(defn collect-fields "Return a sequence of all the `Fields` inside THIS, recursing as needed for collections. For maps, add or `conj` to property `:path`, recording the keypath used to reach each `Field.` (collect-fields {:name \"id\", ...}) -> [{:name \"id\", ...}] (collect-fields [{:name \"id\", ...}]) -> [{:name \"id\", ...}] (collect-fields {:a {:name \"id\", ...}) -> [{:name \"id\", :path [:a], ...}]" - [this] - {:post [(every? (partial instance? metabase.driver.query_processor.interface.Field) %)]} + [this & [keep-date-time-fields?]] + {:post [(every? (fn [f] + (or (instance? metabase.driver.query_processor.interface.Field f) + (when keep-date-time-fields? + (instance? metabase.driver.query_processor.interface.DateTimeField f)))) %)]} (condp instance? this ;; For a DateTimeField we'll flatten it back into regular Field but include the :unit info for the frontend. 
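;; Illustrative sketch, not part of the patch: putting the Mongo QP pieces above together, a
;; structured query along the lines of "count of checkins grouped by user_id, limit 10" comes out
;; of generate-aggregation-pipeline as roughly the pipeline below (field names assumed; order_by,
;; fields, and page clauses omitted; monger.operators keywords shown literally).
(comment
  [{:$project {"user_id" "$user_id"}}                                ; initial projection
   {:$project {"_id" "$_id", "___group" {"user_id" "$user_id"}}}     ; build the ___group doc
   {:$group   {"_id" "$___group", "count" {:$sum 1}}}                ; aggregate per group
   {:$sort    {"_id" 1}}                                             ; sort by the group key
   {:$project {"_id" false, "count" true, "user_id" "$_id.user_id"}} ; project back to expected cols
   {:$limit   10}])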
;; Recurse so it is otherwise handled normally metabase.driver.query_processor.interface.DateTimeField - (let [{:keys [field unit]} this] - (collect-fields (assoc field :unit unit))) + (let [{:keys [field unit]} this + fields (collect-fields (assoc field :unit unit) keep-date-time-fields?)] + (if keep-date-time-fields? + (for [field fields] + (i/map->DateTimeField {:field field, :unit unit})) + fields)) metabase.driver.query_processor.interface.Field (if-let [parent (:parent this)] @@ -61,12 +68,12 @@ clojure.lang.IPersistentMap (for [[k v] (seq this) - field (collect-fields v) + field (collect-fields v keep-date-time-fields?) :when field] (assoc field :source k)) clojure.lang.Sequential - (for [[i field] (m/indexed (mapcat collect-fields this))] + (for [[i field] (m/indexed (mapcat (u/rpartial collect-fields keep-date-time-fields?) this))] (assoc field :clause-position i)) nil)) @@ -107,8 +114,8 @@ _ (assert (every? keyword? expected-keys)) missing-keys (set/difference actual-keys expected-keys)] (when (seq missing-keys) - (log/error (u/format-color 'red "Unknown fields - returned by results but not present in expanded query: %s\nExpected: %s\nActual: %s" - missing-keys expected-keys actual-keys))) + (log/warn (u/format-color 'yellow "There are fields we weren't expecting in the results: %s\nExpected: %s\nActual: %s" + missing-keys expected-keys actual-keys))) (concat fields (for [k missing-keys] {:base-type :UnknownField :special-type nil @@ -198,7 +205,7 @@ :destination_id [not= nil])))) ;; Fetch the destination Fields referenced by the ForeignKeys ([fields fk-ids id->dest-id] - (when (seq (vals id->dest-id)) + (when (seq id->dest-id) (fk-field->dest-fn fields fk-ids id->dest-id (sel :many :id->fields [Field :id :name :display_name :table_id :description :base_type :special_type :preview_display] :id [in (vals id->dest-id)])))) ;; Return a function that will return the corresponding destination Field for a given Field diff --git a/src/metabase/driver/query_processor/parse.clj b/src/metabase/driver/query_processor/parse.clj index 5e6ac65306102545e0eec478f8807406f04a7f53..17faae6a4afae2d049a2fd54fc5757d95292bfa9 100644 --- a/src/metabase/driver/query_processor/parse.clj +++ b/src/metabase/driver/query_processor/parse.clj @@ -17,7 +17,7 @@ (match value (_ :guard u/date-string?) (map->DateTimeValue {:field field - :value (u/parse-iso8601 value)}) + :value (u/->Timestamp value)}) ["relative_datetime" "current"] (map->RelativeDateTimeValue {:amount 0, :field field}) diff --git a/src/metabase/driver/sync.clj b/src/metabase/driver/sync.clj index e2144f8bdada4ff7cf24110e0a7ad245a2233115..6ae37cc078307201066a534458988f148707d747 100644 --- a/src/metabase/driver/sync.clj +++ b/src/metabase/driver/sync.clj @@ -357,31 +357,22 @@ ;; ## sync-field -(defmacro ^:private sync-field->> - "Like `->>`, but wrap each form with `try-apply`, and pass FIELD along to the next if the previous form returned `nil`." - [field & fns] - `(->> ~field - ~@(->> fns - (map (fn [f] - (let [[f & args] (if (list? f) f [f])] - `((fn [field#] - (or (u/try-apply ~f ~@args field#) - field#))))))))) - (defn- sync-field! "Sync the metadata for FIELD, marking urls, categories, etc. when applicable." [driver field] - {:pre [driver - field]} - (sync-field->> field - (maybe-driver-specific-sync-field! driver) - set-field-display-name-if-needed! - (mark-url-field! driver) - (mark-no-preview-display-field! driver) - mark-category-field-or-update-field-values! - (mark-json-field! driver) - auto-assign-field-special-type-by-name! 
- (sync-field-nested-fields! driver))) + {:pre [driver field]} + (loop [field field, [f & more] [(partial maybe-driver-specific-sync-field! driver) + set-field-display-name-if-needed! + (partial mark-url-field! driver) + (partial mark-no-preview-display-field! driver) + mark-category-field-or-update-field-values! + (partial mark-json-field! driver) + auto-assign-field-special-type-by-name! + (partial sync-field-nested-fields! driver)]] + (let [field (or (u/try-apply f field) + field)] + (when (seq more) + (recur field more))))) ;; Each field-syncing function below should return FIELD with any updates that we made, or nil. @@ -453,7 +444,7 @@ ;; ### mark-category-field-or-update-field-values! -(def ^:const ^:private low-cardinality-threshold +(def ^:const low-cardinality-threshold "Fields with less than this many distinct values should automatically be marked with `special_type = :category`." 40) diff --git a/src/metabase/email/messages.clj b/src/metabase/email/messages.clj index 2be5670f36ceec49ccbcb6404c98be6647825507..9189a0d62bd8ed46e43d0bf5fb28cbc598710007 100644 --- a/src/metabase/email/messages.clj +++ b/src/metabase/email/messages.clj @@ -25,7 +25,7 @@ :joinUrl join-url :quotation (:quote data-quote) :quotationAuthor (:author data-quote) - :today (u/now-with-format "MMM' 'dd,' 'yyyy")} + :today (u/format-date "MMM' 'dd,' 'yyyy" (System/currentTimeMillis))} (stencil/render-string tmpl))] (email/send-message :subject (str "You're invited to join " company "'s Metabase") diff --git a/src/metabase/util.clj b/src/metabase/util.clj index 74f4ebcf9bc71be8a93ff8c307b128805d095c41..9b3a6756f3599973d43159f9e3ed4ea5d3e9e1c8 100644 --- a/src/metabase/util.clj +++ b/src/metabase/util.clj @@ -1,20 +1,195 @@ (ns metabase.util "Common utility functions useful throughout the codebase." - (:require [clojure.java.jdbc :as jdbc] + (:require [clj-time.coerce :as coerce] + [clj-time.format :as time] + [clojure.java.jdbc :as jdbc] [clojure.pprint :refer [pprint]] [clojure.tools.logging :as log] - [clj-time.coerce :as coerce] - [clj-time.format :as time] [colorize.core :as color] [medley.core :as m]) (:import (java.net Socket InetSocketAddress InetAddress) java.sql.Timestamp - javax.xml.bind.DatatypeConverter)) + java.util.Calendar + javax.xml.bind.DatatypeConverter + org.joda.time.format.DateTimeFormatter)) (set! *warn-on-reflection* true) +;;; ### Protocols + +(defprotocol ITimestampCoercible + "Coerce object to a `java.sql.Timestamp`." + (->Timestamp ^java.sql.Timestamp [this] + "Coerce this object to a `java.sql.Timestamp`. + Strings are parsed as ISO-8601.")) + +(extend-protocol ITimestampCoercible + nil (->Timestamp [_] + nil) + Timestamp (->Timestamp [this] + this) + java.util.Date (->Timestamp [this] + (Timestamp. (.getTime this))) + Number (->Timestamp [this] + (Timestamp. this)) + Calendar (->Timestamp [this] + (->Timestamp (.getTime this))) + ;; Strings are expected to be in ISO-8601 format. `YYYY-MM-DD` strings *are* valid ISO-8601 dates. + String (->Timestamp [this] + (->Timestamp (DatatypeConverter/parseDateTime this)))) + + +(defprotocol IDateTimeFormatterCoercible + "Protocol for converting objects to `DateTimeFormatters`." 
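;; Illustrative sketch, not part of the patch: the ITimestampCoercible protocol above accepts
;; several input types; each of these returns a java.sql.Timestamp (strings are parsed as
;; ISO-8601, numbers are treated as milliseconds since the epoch). Example values are made up.
(comment
  (->Timestamp "2015-09-10T18:53:01.632Z")   ; ISO-8601 string
  (->Timestamp "2015-09-10")                 ; plain YYYY-MM-DD strings are valid ISO-8601 too
  (->Timestamp 1441911181632)                ; Long -> ms since the epoch
  (->Timestamp (java.util.Date.))            ; java.util.Date
  (->Timestamp (Calendar/getInstance))       ; java.util.Calendar
  (->Timestamp nil))                         ; => nil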
+ (->DateTimeFormatter ^org.joda.time.format.DateTimeFormatter [this] + "Coerce object to a `DateTimeFormatter`.")) + +(extend-protocol IDateTimeFormatterCoercible + String (->DateTimeFormatter [this] (time/formatter this)) + DateTimeFormatter (->DateTimeFormatter [this] this)) + + +;;; ## Date Stuff + +(defn new-sql-timestamp + "`java.sql.Date` doesn't have an empty constructor so this is a convenience that lets you make one with the current date. + (Some DBs like Postgres will get snippy if you don't use a `java.sql.Timestamp`)." + ^java.sql.Timestamp [] + (->Timestamp (System/currentTimeMillis))) + +(defn format-date + "Format DATE using a given FORMATTER. + DATE is anything that can be passed `->Timestamp`, such as a `Long` or ISO-8601 `String`. + DATE-FORMAT is anything that can be passed to `->DateTimeFormatter`, including a `String` or `DateTimeFormatter`." + ^String [date-format date] + (time/unparse (->DateTimeFormatter date-format) (coerce/from-long (.getTime (->Timestamp date))))) + +(def ^{:arglists '([date])} date->yyyy-mm-dd + "Format DATE as a `YYYY-MM-DD` string." + (partial format-date "yyyy-MM-dd")) + +(def ^{:arglists '([date])} date->iso-8601 + "Format DATE a an ISO-8601 string." + (partial format-date (time/formatters :date-time))) + +(defn now-iso8601 + "Return the current date as an ISO-8601 formatted string." + [] + (date->iso-8601 (System/currentTimeMillis))) + +(defn date-string? + "Is S a valid ISO 8601 date string?" + [s] + (boolean (when (string? s) + (try (->Timestamp s) + (catch Throwable e))))) + +(defn ->Date + "Coerece DATE to a `java.util.Date`." + ^java.util.Date [date] + (java.util.Date. (.getTime (->Timestamp date)))) + +(defn ->Calendar + "Coerce DATE to a `java.util.Calendar`." + ^java.util.Calendar [date] + (doto (Calendar/getInstance) + (.setTimeInMillis (.getTime (->Timestamp date))))) + +(defn relative-date + "Return a new `Timestamp` relative to the current time using a relative date UNIT. + + (relative-date :year -1) -> #inst 2014-11-12 ..." + ^java.sql.Timestamp + ([unit amount] + (relative-date unit amount (Calendar/getInstance))) + ([unit amount date] + (let [cal (->Calendar date) + [unit multiplier] (case unit + :second [Calendar/SECOND 1] + :minute [Calendar/MINUTE 1] + :hour [Calendar/HOUR 1] + :day [Calendar/DATE 1] + :week [Calendar/DATE 7] + :month [Calendar/MONTH 1] + :quarter [Calendar/MONTH 3] + :year [Calendar/YEAR 1])] + (.set cal unit (+ (.get cal unit) + (* amount multiplier))) + (->Timestamp cal)))) + + +(def ^:private ^:const date-extract-units + #{:minute-of-hour :hour-of-day :day-of-week :day-of-month :day-of-year :week-of-year :month-of-year :quarter-of-year :year}) + +(defn date-extract + "Extract UNIT from DATE. DATE defaults to now. + + (date-extract :year) -> 2015" + ([unit] + (date-extract unit (System/currentTimeMillis))) + ([unit date] + (let [cal (->Calendar date)] + (case unit + :minute-of-hour (.get cal Calendar/MINUTE) + :hour-of-day (.get cal Calendar/HOUR) + :day-of-week (.get cal Calendar/DAY_OF_WEEK) ; 1 = Sunday, etc. + :day-of-month (.get cal Calendar/DAY_OF_MONTH) + :day-of-year (.get cal Calendar/DAY_OF_YEAR) + :week-of-year (.get cal Calendar/WEEK_OF_YEAR) + :month-of-year (.get cal Calendar/MONTH) + :quarter-of-year (let [month (.get cal Calendar/MONTH)] + (int (/ (+ 2 month) + 3))) + :year (.get cal Calendar/YEAR))))) + + +(def ^:private ^:const date-trunc-units + #{:minute :hour :day :week :month :quarter}) + +(defn date-trunc + "Truncate DATE to UNIT. DATE defaults to now. + + (date-trunc :month). 
+ ;; -> #inst \"2015-11-01T00:00:00\"" + ([unit] + (date-trunc unit (System/currentTimeMillis))) + ([unit date] + (let [trunc-with-format (fn trunc-with-format + ([format-string] + (trunc-with-format format-string date)) + ([format-string d] + (->Timestamp (format-date format-string d))))] + (case unit + :minute (trunc-with-format "yyyy-MM-dd'T'HH:mm:00") + :hour (trunc-with-format "yyyy-MM-dd'T'HH:00:00") + :day (trunc-with-format "yyyy-MM-dd") + :week (let [day-of-week (date-extract :day-of-week date) + date (relative-date :day (- (dec day-of-week)) date)] + (trunc-with-format "yyyy-MM-dd" date)) + :month (trunc-with-format "yyyy-MM") + :quarter (let [year (date-extract :year date) + quarter (date-extract :quarter date)] + (->Timestamp (format "%d-%02d" year (* 3 quarter)))))))) + +(defn date-trunc-or-extract + "Apply date bucketing with UNIT to DATE. DATE defaults to now." + ([unit] + (date-trunc-or-extract unit (System/currentTimeMillis))) + ([unit date] + (cond + (= unit :default) date + + (contains? date-extract-units unit) + (date-extract unit date) + + (contains? date-trunc-units unit) + (date-trunc unit date)))) + + +;;; ## Etc + (defmacro -assoc* "Internal. Don't use this directly; use `assoc*` instead." [k v & more] @@ -34,49 +209,6 @@ (-assoc* ~@kvs)) ~object)) -(defn new-sql-timestamp - "`java.sql.Date` doesn't have an empty constructor so this is a convenience that lets you make one with the current date. - (Some DBs like Postgres will get snippy if you don't use a `java.sql.Timestamp`)." - [] - (Timestamp. (System/currentTimeMillis))) - -;; Actually this only supports [RFC 3339](https://tools.ietf.org/html/rfc3339), which is basically a subset of ISO 8601 -(defn parse-iso8601 - "Parse a string value expected in the iso8601 format into a `java.sql.Timestamp`. - NOTE: `YYYY-MM-DD` dates *are* valid iso8601 dates." - ^java.sql.Timestamp - [^String datetime] - (some->> datetime - DatatypeConverter/parseDateTime - .getTime ; Calendar -> Date - .getTime ; Date -> ms - Timestamp.)) - -(def ^:private ^java.text.SimpleDateFormat yyyy-mm-dd-simple-date-format - (java.text.SimpleDateFormat. "yyyy-MM-dd")) - -(defn date->yyyy-mm-dd - "Convert a date to a `YYYY-MM-DD` string." - ^String [^java.util.Date date] - (.format yyyy-mm-dd-simple-date-format date)) - -(defn date-string? - "Is S a valid ISO 8601 date string?" - [s] - (boolean (when (string? s) - (try (parse-iso8601 s) - (catch Throwable e))))) - -(defn now-iso8601 - "format the current time as iso8601 date/time string." - [] - (time/unparse (time/formatters :date-time) (coerce/from-long (System/currentTimeMillis)))) - -(defn now-with-format - "format the current time using a custom format." - [format-string] - (time/unparse (time/formatter format-string) (coerce/from-long (System/currentTimeMillis)))) - (defn format-num "format a number into a more human readable form." [number] @@ -294,9 +426,20 @@ (pprint-to-str (filtered-stacktrace e)))))))))) (defn try-apply - "Like `apply`, but wraps F inside a `try-catch` block and logs exceptions caught." + "Like `apply`, but wraps F inside a `try-catch` block and logs exceptions caught. + (This is actaully more flexible than `apply` -- the last argument doesn't have to be + a sequence: + + (try-apply vector :a :b [:c :d]) -> [:a :b :c :d] + (apply vector :a :b [:c :d]) -> [:a :b :c :d] + (try-apply vector :a :b :c :d) -> [:a :b :c :d] + (apply vector :a :b :c :d) -> Not ok - :d is not a sequence + + This allows us to use `try-apply` in more situations than we'd otherwise be able to." 
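;; Illustrative sketch, not part of the patch: how the new date helpers above compose. The
;; example date is made up; exact results depend on the default Calendar/locale (day-of-week
;; is 1-indexed from Sunday).
(comment
  (relative-date :month -1 (->Timestamp "2015-06-15"))            ; -> 2015-05-15
  (date-extract :quarter-of-year (->Timestamp "2015-06-15"))      ; -> 2
  (date-trunc :week (->Timestamp "2015-06-15"))                   ; -> the preceding Sunday, 2015-06-14
  (date-trunc-or-extract :day-of-week (->Timestamp "2015-06-15")) ; extract units go through date-extract
  (date-trunc-or-extract :month (->Timestamp "2015-06-15")))      ; truncation units go through date-trunc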
[^clojure.lang.IFn f & args] - (apply (wrap-try-catch f) args)) + (apply (wrap-try-catch f) (concat (butlast args) (if (sequential? (last args)) + (last args) + [(last args)])))) (defn wrap-try-catch! "Re-intern FN-SYMB as a new fn that wraps the original with a `try-catch`. Intended for debugging. diff --git a/test/metabase/api/activity_test.clj b/test/metabase/api/activity_test.clj index cdf13149ce48f9cf53efdfef42bb8dbffbee32d6..995aa229deb305b54b35651b40c8bfa56ac573e4 100644 --- a/test/metabase/api/activity_test.clj +++ b/test/metabase/api/activity_test.clj @@ -24,7 +24,7 @@ activity1 (db/ins Activity :topic "install" :details {} - :timestamp (u/parse-iso8601 "2015-09-09T12:13:14.888Z")) + :timestamp (u/->Timestamp "2015-09-09T12:13:14.888Z")) activity2 (db/ins Activity :topic "dashboard-create" :user_id (user->id :crowberto) @@ -33,13 +33,13 @@ :details {:description "Because I can!" :name "Bwahahaha" :public_perms 2} - :timestamp (u/parse-iso8601 "2015-09-10T18:53:01.632Z")) + :timestamp (u/->Timestamp "2015-09-10T18:53:01.632Z")) activity3 (db/ins Activity :topic "user-joined" :user_id (user->id :rasta) :model "user" :details {} - :timestamp (u/parse-iso8601 "2015-09-10T05:33:43.641Z"))] + :timestamp (u/->Timestamp "2015-09-10T05:33:43.641Z"))] [(match-$ (db/sel :one Activity :id (:id activity2)) {:id $ :topic "dashboard-create" diff --git a/test/metabase/driver/query_processor_test.clj b/test/metabase/driver/query_processor_test.clj index 6136414b384f194a4a7de5875094f80ee4b2c20d..b4bd00560d93f820ca1f7ce72d39ba5f121cf0b0 100644 --- a/test/metabase/driver/query_processor_test.clj +++ b/test/metabase/driver/query_processor_test.clj @@ -95,7 +95,8 @@ :last_login {:special_type :category :base_type (timestamp-field-type) :name (format-name "last_login") - :display_name "Last Login"}))) + :display_name "Last Login" + :unit :day}))) ;; #### venues (defn- venues-columns @@ -429,10 +430,20 @@ breakout user_id order user_id+)) +;; ### BREAKOUT w/o AGGREGATION +;; This should act as a "distinct values" query and return ordered results +(qp-expect-with-all-datasets + {:cols [(checkins-col :user_id)] + :columns [(format-name "user_id")] + :rows [[1] [2] [3] [4] [5] [6] [7] [8] [9] [10]]} + (Q breakout user_id of checkins + limit 10)) + + ;; ### "BREAKOUT" - MULTIPLE COLUMNS W/ IMPLICT "ORDER_BY" ;; Fields should be implicitly ordered :ASC for all the fields in `breakout` that are not specified in `order_by` (qp-expect-with-all-datasets - {:rows [[1 1 1] [1 5 1] [1 7 1] [1 10 1] [1 13 1] [1 16 1] [1 26 1] [1 31 1] [1 35 1] [1 36 1]], + {:rows [[1 1 1] [1 5 1] [1 7 1] [1 10 1] [1 13 1] [1 16 1] [1 26 1] [1 31 1] [1 35 1] [1 36 1]] :columns [(format-name "user_id") (format-name "venue_id") "count"] @@ -711,17 +722,22 @@ ;; | UNIX TIMESTAMP SPECIAL_TYPE FIELDS | ;; +------------------------------------------------------------------------------------------------------------------------+ +(defmacro if-questionable-timezone-support [then else] + `(if (contains? #{:sqlserver :mongo} *engine*) + ~then + ~else)) + (defmacro if-sqlserver "SQLServer lacks timezone support; the groupings in sad-toucan-incidents happen in UTC rather than US/Pacfic time. This macro is provided as a convenience for specifying the *slightly* different expected results in the multi-driver unit tests below." 
[then else] - `(if (= *engine* :sqlserver) + `(if (= :sqlserver *engine*) ~then ~else)) ;; There were 9 "sad toucan incidents" on 2015-06-02 -(datasets/expect-with-datasets sql-engines - (if-sqlserver +(datasets/expect-with-all-datasets + (if-questionable-timezone-support 10 9) (Q dataset sad-toucan-incidents @@ -731,10 +747,8 @@ order timestamp+ return rows count)) - -;;; Unix timestamp breakouts -- SQL only -(datasets/expect-with-datasets sql-engines - (if-sqlserver +(datasets/expect-with-all-datasets + (if-questionable-timezone-support ;; SQL Server doesn't have a concept of timezone so results are all grouped by UTC ;; This is technically correct but the results differ from less-wack DBs [[#inst "2015-06-01T07" 6] @@ -817,8 +831,7 @@ [897 "Wearing a Biggie Shirt"] [499 "In the Expa Office"]] (Q dataset tupac-sightings - return rows - of sightings + return rows of sightings fields id category_id->categories.name order timestamp- limit 10)) @@ -952,22 +965,24 @@ ;;; Nested Field in FIELDS ;; Return the first 10 tips with just tip.venue.name (datasets/expect-when-testing-dataset :mongo - [[{:name "Lucky's Gluten-Free Café"} 1] - [{:name "Joe's Homestyle Eatery"} 2] - [{:name "Lower Pac Heights Cage-Free Coffee House"} 3] - [{:name "Oakland European Liquor Store"} 4] - [{:name "Tenderloin Gormet Restaurant"} 5] - [{:name "Marina Modern Sushi"} 6] - [{:name "Sunset Homestyle Grill"} 7] - [{:name "Kyle's Low-Carb Grill"} 8] - [{:name "Mission Homestyle Churros"} 9] - [{:name "Sameer's Pizza Liquor Store"} 10]] - (Q dataset geographical-tips use mongo - return rows - aggregate rows of tips - order id - fields venue...name - limit 10)) + {:columns ["venue.name"] + :rows [["Lucky's Gluten-Free Café"] + ["Joe's Homestyle Eatery"] + ["Lower Pac Heights Cage-Free Coffee House"] + ["Oakland European Liquor Store"] + ["Tenderloin Gormet Restaurant"] + ["Marina Modern Sushi"] + ["Sunset Homestyle Grill"] + ["Kyle's Low-Carb Grill"] + ["Mission Homestyle Churros"] + ["Sameer's Pizza Liquor Store"]]} + (select-keys (Q dataset geographical-tips use mongo + return :data + aggregate rows of tips + order id + fields venue...name + limit 10) + [:columns :rows])) ;;; Nested Field w/ ordering by aggregation @@ -1068,7 +1083,7 @@ limit 10 return rows))) -(datasets/expect-with-datasets sql-engines +(datasets/expect-with-all-datasets (if-sqlserver [[#inst "2015-06-01T17:31" 1] [#inst "2015-06-01T23:06" 1] @@ -1093,8 +1108,8 @@ [#inst "2015-06-02T11:11" 1]]) (sad-toucan-incidents-with-bucketing :default)) -(datasets/expect-with-datasets sql-engines - (if-sqlserver +(datasets/expect-with-all-datasets + (if-questionable-timezone-support [[#inst "2015-06-01T17:31" 1] [#inst "2015-06-01T23:06" 1] [#inst "2015-06-02T00:23" 1] @@ -1118,7 +1133,7 @@ [#inst "2015-06-02T11:11" 1]]) (sad-toucan-incidents-with-bucketing :minute)) -(datasets/expect-with-datasets sql-engines +(datasets/expect-with-all-datasets [[0 5] [1 4] [2 2] @@ -1156,14 +1171,14 @@ [#inst "2015-06-02T13" 1]]) (sad-toucan-incidents-with-bucketing :hour)) -(datasets/expect-with-datasets sql-engines - (if-sqlserver +(datasets/expect-with-all-datasets + (if-questionable-timezone-support [[0 13] [1 8] [2 4] [3 7] [4 5] [5 13] [6 10] [7 8] [8 9] [9 7]] [[0 8] [1 9] [2 7] [3 10] [4 10] [5 9] [6 6] [7 5] [8 7] [9 7]]) (sad-toucan-incidents-with-bucketing :hour-of-day)) -(datasets/expect-with-datasets sql-engines - (if-sqlserver +(datasets/expect-with-all-datasets + (if-questionable-timezone-support [[#inst "2015-06-01T07" 6] [#inst "2015-06-02T07" 10] [#inst 
"2015-06-03T07" 4] @@ -1187,26 +1202,26 @@ [#inst "2015-06-10T07" 10]]) (sad-toucan-incidents-with-bucketing :day)) -(datasets/expect-with-datasets sql-engines - (if-sqlserver +(datasets/expect-with-all-datasets + (if-questionable-timezone-support [[1 28] [2 38] [3 29] [4 27] [5 24] [6 30] [7 24]] [[1 29] [2 36] [3 33] [4 29] [5 13] [6 38] [7 22]]) (sad-toucan-incidents-with-bucketing :day-of-week)) -(datasets/expect-with-datasets sql-engines - (if-sqlserver +(datasets/expect-with-all-datasets + (if-questionable-timezone-support [[1 6] [2 10] [3 4] [4 9] [5 9] [6 8] [7 8] [8 9] [9 7] [10 9]] [[1 8] [2 9] [3 9] [4 4] [5 11] [6 8] [7 6] [8 10] [9 6] [10 10]]) (sad-toucan-incidents-with-bucketing :day-of-month)) -(datasets/expect-with-datasets sql-engines - (if-sqlserver +(datasets/expect-with-all-datasets + (if-questionable-timezone-support [[152 6] [153 10] [154 4] [155 9] [156 9] [157 8] [158 8] [159 9] [160 7] [161 9]] [[152 8] [153 9] [154 9] [155 4] [156 11] [157 8] [158 6] [159 10] [160 6] [161 10]]) (sad-toucan-incidents-with-bucketing :day-of-year)) -(datasets/expect-with-datasets sql-engines - (if-sqlserver +(datasets/expect-with-all-datasets + (if-questionable-timezone-support [[#inst "2015-05-31T07" 46] [#inst "2015-06-07T07" 47] [#inst "2015-06-14T07" 40] @@ -1220,29 +1235,33 @@ [#inst "2015-06-28T07" 7]]) (sad-toucan-incidents-with-bucketing :week)) -(datasets/expect-with-datasets sql-engines - (if-sqlserver - [[23 54] [24 46] [25 39] [26 61]] - [[23 49] [24 47] [25 39] [26 58] [27 7]]) - (sad-toucan-incidents-with-bucketing :week-of-year)) +(datasets/expect-with-all-datasets + (datasets/dataset-case + :sqlserver [[23 54] [24 46] [25 39] [26 61]] + :mongo [[23 46] [24 47] [25 40] [26 60] [27 7]] ; why are these different then ? + :h2 [[23 49] [24 47] [25 39] [26 58] [27 7]] + :postgres [[23 49] [24 47] [25 39] [26 58] [27 7]] + :mysql [[23 49] [24 47] [25 39] [26 58] [27 7]]) + (sad-toucan-incidents-with-bucketing :week-of-year)) -(datasets/expect-with-datasets sql-engines +(datasets/expect-with-all-datasets [[#inst "2015-06-01T07" 200]] (sad-toucan-incidents-with-bucketing :month)) -(datasets/expect-with-datasets sql-engines +(datasets/expect-with-all-datasets [[6 200]] (sad-toucan-incidents-with-bucketing :month-of-year)) -(datasets/expect-with-datasets sql-engines +(datasets/expect-with-all-datasets [[#inst "2015-04-01T07" 200]] (sad-toucan-incidents-with-bucketing :quarter)) -(datasets/expect-with-datasets sql-engines - [[2 200]] - (sad-toucan-incidents-with-bucketing :quarter-of-year)) +(datasets/expect-with-all-datasets + [[(datasets/dataset-case :h2 2, :postgres 2, :mysql 2, :sqlserver 2, :mongo 2.0) + 200]] + (sad-toucan-incidents-with-bucketing :quarter-of-year)) -(datasets/expect-with-datasets sql-engines +(datasets/expect-with-all-datasets [[2015 200]] (sad-toucan-incidents-with-bucketing :year)) @@ -1268,22 +1287,22 @@ filter = ["datetime_field" (id :checkins :timestamp) "as" (name field-grouping)] (apply vector "relative_datetime" relative-datetime-args) return first-row first))) -(datasets/expect-with-datasets sql-engines 4 (count-of-grouping (checkins:4-per-minute) :minute "current")) -(datasets/expect-with-datasets sql-engines 4 (count-of-grouping (checkins:4-per-minute) :minute -1 "minute")) -(datasets/expect-with-datasets sql-engines 4 (count-of-grouping (checkins:4-per-minute) :minute 1 "minute")) +(datasets/expect-with-all-datasets 4 (count-of-grouping (checkins:4-per-minute) :minute "current")) +(datasets/expect-with-all-datasets 4 (count-of-grouping 
(checkins:4-per-minute) :minute -1 "minute")) +(datasets/expect-with-all-datasets 4 (count-of-grouping (checkins:4-per-minute) :minute 1 "minute")) -(datasets/expect-with-datasets sql-engines 4 (count-of-grouping (checkins:4-per-hour) :hour "current")) -(datasets/expect-with-datasets sql-engines 4 (count-of-grouping (checkins:4-per-hour) :hour -1 "hour")) -(datasets/expect-with-datasets sql-engines 4 (count-of-grouping (checkins:4-per-hour) :hour 1 "hour")) +(datasets/expect-with-all-datasets 4 (count-of-grouping (checkins:4-per-hour) :hour "current")) +(datasets/expect-with-all-datasets 4 (count-of-grouping (checkins:4-per-hour) :hour -1 "hour")) +(datasets/expect-with-all-datasets 4 (count-of-grouping (checkins:4-per-hour) :hour 1 "hour")) -(datasets/expect-with-datasets sql-engines 1 (count-of-grouping (checkins:1-per-day) :day "current")) -(datasets/expect-with-datasets sql-engines 1 (count-of-grouping (checkins:1-per-day) :day -1 "day")) -(datasets/expect-with-datasets sql-engines 1 (count-of-grouping (checkins:1-per-day) :day 1 "day")) +(datasets/expect-with-all-datasets 1 (count-of-grouping (checkins:1-per-day) :day "current")) +(datasets/expect-with-all-datasets 1 (count-of-grouping (checkins:1-per-day) :day -1 "day")) +(datasets/expect-with-all-datasets 1 (count-of-grouping (checkins:1-per-day) :day 1 "day")) -(datasets/expect-with-datasets sql-engines 7 (count-of-grouping (checkins:1-per-day) :week "current")) +(datasets/expect-with-all-datasets 7 (count-of-grouping (checkins:1-per-day) :week "current")) ;; SYNTACTIC SUGAR -(datasets/expect-with-datasets sql-engines +(datasets/expect-with-all-datasets 1 (with-temp-db [_ (checkins:1-per-day)] (-> (driver/process-query @@ -1294,7 +1313,7 @@ :filter ["TIME_INTERVAL" (id :checkins :timestamp) "current" "day"]}}) :data :rows first first))) -(datasets/expect-with-datasets sql-engines +(datasets/expect-with-all-datasets 7 (with-temp-db [_ (checkins:1-per-day)] (-> (driver/process-query @@ -1321,22 +1340,22 @@ {:rows (-> results :row_count) :unit (-> results :data :cols first :unit)}))) -(datasets/expect-with-datasets sql-engines +(datasets/expect-with-all-datasets {:rows 1, :unit :day} (date-bucketing-unit-when-you :breakout-by "day", :filter-by "day")) -(datasets/expect-with-datasets sql-engines +(datasets/expect-with-all-datasets {:rows 7, :unit :day} (date-bucketing-unit-when-you :breakout-by "day", :filter-by "week")) -(datasets/expect-with-datasets sql-engines +(datasets/expect-with-all-datasets {:rows 1, :unit :week} (date-bucketing-unit-when-you :breakout-by "week", :filter-by "day")) -(datasets/expect-with-datasets sql-engines +(datasets/expect-with-all-datasets {:rows 1, :unit :quarter} (date-bucketing-unit-when-you :breakout-by "quarter", :filter-by "day")) -(datasets/expect-with-datasets sql-engines +(datasets/expect-with-all-datasets {:rows 1, :unit :hour} (date-bucketing-unit-when-you :breakout-by "hour", :filter-by "day")) diff --git a/test/metabase/http_client.clj b/test/metabase/http_client.clj index a66fe17ad272a87633b7bf81a834797dc2cdc689..aa5f16327ce8ad2b1c78359c325c797969c2cff0 100644 --- a/test/metabase/http_client.clj +++ b/test/metabase/http_client.clj @@ -131,11 +131,6 @@ (def auto-deserialize-dates-keys #{:created_at :updated_at :last_login :date_joined :started_at :finished_at}) -(defn- deserialize-date [date] - (some->> (u/parse-iso8601 date) - .getTime - java.sql.Timestamp.)) - (defn- auto-deserialize-dates "Automatically recurse over RESPONSE and look for keys that are known to correspond to dates. 
Parse their values and convert to `java.sql.Timestamps`." @@ -144,7 +139,7 @@ (map? response) (->> response (map (fn [[k v]] {k (cond - (contains? auto-deserialize-dates-keys k) (deserialize-date v) + (contains? auto-deserialize-dates-keys k) (u/->Timestamp v) (coll? v) (auto-deserialize-dates v) :else v)})) (into {})) diff --git a/test/metabase/models/field_test.clj b/test/metabase/models/field_test.clj index 7ead0d666327f10522dfe8c6cbc114641a368067..05bb383ab55ad1cf11b88e23d78484e4f178e88c 100644 --- a/test/metabase/models/field_test.clj +++ b/test/metabase/models/field_test.clj @@ -8,7 +8,7 @@ ;; Check that setting a Field's special_type to :category will cause a corresponding FieldValues to be created asynchronously (expect [nil - 75 + 40 :done] (let [orig-special-type (sel :one :field [Field :special_type] :id (id :categories :name)) set-field-special-type (fn [special-type]
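;; Illustrative sketch, not part of the patch: stepping back to the Mongo QP changes earlier in
;; this diff, the {:___date ...} wrapper round-trips like this for a plain "timestamp" column
;; bucketed by :day (field name assumed; monger.operators keywords shown literally).
(comment
  ;; ->initial-rvalue projects the bucketed column via mongo-let + $dateToString:
  {"timestamp~~~day"
   {:$let {:vars {:field "$timestamp"}
           :in   {:___date {:$dateToString {:format "%Y-%m-%d"
                                            :date   :$$field}}}}}}
  ;; a DateTimeValue in a filter clause is rendered as the same wrapper shape:
  {:___date "2015-06-02"}
  ;; and unstringify-dates converts the wrapper back into a Timestamp on the way out:
  (u/->Timestamp "2015-06-02"))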