From ba2b723321407ae9f455cd40f704fc5e7b7be9d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cam=20Sau=CC=88l?= <cammsaul@gmail.com> Date: Thu, 2 Jul 2015 17:27:55 -0700 Subject: [PATCH] fix duplicate query processing --- src/metabase/driver/mongo.clj | 3 + src/metabase/driver/mongo/query_processor.clj | 11 +-- src/metabase/driver/query_processor.clj | 29 ++++++- .../driver/query_processor/expand.clj | 87 +++++++++++++------ test/metabase/test/data.clj | 46 ++++++---- test/metabase/test/util/mql.clj | 3 +- 6 files changed, 127 insertions(+), 52 deletions(-) diff --git a/src/metabase/driver/mongo.clj b/src/metabase/driver/mongo.clj index 1c10b2288d2..6f7a5cc045d 100644 --- a/src/metabase/driver/mongo.clj +++ b/src/metabase/driver/mongo.clj @@ -161,3 +161,6 @@ ;; 4. API ;; 4A. API Tweaks as Needed ;; 5. Cleanup + Tests +;; 5A. Cleanup / Dox +;; 5B. Tests +;; 5C. $ notation doesn't handle nested Fields (yet) (or id ? ) diff --git a/src/metabase/driver/mongo/query_processor.clj b/src/metabase/driver/mongo/query_processor.clj index 7cd9fe88081..b589a151e2a 100644 --- a/src/metabase/driver/mongo/query_processor.clj +++ b/src/metabase/driver/mongo/query_processor.clj @@ -13,6 +13,7 @@ [metabase.driver :as driver] (metabase.driver [interface :as i] [query-processor :as qp]) + [metabase.driver.query-processor.expand :as expand] [metabase.driver.mongo.util :refer [with-mongo-connection *mongo-connection* values->base-type]] [metabase.models.field :refer [Field]] [metabase.util :as u]) @@ -39,7 +40,7 @@ (case (keyword query-type) :query (let [generated-query (process-structured (:query query))] (when-not qp/*disable-qp-logging* - (log/debug (u/format-color 'green "\nMONGER FORM:\n\n%s\n" + (log/debug (u/format-color 'green "\nMONGER FORM:\n%s\n" (->> generated-query (walk/postwalk #(if (symbol? %) (symbol (name %)) %)) ; strip namespace qualifiers from Monger form u/pprint-to-str) "\n"))) ; so it's easier to read @@ -85,14 +86,14 @@ ~@(filter identity forms)])) (defn- field->name - [{:keys [field-name subfield]}] - (if subfield (format "%s.%s" field-name subfield) - field-name)) + "Return qualified string name of FIELD, e.g. `venue` or `venue.address`." + ^String [field] + (apply str (interpose "." (expand/qualified-name-components field)))) (defn- field->$str "Given a FIELD, return a `$`-qualified field name for use in a Mongo aggregate query, e.g. `\"$user_id\"`." [field] - (format "$%s" (name (field->name field)))) + (format "$%s" (field->name field))) (defn- aggregation:rows [] `(doall (with-collection ^DBApiLayer *mongo-connection* ~*collection-name* diff --git a/src/metabase/driver/query_processor.clj b/src/metabase/driver/query_processor.clj index 88769531b69..cf77e58adc3 100644 --- a/src/metabase/driver/query_processor.clj +++ b/src/metabase/driver/query_processor.clj @@ -50,7 +50,8 @@ (defn- pre-expand [qp] (fn [query] - (qp (expand/expand *driver* query)))) + (qp (assoc (expand/expand *driver* query) + :query-id (str (java.util.UUID/randomUUID)))))) (defn- post-add-row-count-and-status @@ -171,8 +172,7 @@ (defn- cumulative-sum [qp] (fn [query] - (let [[cumulative-sum-field query] (pre-cumulative-sum query) - results (qp query)] + (let [[cumulative-sum-field query] (pre-cumulative-sum query)] (cond->> (qp query) cumulative-sum-field (post-cumulative-sum cumulative-sum-field))))) @@ -400,7 +400,25 @@ ;; ;; Pre-processing then happens in order from bottom-to-top; i.e. POST-ANNOTATE gets to modify the results, then LIMIT, then CUMULATIVE-SUM, etc. +(defn- wrap-guard-multiple-calls + "Throw an exception if a QP function accidentally calls (QP QUERY) more than once." + [qp] + (let [called? (atom false)] + (fn [query] + (assert (not @called?) "(QP QUERY) IS BEING CALLED MORE THAN ONCE!") + (reset! called? true) + (qp query)))) + +(defn- post-log-results [qp] + (fn [query] + (let [results (qp query)] + (when-not *disable-qp-logging* + (log/debug "\nRESULTS:\n" (u/pprint-to-str 'cyan results))) + results))) + (defn- process-structured [driver query] + (when-not *disable-qp-logging* + (println "PROCESS STRUCTURED!")) (let [driver-process-query (partial i/process-query driver)] ((<<- wrap-catch-exceptions pre-expand @@ -412,6 +430,8 @@ limit post-annotate pre-log-query + post-log-results + wrap-guard-multiple-calls driver-process-query) query))) (defn- process-native [driver query] @@ -420,11 +440,14 @@ post-add-row-count-and-status post-convert-unix-timestamps-to-dates limit + wrap-guard-multiple-calls driver-process-query) query))) (defn process "Process a QUERY and return the results." [driver query] + (when-not *disable-qp-logging* + (log/info (u/format-color 'blue "\nQUERY:\n%s" (u/pprint-to-str query)))) (binding [*driver* driver] ((case (keyword (:type query)) :native process-native diff --git a/src/metabase/driver/query_processor/expand.clj b/src/metabase/driver/query_processor/expand.clj index 53bb21c8ec1..948d6edebd6 100644 --- a/src/metabase/driver/query_processor/expand.clj +++ b/src/metabase/driver/query_processor/expand.clj @@ -54,7 +54,8 @@ parse-breakout parse-fields parse-filter - parse-order-by) + parse-order-by + ph) ;; ## -------------------- Protocols -------------------- @@ -91,7 +92,7 @@ (and clause (or (not (sequential? clause)) (and (seq clause) - (every? identity clause))))) + (not (every? nil? clause)))))) (defn- parse [query-dict] (update-in query-dict [:query] #(-<> (assoc % @@ -123,18 +124,28 @@ :name :field-name :special_type :special-type :base_type :base-type - :table_id :table-id})) + :table_id :table-id + :parent_id :parent-id})) (defn- resolve-fields "Resolve the `Fields` in an EXPANDED-QUERY-DICT." [expanded-query-dict field-ids] - (if-not (seq field-ids) expanded-query-dict ; No need to do a DB call or walk expanded-query-dict if we didn't see any Field IDs - (let [fields (->> (sel :many :id->fields [field/Field :name :base_type :special_type :table_id] :id [in field-ids]) - (m/map-vals rename-mb-field-keys))] - (reset! *table-ids* (set (map :table-id (vals fields)))) - ;; This is performed depth-first so we don't end up walking the newly-created Field/Value objects - ;; they may have nil values; this was we don't have to write an implementation of resolve-field for nil - (walk/postwalk #(resolve-field % fields) expanded-query-dict)))) + (if-not (seq field-ids) + ;; Base case: if there's no field-ids to expand we're done + expanded-query-dict + + ;; Re-bind *field-ids* in case we need to do recursive Field resolution + (binding [*field-ids* (atom #{})] + (let [fields (->> (sel :many :id->fields [field/Field :name :base_type :special_type :table_id :parent_id] :id [in field-ids]) + (m/map-vals rename-mb-field-keys) + (m/map-vals #(assoc % :parent (when (:parent-id %) + (ph (:parent-id %))))))] + (swap! *table-ids* set/union (set (map :table-id (vals fields)))) + + ;; Recurse in case any new [nested] Field placeholders were emitted and we need to do recursive Field resolution + ;; We can't use recur here because binding wraps body in try/catch + (resolve-fields (walk/postwalk #(resolve-field % fields) expanded-query-dict) + @*field-ids*))))) (defn- resolve-database "Resolve the `Database` in question for an EXPANDED-QUERY-DICT." @@ -206,17 +217,40 @@ ;; ## -------------------- Field + Value -------------------- +(defprotocol IField + "Methods specific to the Query Expander `Field` record type." + (qualified-name-components [this] + "Return a vector of name components of the form `[table-name parent-names... field-name]`")) + ;; Field is the expansion of a Field ID in the standard QL (defrecord Field [^Integer field-id ^String field-name ^Keyword base-type ^Keyword special-type ^Integer table-id - ^String table-name] + ^String table-name + ^Integer parent-id + parent] ; Field once its resolved; FieldPlaceholder before that IResolve + (resolve-field [this field-id->fields] + (cond + parent (if (= (type parent) Field) + this + (resolve-field parent field-id->fields)) + parent-id (assoc this :parent (or (field-id->fields parent-id) + (ph parent-id))) + :else this)) + (resolve-table [this table-id->table] (assoc this :table-name (:name (or (table-id->table table-id) - (throw (Exception. (format "Query expansion failed: could not find table %d." table-id)))))))) + (throw (Exception. (format "Query expansion failed: could not find table %d." table-id))))))) + + IField + (qualified-name-components [this] + (conj (if parent + (qualified-name-components parent) + [table-name]) + field-name))) (defn- Field? "Is this a valid value for a `Field` ID in an unexpanded query? (i.e. an integer or `fk->` form)." @@ -243,9 +277,13 @@ (defrecord FieldPlaceholder [^Integer field-id] IResolve (resolve-field [this field-id->fields] - (->> (field-id->fields field-id) - (merge this) - map->Field))) + (or + ;; try to resolve the Field with the ones available in field-id->fields + (some->> (field-id->fields field-id) + (merge this) + map->Field) + ;; If that fails just return ourselves as-is + this))) (defn- parse-value "Convert the `value` of a `Value` to a date or timestamp if needed. @@ -286,12 +324,6 @@ (swap! *fk-field-ids* conj fk-field-id) (->FieldPlaceholder dest-field-id)) - ["." (id :guard integer?) subfield] - (do (assert-driver-supports :nested-fields) - (swap! *field-ids* conj id) - (map->FieldPlaceholder {:field-id id - :subfield subfield})) - _ (throw (Exception. (str "Invalid field: " field-id))))) ([field-id value] (->ValuePlaceholder (:field-id (ph field-id)) value))) @@ -367,7 +399,7 @@ ;; ### Parsers (defparser parse-filter-subclause - ["INSIDE" lat-field lon-field lat-max lon-min lat-min lon-max] + ["INSIDE" (lat-field :guard integer?) (lon-field :guard integer?) (lat-max :guard number?) (lon-min :guard number?) (lat-min :guard number?) (lon-max :guard number?)] (map->Filter:Inside {:filter-type :inside :lat {:field (ph lat-field) :min (ph lat-field lat-min) @@ -376,22 +408,25 @@ :min (ph lon-field lon-min) :max (ph lon-field lon-max)}}) - ["BETWEEN" field-id min max] + ["BETWEEN" (field-id :guard integer?) (min :guard number?) (max :guard number?)] (map->Filter:Between {:filter-type :between :field (ph field-id) :min-val (ph field-id min) :max-val (ph field-id max)}) - [filter-type field-id val] + [(filter-type :guard (partial contains? #{"=" "!=" "<" ">" "<=" ">="})) (field-id :guard integer?) val] (map->Filter:Field+Value {:filter-type (keyword filter-type) :field (ph field-id) :value (ph field-id val)}) - [filter-type field-id] + [(filter-type :guard string?) (field-id :guard integer?)] (map->Filter:Field {:filter-type (case filter-type "NOT_NULL" :not-null "IS_NULL" :is-null) - :field (ph field-id)})) + :field (ph field-id)}) + + clause + (throw (Exception. (format "Invalid filter clause: %s" clause)))) (defparser parse-filter ["AND" & subclauses] (map->Filter {:compound-type :and diff --git a/test/metabase/test/data.clj b/test/metabase/test/data.clj index e752a685a8d..350df2cbada 100644 --- a/test/metabase/test/data.clj +++ b/test/metabase/test/data.clj @@ -151,17 +151,20 @@ (defn- table-id->field-name->field "Return a map of lowercased `Field` names -> fields for `Table` with TABLE-ID." [table-id] - (->> (sel :many :field->obj [Field :name], :table_id table-id, :parent_id nil) + {:pre [(integer? table-id)]} + (->> (binding [*sel-disable-logging* true] + (sel :many :field->obj [Field :name], :table_id table-id, :parent_id nil)) (m/map-keys s/lower-case))) (defn- db-id->table-name->table "Return a map of lowercased `Table` names -> Tables for `Database` with DATABASE-ID. Add a delay `:field-name->field` to each Table that calls `table-id->field-name->field` for that Table." [database-id] - (->> (sel :many :field->obj [Table :name] :db_id database-id) + {:pre [(integer? database-id)]} + (->> (binding [*sel-disable-logging* true] + (sel :many :field->obj [Table :name] :db_id database-id)) (m/map-keys s/lower-case) - (m/map-vals (fn [table] - (assoc table :field-name->field (delay (table-id->field-name->field (:id table)))))))) + (m/map-vals #(assoc % :field-name->field (delay (table-id->field-name->field (:id %))))))) (defn -temp-db-add-getter-delay "Add a delay `:table-name->table` to DB that calls `db-id->table-name->table`." @@ -174,14 +177,20 @@ With three args, fetch `Field` with FIELD-NAME by recursively fetching `Table` and using its `:field-name->field` delay." ([temp-db table-name] {:pre [(map? temp-db) - (string? table-name)]} + (string? table-name)] + :post [(map? %)]} (@(:table-name->table temp-db) table-name)) + ([temp-db table-name field-name] - {:pre [(string? field-name)]} + {:pre [(string? field-name)] + :post [(map? %)]} (@(:field-name->field (-temp-get temp-db table-name)) field-name)) + ([temp-db table-name parent-field-name & nested-field-names] - {:pre [(string? (last nested-field-names))]} - (sel :one :id Field, :name (last nested-field-names), :parent_id (:id (apply -temp-get temp-db table-name parent-field-name (butlast nested-field-names)))))) + {:pre [(every? string? nested-field-names)] + :post [(map? %)]} + (binding [*sel-disable-logging* true] + (sel :one Field, :name (last nested-field-names), :parent_id (:id (apply -temp-get temp-db table-name parent-field-name (butlast nested-field-names))))))) (defn- walk-expand-& "Walk BODY looking for symbols like `&table` or `&table.field` and expand them to appropriate `-temp-get` forms. @@ -201,17 +210,20 @@ form)) body)) -(defn with-temp-db* [loader ^DatabaseDefinition dbdef f] +(defn -with-temp-db [loader ^DatabaseDefinition dbdef f] (let [dbdef (map->DatabaseDefinition (assoc dbdef :short-lived? true))] (try - (remove-database! loader dbdef) - (let [db (-> (get-or-create-database! loader dbdef) - -temp-db-add-getter-delay)] - (assert db) - (assert (exists? Database :id (:id db))) - (f db)) + (binding [*sel-disable-logging* true] + (remove-database! loader dbdef) + (let [db (-> (get-or-create-database! loader dbdef) + -temp-db-add-getter-delay)] + (assert db) + (assert (exists? Database :id (:id db))) + (binding [*sel-disable-logging* false] + (f db)))) (finally - (remove-database! loader dbdef))))) + (binding [*sel-disable-logging* true] + (remove-database! loader dbdef)))))) (defmacro with-temp-db "Load and sync DATABASE-DEFINITION with DATASET-LOADER and execute BODY with @@ -232,6 +244,6 @@ :aggregation [\"count\"] :filter [\"<\" (:id &events.timestamp) \"1765-01-01\"]}}))" [[db-binding dataset-loader ^DatabaseDefinition database-definition] & body] - `(with-temp-db* ~dataset-loader ~database-definition + `(-with-temp-db ~dataset-loader ~database-definition (fn [~db-binding] ~@(walk-expand-& db-binding body)))) diff --git a/test/metabase/test/util/mql.clj b/test/metabase/test/util/mql.clj index 39657484fc2..361177ebeb5 100644 --- a/test/metabase/test/util/mql.clj +++ b/test/metabase/test/util/mql.clj @@ -11,7 +11,8 @@ (defn- partition-tokens [keywords tokens] (->> (loop [all [], current-split nil, [token & more] tokens] (cond - (not token) (conj all current-split) + (and (not token) + (not (seq more))) (conj all current-split) (contains? keywords token) (recur (or (when (seq current-split) (conj all current-split)) all) -- GitLab