Skip to content
Snippets Groups Projects
Commit 5492d5fd authored by Cam Saul's avatar Cam Saul
Browse files

more fixes

parent bbdbf9f3
No related branches found
No related tags found
No related merge requests found
(ns metabase.driver.mongo.query-processor
(:refer-clojure :exclude [find sort])
(:require [clojure.core.match :refer [match]]
(clojure [set :as set]
[string :as s])
[clojure.tools.logging :as log]
[clojure.walk :as walk]
[colorize.core :as color]
......@@ -39,8 +41,8 @@
(with-mongo-connection [_ database]
(case (keyword query-type)
:query (let [generated-query (process-structured (:query query))]
(when-not qp/*disable-qp-logging*
(log/debug (u/format-color 'green "\nMONGER FORM:\n%s\n"
(when-not false #_qp/*disable-qp-logging*
(log/info (u/format-color 'green "\nMONGER FORM:\n%s\n"
(->> generated-query
(walk/postwalk #(if (symbol? %) (symbol (name %)) %)) ; strip namespace qualifiers from Monger form
u/pprint-to-str) "\n"))) ; so it's easier to read
......@@ -87,8 +89,10 @@
(defn- field->name
"Return qualified string name of FIELD, e.g. `venue` or `venue.address`."
^String [field]
(apply str (interpose "." (rest (expand/qualified-name-components field))))) ; drop the first part, :table-name
(^String [field separator]
(apply str (interpose separator (rest (expand/qualified-name-components field))))) ; drop the first part, :table-name
(^String [field]
(field->name field ".")))
(defn- field->$str
"Given a FIELD, return a `$`-qualified field name for use in a Mongo aggregate query, e.g. `\"$user_id\"`."
......@@ -178,36 +182,63 @@
:avg ["avg" {$avg (field->$str field)}]
:sum ["sum" {$sum (field->$str field)}])))
(defn do-breakout
;;; BREAKOUT FIELD NAME ESCAPING FOR $GROUP
;; We're not allowed to use field names that contain a period as output keys in the Mongo aggregation $group stage.
;; Not OK:
;; {"$group" {"source.username" {"$first" "$source.username"}, "_id" "$source.username"}, ...}
;;
;; For *nested* Fields, we'll replace the '.' with '___' in the output key, and restore the original names afterward.
;; Escaped:
;; {"$group" {"source___username" {"$first" "$source.username"}, "_id" "$source.username"}, ...}
(defn ag-unescape-nested-field-names
  "Undo the `___` escaping of nested Field names in the keys of RESULTS.
   E.g. `:source___service` becomes `:source.service`.

   Only the keys of the *first* row are inspected to work out which keys need renaming;
   the resulting renames are then applied to every row. When nothing needs renaming,
   RESULTS is returned untouched."
  [results]
  (let [;; escaped key -> unescaped key, e.g. {:source___username :source.username}
        escaped->unescaped (reduce (fn [renames k]
                                     (let [escaped  (name k)
                                           restored (s/replace escaped #"___" ".")]
                                       (if (= escaped restored)
                                         renames
                                         (assoc renames k (keyword restored)))))
                                   {}
                                   (keys (first results)))]
    (if (seq escaped->unescaped)
      (map #(set/rename-keys % escaped->unescaped) results)
      results)))
(defn- do-breakout
"Generate a Monger query from a structured QUERY dictionary that contains a `breakout` clause.
Since the Monger query we generate looks very different from ones we generate when no `breakout` clause
is present, this is essentially a separate implementation :/"
[{aggregation :aggregation, breakout-fields :breakout, order-by :order-by, limit :limit, :as query}]
(let [[ag-field ag-clause] (breakout-aggregation->field-name+expression aggregation)
(let [;; Shadow the top-level definition of field->name with one that will use "___" as the separator instead of "."
field->name (u/rpartial field->name "___")
[ag-field ag-clause] (breakout-aggregation->field-name+expression aggregation)
fields (map field->name breakout-fields)
$fields (map field->$str breakout-fields)
fields->$fields (zipmap fields $fields)]
(aggregate {$group (merge {"_id" (if (= (count fields) 1) (first $fields)
fields->$fields)}
(when (and ag-field ag-clause)
{ag-field ag-clause})
(->> fields->$fields
(map (fn [[field $field]]
(when-not (= field "_id")
{field {$first $field}})))
(into {})))}
{$sort (->> order-by
(mapcat (fn [{:keys [field direction]}]
[(field->name field) (case direction
:ascending 1
:descending -1)]))
(apply sorted-map))}
{$project (merge {"_id" false}
(when ag-field
{ag-field true})
(zipmap fields (repeat true)))}
(when limit
{$limit limit}))))
`(ag-unescape-nested-field-names
~(aggregate {$group (merge {"_id" (if (= (count fields) 1) (first $fields)
fields->$fields)}
(when (and ag-field ag-clause)
{ag-field ag-clause})
(into {} (for [[field $field] fields->$fields]
(when-not (= field "_id")
{field {$first $field}}))))}
{$sort (->> order-by
(mapcat (fn [{:keys [field direction]}]
[(field->name field) (case direction
:ascending 1
:descending -1)]))
(apply sorted-map))}
{$project (merge {"_id" false}
(when ag-field
{ag-field true})
(zipmap fields (repeat true)))}
(when limit
{$limit limit})))))
;; ## PROCESS-STRUCTURED
......
......@@ -9,7 +9,7 @@
[metabase.db :refer :all]
[metabase.driver.interface :as i]
[metabase.driver.query-processor.expand :as expand]
(metabase.models [field :refer [Field]]
(metabase.models [field :refer [Field], :as field]
[foreign-key :refer [ForeignKey]])
[metabase.util :as u]))
......@@ -326,6 +326,20 @@
(and (seq join-table-ids)
(sel :one :fields [Field :id :table_id :name :description :base_type :special_type], :name (name col-kw), :table_id [in join-table-ids]))
;; Otherwise if this is a nested Field recursively find the appropriate info
(let [name-components (s/split (name col-kw) #"\.")]
(when (> (count name-components) 1)
;; Find the nested Field by recursing through each Field's :children
(loop [field-kw->field field-kw->field, [component & more] (map keyword name-components)]
(when-let [f (field-kw->field component)]
(if-not (seq more)
;; If there are no more components to recurse through, give the resulting Field a qualified name like "source.service" and return it
(assoc f :name (apply str (interpose "." name-components)))
;; Otherwise recurse with a map of child-name-kw -> child and the rest of the name components
(recur (zipmap (map (comp keyword :name) (:children f))
(:children f))
more))))))
;; Otherwise it is an aggregation column like :sum, build a map of information to return
(merge (assert ag-type)
{:name (name col-kw)
......@@ -339,6 +353,7 @@
;; count should always be IntegerField/number
(= col-kw :count) {:base_type :IntegerField
:special_type :number}
;; Otherwise something went wrong !
:else (do (log/error (u/format-color 'red "Annotation failed: don't know what to do with Field '%s'.\nExpected these Fields:\n%s"
col-kw
......@@ -346,7 +361,10 @@
{:base_type :UnknownField
:special_type nil})))))
;; Add FK info to the resulting Fields
add-fields-extra-info)))
add-fields-extra-info
;; Remove extra data from the resulting Fields
(map (u/rpartial dissoc :children :parent_id)))))
(defn- post-annotate
"Take a sequence of RESULTS of executing QUERY and return the \"annotated\" results we pass to postprocessing -- the map with `:cols`, `:columns`, and `:rows`.
......@@ -360,11 +378,14 @@
_ (when-not *disable-qp-logging*
(log/debug (u/format-color 'magenta "\nDriver QP returned results with keys: %s." (vec (keys (first results))))))
join-table-ids (set (map :table-id join-tables))
fields (sel :many :fields [Field :id :table_id :name :description :base_type :special_type],
:table_id source-table-id, :active true, :parent_id nil)
fields (->> (sel :many :fields [Field :id :table_id :name :description :base_type :special_type :parent_id],
:table_id source-table-id, :active true)
field/unflatten-nested-fields)
ordered-col-kws (order-cols query results fields)]
(assert (= (count (keys (first results))) (count ordered-col-kws))
(format "Order-cols returned an invalid number of keys. Expected: %d, got: %d" (count (keys (first results))) (count ordered-col-kws)))
(format "Order-cols returned an invalid number of keys.\nExpected: %d %s\nGot: %d %s"
(count (keys (first results))) (vec (keys (first results)))
(count ordered-col-kws) (vec ordered-col-kws)))
{:rows (for [row results]
(mapv row ordered-col-kws)) ; might as well return each row and col info as vecs because we're not worried about making
:columns (mapv name ordered-col-kws) ; them lazy, and results are easier to play with in the REPL / paste into unit tests
......
......@@ -447,12 +447,13 @@
;; mark existing nested fields as inactive if they didn't come back from active-nested-field-name->type
(doseq [[nested-field-name nested-field-id] existing-nested-field-name->id]
(when-not (contains? (set (map keyword (keys nested-field-name->type))) (keyword nested-field-name))
(log/info (format "Marked nested field %s.%s as inactive." @(:qualified-name field) nested-field-name))
(upd Field nested-field-id :active false)))
;; OK, now create new Field objects for ones that came back from active-nested-field-name->type but *aren't* in existing-nested-field-name->id
(doseq [[nested-field-name nested-field-type] nested-field-name->type]
(when-not (contains? (set (map keyword (keys existing-nested-field-name->id))) (keyword nested-field-name))
(log/info (u/format-color 'blue "Found new nested field: %s.%s.%s" (:name @(:table field)) (:name field) (name nested-field-name)))
(log/info (u/format-color 'blue "Found new nested field: %s.%s" @(:qualified-name field) (name nested-field-name)))
(let [nested-field (ins Field, :table_id (:table_id field), :parent_id (:id field), :name (name nested-field-name) :base_type (name nested-field-type), :active true)]
;; Now recursively sync this nested Field
;; Replace parent so deref doesn't need to do a DB call
......
......@@ -1033,27 +1033,44 @@
order venue...name))
;; Nested Field in AGGREGATION
;; Let's see how many *distinct* venue names are mentioned
(expect 99
(Q run against geographical-tips using mongo
return :data :rows first first
aggregate distinct venue...name of tips))
;;; Nested Field in BREAKOUT
;; Now let's just get the regular count
(expect 500
(Q run against geographical-tips using mongo
return :data :rows first first
aggregate count venue...name of tips))
;; TODO - id/$ don't handle nested Fields ?
;;; Nested Field in BREAKOUT
;; Let's see how many tips we have by source.service
(expect
{:rows [["facebook" 107]
["flare" 105]
["foursquare" 100]
["twitter" 98]
["yelp" 90]]
:columns ["source.service" "count"]}
(Q run against geographical-tips using mongo
return :data (#(dissoc % :cols))
aggregate count of tips
breakout source...service))
;;; Nested Field in FIELDS
;; Return the first 10 tips with just tip.venue.name
(expect
[[1 {:name "Lucky's Gluten-Free Café"}]
[2 {:name "Joe's Homestyle Eatery"}]
[3 {:name "Lower Pac Heights Cage-Free Coffee House"}]
[4 {:name "Oakland European Liquor Store"}]
[5 {:name "Tenderloin Gormet Restaurant"}]
[6 {:name "Marina Modern Sushi"}]
[7 {:name "Sunset Homestyle Grill"}]
[8 {:name "Kyle's Low-Carb Grill"}]
[9 {:name "Mission Homestyle Churros"}]
[[1 {:name "Lucky's Gluten-Free Café"}]
[2 {:name "Joe's Homestyle Eatery"}]
[3 {:name "Lower Pac Heights Cage-Free Coffee House"}]
[4 {:name "Oakland European Liquor Store"}]
[5 {:name "Tenderloin Gormet Restaurant"}]
[6 {:name "Marina Modern Sushi"}]
[7 {:name "Sunset Homestyle Grill"}]
[8 {:name "Kyle's Low-Carb Grill"}]
[9 {:name "Mission Homestyle Churros"}]
[10 {:name "Sameer's Pizza Liquor Store"}]]
(Q run against geographical-tips using mongo
return :data :rows
......@@ -1061,26 +1078,3 @@
order _id
fields venue...name
lim 10))
;;; Nested-Nested Fields
;; Zero-argument helper (looks like REPL/debugging scaffolding -- NOTE(review): confirm it is not used by any test).
;; Binds the :mongo dataset, sets up a temp DB from `defs/geographical-tips`, and selects every Field
;; whose :table_id matches `&tips:id`.
;; Presumably `&tips:id` is a token resolved by `with-temp-db` to the ID of the `tips` table -- TODO confirm.
(defn y []
(datasets/with-dataset :mongo
(with-temp-db [_ (dataset-loader) defs/geographical-tips]
(sel :many Field :table_id &tips:id))))
;; Zero-argument helper (looks like REPL/debugging scaffolding -- NOTE(review): confirm it is not used by any test).
;; Runs a "rows" query against the `tips` table of `defs/geographical-tips` on the :mongo dataset,
;; filtering on the nested Field `venue.name` (expressed as ["." <venue-field-id> "name"]) being equal to
;; "Kyle's Low-Carb Grill", limited to 10 results.
(defn x []
(datasets/with-dataset :mongo
(query-with-temp-db defs/geographical-tips
:aggregation ["rows"]
:source_table &tips:id
:filter ["=" ["." &tips.venue:id "name"] "Kyle's Low-Carb Grill"]
:limit 10)))
;; Zero-argument helper (looks like REPL/debugging scaffolding -- NOTE(review): confirm it is not used by any test).
;; Opens a Mongo connection to the `defs/geographical-tips` test database and runs a raw Monger query
;; directly against the "tips" collection, bypassing the Metabase query processor.
;; The result is forced with `doall`, so it is fully realized before `with-mongo-connection` returns.
;; NOTE(review): the find map `{:venue {:name ...}}` compares the whole `venue` sub-document rather than
;; using a dotted "venue.name" path -- presumably an exact-match query; verify against the Mongo docs.
(defn z []
(datasets/with-dataset :mongo
(metabase.driver.mongo.util/with-mongo-connection [^com.mongodb.DBApiLayer db (metabase.test.data/get-or-create-database! defs/geographical-tips)]
(doall (monger.query/with-collection db "tips"
(monger.query/find {:venue {:name "Kyle's Low-Carb Grill"}})
(monger.query/limit 10))))))
......@@ -39,142 +39,3 @@
(def-database-definition-edn tupac-sightings)
(def-database-definition-edn geographical-tips)
(defn random-venue
  "Generate a random venue map with the keys `:name`, `:categories`, `:phone` and `:id`.

   * `:name`       -- \"<owner/neighborhood> <category 1> <category 2>\"
   * `:categories` -- the two randomly chosen categories, as a vector
   * `:phone`      -- a string of the form `415-ddd-dddd`
   * `:id`         -- a random UUID string"
  []
  (let [random-digits (fn [n] (apply str (repeatedly n #(rand-int 10))))
        cat-1         (rand-nth ["Paleo" "Free-Range" "Chinese" "Gluten-Free" "Mexican" "Afgan" "American" "BBQ" "Taquería" "Pizza" "Irish" "Low-Carb" "Gormet" "Red White & Blue"
                                 "Japanese" "Korean" "Cage-Free" "GMO-Free" "No-MSG" "Deep-Dish" "Soul Food" "British" "European" "Homestyle" "Old-Fashioned" "Modern"])
        cat-2         (rand-nth ["Bakery" "Restaurant" "Café" "Gastro Pub" "Eatery" "Pizzeria" "Taqueria" "Bar & Grill" "Coffee House" "Cupcakes" "Sushi" "Liquor Store"
                                 "Grill" "Diner" "Hotel & Restaurant" "Food Truck" "Pop-Up Food Stand" "Churros" "Ice Cream Truck"])
        prefix        (rand-nth ["Cam's" "Rasta's" "Joe's" "Kyle's" "Sameer's" "Lucky's" "SF" "Alcatraz" "Oakland" "Mission" "Chinatown" "Pacific Heights" "Nob Hill" "Marina"
                                 "Lower Pac Heights" "Polk St." "Sunset" "Tenderloin" "SoMa" "Market St." "Haight"])
        exchange      (random-digits 3)
        line-number   (random-digits 4)]
    {:name       (str prefix " " cat-1 " " cat-2)
     :categories [cat-1 cat-2]
     :phone      (str "415-" exchange "-" line-number)
     :id         (str (java.util.UUID/randomUUID))}))
;; Pool of 100 randomly generated venues (see `random-venue`). `repeatedly` returns a lazy seq, so the
;; venues are generated the first time the seq is realized and are then cached for subsequent uses.
(def venues (repeatedly 100 random-venue))
(defn random-source
  "Generate a random `source` map describing where a tip about VENUE came from.

   One of five services is picked at random, and the shape of the map depends on it:

   * twitter    -- `:mentions` (an @-handle derived from the venue name), `:tags` (hashtags derived from
                   the venue's categories) and `:username`
   * flare      -- `:username`
   * foursquare -- `:foursquare-photo-id` and `:mayor`
   * facebook   -- `:facebook-photo-id` and a `:url` built from it
   * yelp       -- `:yelp-photo-id` and the venue's `:categories`"
  [venue]
  (let [username    (rand-nth ["cam_saul" "rasta_toucan" "lucky_pigeon" "sameer" "joe" "bob" "amy" "jane" "jessica" "mandy" "kyle" "tupac" "biggie"])
        service     (rand-nth ["twitter" "flare" "foursquare" "facebook" "yelp"])
        random-uuid #(str (java.util.UUID/randomUUID))]
    (case service
      "twitter"    (let [handle   (-> (:name venue)
                                      clojure.string/lower-case
                                      (clojure.string/replace #"\s|-" "_")
                                      (clojure.string/replace #"'" ""))
                         ;; join the categories with spaces, then split on spaces again so multi-word
                         ;; categories produce one hashtag per word
                         words    (clojure.string/split (clojure.string/join " " (:categories venue)) #" ")
                         hashtags (mapv #(str "#" (clojure.string/lower-case %)) words)]
                     {:service  "twitter"
                      :mentions [(str "@" handle)]
                      :tags     hashtags
                      :username username})
      "flare"      {:service  "flare"
                    :username username}
      "foursquare" {:service             "foursquare"
                    :foursquare-photo-id (random-uuid)
                    :mayor               username}
      "facebook"   (let [fb-id (random-uuid)]
                     {:service           "facebook"
                      :facebook-photo-id fb-id
                      :url               (str "http://facebook.com/photos/" fb-id)})
      "yelp"       {:service       "yelp"
                    :yelp-photo-id (random-uuid)
                    :categories    (:categories venue)})))
(defn random-tip
  "Generate a random one-sentence tip (a string) about VENUE, of the form:

     \"<venue name> is a <adjective> and <adjective> <noun> to <activity> <occasion>.\"

   Only the `:name` of VENUE is used. `<noun>` is \"place\" roughly half of the time and one of several
   more colorful alternatives otherwise."
  [venue]
  (let [adjectives ["great"
                    "decent"
                    "acceptable"
                    "fantastic"
                    "wonderful"
                    "amazing"
                    "delicious"
                    "atmospheric"
                    "family-friendly"
                    "exclusive"
                    "well-decorated"
                    "modern"
                    "classic"
                    "world-famous"
                    "popular"
                    "underappreciated"
                    "historical"
                    "swell"
                    "groovy"
                    "underground"
                    "horrible"
                    "overrated"]]
    (str (:name venue)
         " is a "
         (rand-nth adjectives)
         " and "
         (rand-nth adjectives)
         " "
         ;; Coin flip. This used to be (rand-int 1), which can only ever return 0, so the alternative
         ;; nouns below were never chosen; (rand-int 2) returns 0 or 1.
         (if (zero? (rand-int 2))
           "place"
           (rand-nth ["local landmark"
                      "tourist destination"
                      "hidden gem"
                      "traditional hippie hangout"
                      "hipster spot"]))
         " to "
         (rand-nth ["catch a bite to eat"
                    "have a after-work cocktail"
                    "conduct a business meeting"
                    "pitch an investor"
                    "have brunch"
                    "people-watch"
                    "take visiting friends and relatives"
                    "meet new friends"
                    "have a birthday party"
                    "have breakfast"
                    "take a date"
                    "nurse a hangover"
                    "have a drink"
                    "drink a craft beer"
                    "sip a glass of expensive wine"
                    "sip Champagne"
                    "watch the Giants game"
                    "watch the Warriors game"])
         " "
         (rand-nth ["with friends"
                    "on a Tuesday afternoon"
                    "weekend mornings"
                    "weekday afternoons"
                    "weekend evenings"
                    "on Taco Tuesday"
                    "Friday nights"
                    "the first Sunday of the month"
                    "the second Saturday of the month"
                    "during summer"
                    "during winter"
                    "in July"
                    "in June"
                    "after baseball games"
                    "when hungover"
                    "in the spring"
                    "in the fall"
                    "with your pet dog"
                    "with your pet toucan"
                    "on Thursdays"
                    "on Saturday night"
                    "on public holidays"])
         ".")))
(defn random-photo
  "Generate a random row of the form `[tip-text photo-urls venue source]` for a venue picked at random
   from `venues`.

   `photo-urls` is a map of `:small`, `:medium` and `:large` URLs that all share one random UUID path."
  []
  (let [url-template (str "http://cloudfront.net/" (java.util.UUID/randomUUID) "/%s.jpg")
        venue        (rand-nth venues)
        url-for      (fn [size] (format url-template size))]
    [(random-tip venue)
     {:small  (url-for "small")
      :medium (url-for "med")
      :large  (url-for "large")}
     ;; NOTE(review): `random-venue` as defined in this file never emits :cat-1 / :cat-2 keys, so this
     ;; dissoc looks like a no-op -- kept to preserve behavior.
     (dissoc venue :cat-1 :cat-2)
     (random-source venue)]))
......@@ -97,9 +97,11 @@
(let [[outer-tokens inner-tokens] (split-with (complement (partial contains? inner-q-tokens)) tokens)
outer-tokens (partition-tokens outer-q-tokens outer-tokens)
inner-tokens (partition-tokens inner-q-tokens inner-tokens)
query (macroexpand-all `(Q:expand-inner ~@inner-tokens))]
query (macroexpand-all `(Q:expand-inner ~@inner-tokens))
table (second (:source_table (:query query)))]
(assert table "No table specified. Did you include a `tbl`/`of` clause?")
`(Q:wrap-fallback-captures (Q:expand-outer* ~outer-tokens
(symbol-macrolet [~'table ~(second (:source_table (:query query)))
(symbol-macrolet [~'table ~table
~'fl Q:field]
~(macroexpand-all query))))))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment