Skip to content
Snippets Groups Projects
Commit ac93c6ba authored by Cam Saul's avatar Cam Saul
Browse files

more work on mongo subfield stuff

parent c9f8cf6e
No related branches found
No related tags found
No related merge requests found
......@@ -70,12 +70,19 @@
* dest-column-name"))
;; ## ISyncDriverFieldSubFields
;; Lets a driver report the nested (child) fields found inside the values of a FIELD.
;; The sync step `update-field-subfields!` only calls this for Fields whose `base_type` is `:UnknownField`,
;; and only when the driver both supports `:nested-fields` and satisfies this protocol.
(defprotocol ISyncDriverFieldSubFields
"Optional protocol that should provide information about the subfields of a FIELD when applicable.
Drivers that declare support for `:nested-fields` should implement this protocol."
;; THIS is the driver instance; FIELD is the parent Field whose values are inspected.
;; NOTE(review): the docstring promises *string* names, but the MongoDB implementation uses the keys of the
;; value maps as-is -- confirm whether those are strings or keywords.
(active-subfield-names->type [this field]
"Return a map of string names of active child `Fields` of FIELD -> `Field.base_type`."))
;; ## ISyncDriverField Protocols
;; Sync drivers need to implement either ISyncDriverFieldValues or ISyncDriverFieldAvgLength *and* ISyncDriverFieldPercentUrls.
;;
;; ISyncDriverFieldValues is used to provide a generic fallback implementation of the other two that calculate these values by
;; iterating over a few thousand values of the Field in Clojure-land. Since that's slower, it's preferable to provide implementations
;; of ISyncDriverFieldAvgLength/ISyncDriverFieldPercentUrls when possible. (You can also implement ISyncDriverFieldValues and
;; *one* of the other two; the optimized implementation will be used for that and the fallback implementation for the other)
......
......@@ -4,6 +4,7 @@
[clojure.set :as set]
[clojure.tools.logging :as log]
[colorize.core :as color]
[medley.core :as m]
(monger [collection :as mc]
[command :as cmd]
[conversion :as conv]
......@@ -96,7 +97,30 @@
(map (keyword (:name field))
(with-mongo-connection [^com.mongodb.DBApiLayer conn @(:db table)]
(mq/with-collection conn (:name table)
(mq/fields [(:name field)]))))))))
(mq/fields [(:name field)])))))))
ISyncDriverFieldSubFields
(active-subfield-names->type [this field]
  ;; The shared sync code should really be the one enforcing this precondition
  (assert (= (:base_type field) :UnknownField))
  (let [;; Only the first 1000 values of the Field are sampled
        sampled-values        (take 1000 (field-values-lazy-seq this field))
        ;; Fold one map value into the running tally, shaped like {nested-key {value-class count}}
        tally-document        (fn [key->type->count document]
                                (reduce (fn [acc [k v]]
                                          (update-in acc [k (type v)] (fnil inc 0)))
                                        key->type->count
                                        document))
        ;; Values that are not maps contribute nothing to the tally
        key->type->count      (reduce tally-document {} (filter map? sampled-values))
        ;; Given {value-class count} for one key, pick the class seen most often and translate it
        most-common-base-type (fn [type->count]
                                (let [[klass _] (->> (seq type->count) ; pairs like [java.lang.String 500]
                                                     (sort-by second)  ; sort by count, ascending
                                                     last)]            ; highest count wins
                                  (or (driver/class->base-type klass)  ; corresponding Field base_type if there is one
                                      :UnknownField)))]                ; fallback for e.g. clojure.lang.PersistentVector
    ;; The last paren on the next line closes the enclosing form that starts above this method
    (m/map-vals most-common-base-type key->type->count))))
(def driver
"Concrete instance of the MongoDB driver."
......@@ -111,17 +135,6 @@
'[metabase.test.data.dataset-definitions :as defs]
'[metabase.test.util.mql :refer [Q]])
(defn- field-collect-subfields
  "Return the set of all keys that appear in the map values among FIELD's first 1000 values.
   Values that are not maps are ignored."
  [field]
  (let [sampled-maps (filter map? (take 1000 (field-values-lazy-seq driver field)))]
    ;; `set` realizes the whole sample eagerly, like the original reduce did
    (set (mapcat keys sampled-maps))))
(defn x []
(Q run with db geographical-tips
with dataset mongo
......@@ -134,4 +147,5 @@
(datasets/with-dataset :mongo
(data/with-temp-db [db (data/dataset-loader) defs/geographical-tips]
(with-mongo-connection [_ db]
(field-collect-subfields &tips.venue)))))
;; 61 ms ???
(active-subfield-names->type driver &tips.venue)))))
......@@ -43,18 +43,22 @@
"Run F with a new connection (bound to `*mongo-connection*`) to DATABASE.
Don't use this directly; use `with-mongo-connection`."
[f database]
(println (metabase.util/format-color 'red "<<OPENING A NEW MONGO CONNECTION>>"))
(let [connection-string (cond
(string? database) database
(:dbname (:details database)) (details-map->connection-string (:details database)) ; new-style -- entire Database obj
(:dbname database) (details-map->connection-string database) ; new-style -- connection details map only
:else (throw (Exception. (str "with-mongo-connection failed: bad connection details:" (:details database)))))
{conn :conn mongo-connection :db} (mg/connect-via-uri connection-string)]
{conn :conn, mongo-connection :db} (mg/connect-via-uri connection-string)]
(log/debug (color/cyan "<< OPENED NEW MONGODB CONNECTION >>"))
(try
(binding [*mongo-connection* mongo-connection]
(f *mongo-connection*))
(finally
(mg/disconnect conn)))))
(println (metabase.util/format-color 'red "DISCONNECTING!"))
(assert conn)
(mg/disconnect conn)
(println (metabase.util/format-color 'green "OK."))))))
(defmacro with-mongo-connection
"Open a new MongoDB connection to DATABASE-OR-CONNECTION-STRING, bind connection to BINDING, execute BODY, and close the connection.
......
......@@ -24,6 +24,7 @@
sync-table-active-fields-and-pks!
sync-table-fks!
sync-table-fields-metadata!
update-field-subfields!
update-table-row-count!)
;; ## sync-database! and sync-table!
......@@ -249,7 +250,8 @@
(mark-url-field! driver)
mark-category-field!
(mark-no-preview-display-field! driver)
auto-assign-field-special-type-by-name!))
auto-assign-field-special-type-by-name!
(update-field-subfields! driver)))
;; Each field-syncing function below should return FIELD with any updates that we made, or nil.
......@@ -353,7 +355,7 @@
;; ### auto-assign-field-special-type-by-name!
(def ^{:arglists '([field])}
(def ^:private ^{:arglists '([field])}
field->name-inferred-special-type
"If FIELD has a `name` and `base_type` that matches a known pattern, return the `special_type` we should assign to it."
(let [bool-or-int #{:BooleanField :BigIntegerField :IntegerField}
......@@ -415,7 +417,7 @@
(re-matches name-pattern (s/lower-case field-name))))
pattern+base-types+special-type))))
(defn auto-assign-field-special-type-by-name!
(defn- auto-assign-field-special-type-by-name!
"If FIELD doesn't have a special type, but has a name that matches a known pattern like `latitude`, mark it as having the specified special type."
[field]
(when-not (:special_type field)
......@@ -424,3 +426,11 @@
(name (:base_type field)) (:name @(:table field)) (:name field) pattern (name special-type)))
(upd Field (:id field) :special_type special-type)
(assoc field :special_type special-type))))
(defn- update-field-subfields!
  "If FIELD has a `base_type` of `:UnknownField` and DRIVER supports `:nested-fields` (and implements
   `ISyncDriverFieldSubFields`), ask the driver for FIELD's active subfields and log their names.
   Nothing is persisted here; the subfield name -> type map is only logged."
  [driver field]
  (when (and (= (:base_type field) :UnknownField)
             (supports? driver :nested-fields)               ; if one of these is true
             (satisfies? ISyncDriverFieldSubFields driver))  ; the other should be as well
    (let [subfield-name->type (active-subfield-names->type driver field)]
      ;; `log/info` does not interpolate format strings, so build the message with `format` first.
      ;; `vec` makes an empty result print as [] rather than null.
      (log/info (format "Syncing subfields for '%s.%s': %s"
                        (:name @(:table field))
                        (:name field)
                        (vec (keys subfield-name->type)))))))
......@@ -22,8 +22,8 @@
(create-physical-db! [_ _])
(drop-physical-db! [this database-definition]
  ;; Drop the database exactly once. `with-open` closes the connection when the body finishes,
  ;; even if the drop throws, so the connection from `mg/connect` is not leaked.
  (with-open [mongo-connection (mg/connect (database->connection-details this database-definition))]
    (mg/drop-db mongo-connection (escaped-name database-definition))))
;; Nothing to do here, collection is created when we add documents to it
(create-physical-table! [_ _ _])
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment