From f9f7895004600e3d4e32a7cd039d7ba68462b8bd Mon Sep 17 00:00:00 2001 From: Cam Saul <cammsaul@gmail.com> Date: Wed, 6 Dec 2017 14:24:21 -0800 Subject: [PATCH] Clarify some of the code in the Mongo driver. [ci drivers] --- src/metabase/driver/generic_sql.clj | 2 +- src/metabase/driver/mongo.clj | 49 +++++++++++++++++------------ 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/src/metabase/driver/generic_sql.clj b/src/metabase/driver/generic_sql.clj index 1cc011eca28..c416e4ac9a5 100644 --- a/src/metabase/driver/generic_sql.clj +++ b/src/metabase/driver/generic_sql.clj @@ -344,7 +344,7 @@ (defn- describe-table-fields [^DatabaseMetaData metadata, driver, {schema :schema, table-name :name}] (set (for [{database-type :type_name, column-name :column_name} (jdbc/result-set-seq (.getColumns metadata nil schema table-name nil))] (merge {:name column-name - :database-type (name database-type) + :database-type database-type :base-type (database-type->base-type driver database-type)} (when-let [special-type (calculated-special-type driver column-name database-type)] {:special-type special-type}))))) diff --git a/src/metabase/driver/mongo.clj b/src/metabase/driver/mongo.clj index 28b51c0a996..87bf18118f5 100644 --- a/src/metabase/driver/mongo.clj +++ b/src/metabase/driver/mongo.clj @@ -106,17 +106,19 @@ (find-nested-fields field-value nested-fields) nested-fields))))) -(s/defn ^:private ^:always-validate most-common-object-type :- Class - [field-types :- [(s/pair Class "Class" s/Int "Int")]] +(s/defn ^:private most-common-object-type :- Class + "Given a sequence of tuples like [Class <number-of-occurrences>] return the Class with the highest number of + occurrences. The basic idea here is to take a sample of values for a Field and then determine the most common type + for its values, and use that as the Metabase base type.
For example if we have a Field called `zip_code` and it's a + number 90% of the time and a string the other 10%, we'll just call it a `:type/Number`." + [field-types :- [(s/pair Class "Class", s/Int "Int")]] (->> field-types (sort-by second) last first)) (defn- describe-table-field [field-kw field-info] - ;; :types (let [most-common-object-type (most-common-object-type (vec (:types field-info)))] - ;; TODO: indicate preview-display status based on :len (cond-> {:name (name field-kw) :database-type (.getName most-common-object-type) :base-type (driver/class->base-type most-common-object-type)} @@ -134,26 +136,33 @@ {:tables (set (for [collection (disj (mdb/get-collection-names conn) "system.indexes")] {:schema nil, :name collection}))})) +(defn- table-sample-column-info + "Sample the rows (i.e., documents) in `table` and return a map of information about the column keys we found in that + sample. The results will look something like: + + {:_id {:count 200, :len nil, :types {java.lang.Long 200}, :special-types nil, :nested-fields nil}, + :severity {:count 200, :len nil, :types {java.lang.Long 200}, :special-types nil, :nested-fields nil}}" + [^com.mongodb.DB conn, table] + (try + (->> (mc/find-maps conn (:name table)) + (take driver/max-sample-rows) + (reduce + (fn [field-defs row] + (loop [[k & more-keys] (keys row), fields field-defs] + (if-not k + fields + (recur more-keys (update fields k (partial update-field-attrs (k row))))))) + {})) + (catch Throwable t + (log/error (format "Error introspecting collection: %s" (:name table)) t)))) + (defn- describe-table [database table] (with-mongo-connection [^com.mongodb.DB conn database] - ;; TODO: ideally this would take the LAST set of rows added to the table so we could ensure this data changes on reruns - (let [parsed-rows (try - (->> (mc/find-maps conn (:name table)) - (take driver/max-sample-rows) - (reduce - (fn [field-defs row] - (loop [[k & more-keys] (keys row) - fields field-defs] - (if-not k - fields - (recur 
more-keys (update fields k (partial update-field-attrs (k row))))))) - {})) - (catch Throwable t - (log/error (format "Error introspecting collection: %s" (:name table)) t)))] + (let [column-info (table-sample-column-info conn table)] {:schema nil :name (:name table) - :fields (set (for [field (keys parsed-rows)] - (describe-table-field field (field parsed-rows))))}))) + :fields (set (for [[field info] column-info] + (describe-table-field field info)))}))) (defrecord MongoDriver [] -- GitLab