Skip to content
Snippets Groups Projects
Commit 8e6697e0 authored by Cam Saül's avatar Cam Saül
Browse files

cleanup

parent 8d6908ec
No related branches found
No related tags found
No related merge requests found
......@@ -85,6 +85,7 @@
:jvm-opts ["-Dmb.db.file=target/metabase-test"
"-Dmb.jetty.join=false"
"-Dmb.jetty.port=3001"
"-Dmb.api.key=test-api-key"]}
"-Dmb.api.key=test-api-key"
"-Xverify:none"]} ; disable bytecode verification when running tests so they start slightly faster
:uberjar {:aot :all
:prep-tasks ^:replace ["npm" "webpack" "javac" "compile"]}})
......@@ -19,6 +19,13 @@
(declare driver)
;; TODO - this isn't necessarily Mongo-specific
;; Passed to `take` when sampling a collection's documents or a field's values, so that schema inference
;; only ever looks at a bounded prefix of the data.
(def ^:private ^:const document-scanning-limit
"The maximum number of documents (or field values) to scan when looking for Fields.
We can't feasibly scan every document in a million+ document collection, so only the first
`document-scanning-limit` documents are examined, on the assumption that the rest follow the same schema."
10000)
;;; ### Driver Helper Fns
(defn- table->column-names
......@@ -26,7 +33,7 @@
[table]
(with-mongo-connection [^com.mongodb.DBApiLayer conn @(:db table)]
(->> (mc/find-maps conn (:name table))
(take 10000) ; it's probably enough to only consider the first 10,000 docs in the collection instead of iterating over potentially millions of them
(take document-scanning-limit)
(map keys)
(map set)
(reduce set/union))))
......@@ -108,20 +115,19 @@
;; Build a map of nested-field-key -> type -> count
;; TODO - using an atom isn't the *fastest* thing in the world (but is the easiest); consider alternate implementation
(let [field->type->count (atom {})]
;; Look at the first 1000 values
(doseq [val (take 1000 (field-values-lazy-seq this field))]
(doseq [val (take document-scanning-limit (field-values-lazy-seq this field))]
(when (map? val)
(doseq [[k v] val]
(swap! field->type->count update-in [k (type v)] #(if % (inc %) 1)))))
;; (seq types) will give us a seq of pairs like [java.lang.String 500]
(->> @field->type->count
(m/map-vals (fn [type->count]
(->> (seq type->count) ; convert to pairs of [type count]
(sort-by second) ; source by count
last ; take last item (highest count)
first ; keep just the type
(->> (seq type->count) ; convert to pairs of [type count]
(sort-by second) ; source by count
last ; take last item (highest count)
first ; keep just the type
(#(or (driver/class->base-type %) ; convert to corresponding Field base_type if possible
:UnknownField))))))))) ; fall back to :UnknownField for things like clojure.lang.PersistentVector
:UnknownField))))))))) ; fall back to :UnknownField for things like clojure.lang.PersistentVector
(def driver
"Concrete instance of the MongoDB driver."
......
......@@ -76,22 +76,29 @@
(if *mongo-connection* (f# *mongo-connection*)
(-with-mongo-connection f# ~database))))
;; TODO - this is actually more sophisticated than the one used for annotation in the GenericSQL driver, which just takes the
;; types of the values in the first row.
;; We should move this somewhere where it can be shared amongst the drivers and rewrite GenericSQL to use it instead.
;; TODO - this isn't necessarily Mongo-specific; consider moving
(defn values->base-type
  "Given a sequence of values, return the `Field.base_type` that best describes them.
   Counts the classes of (at most) the first 1,000 non-nil values and returns the `base_type` for the class
   whose count is highest. Returns `:UnknownField` when there are no non-nil values, or when
   `driver/class->base-type` has no mapping for the most common class."
  [values-seq]
  {:pre [(sequential? values-seq)]}
  (or (->> values-seq
           ;; TODO - why not do a query to return non-nil values of this column instead
           ;; `(remove nil?)` rather than `(filter identity)`: `false` is a real value and must still be counted.
           (remove nil?)
           ;; it's probably fine just to consider the first 1,000 *non-nil* values when trying to type a column instead
           ;; of iterating over the whole collection. (VALUES-SEQ should be up to 10,000 values, but we don't know how
           ;; many are nil)
           (take 1000)
           (map type)
           frequencies              ; map of class -> number of occurrences
           (sort-by val)            ; ascending by count
           last                     ; entry with the highest count (nil if there were no values)
           first                    ; keep just the class
           driver/class->base-type) ; convert to the corresponding Field base_type if possible
      :UnknownField))
......@@ -419,8 +419,8 @@
(and (or (nil? valid-base-types)
(contains? valid-base-types base-type))
(re-matches name-pattern (s/lower-case field-name))
(if top-level-only? (nil? (:parent_id field))
true)))
(or (not top-level-only?)
(nil? (:parent_id field)))))
pattern+base-types+special-type+top-level-only?)))))
(defn- auto-assign-field-special-type-by-name!
......@@ -436,7 +436,7 @@
(defn- sync-field-nested-fields! [driver field]
(when (and (= (:base_type field) :DictionaryField)
(supports? driver :nested-fields) ; if one of these is true
(supports? driver :nested-fields) ; if one of these is true
(satisfies? ISyncDriverFieldNestedFields driver)) ; the other should be :wink:
(let [nested-field-name->type (active-nested-field-name->type driver field)]
(log/info (u/format-color 'green "Syncing subfields for '%s.%s': %s" (:name @(:table field)) (:name field) (keys nested-field-name->type)))
......
......@@ -46,8 +46,7 @@
:base_type "TextField"
:parent_id nil
:parent nil})
((user->client :rasta) :get
200 (format "meta/field/%d" (id :users :name))))
((user->client :rasta) :get 200 (format "meta/field/%d" (id :users :name))))
;; ## GET /api/meta/field/:id/summary
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment