Skip to content
Snippets Groups Projects
Unverified Commit 2ff44770 authored by metamben's avatar metamben Committed by GitHub
Browse files

When sampling MongoDB collections, look at the oldest records too (#27838)

parent f98dcaf3
Branches
Tags
No related merge requests found
......@@ -189,6 +189,13 @@
{:tables (set (for [collection (disj (mdb/get-collection-names conn) "system.indexes")]
{:schema nil, :name collection}))}))
(defn- sample-documents
  "Run a query against the collection named by `(:name table)` on `conn`, sorted on `_id` using
  `sort-direction` and limited to `metadata-queries/nested-field-sample-limit` documents, and return
  whatever `mq/exec` yields for it. Callers in this file pass `1` and `-1` as `sort-direction`."
  [^com.mongodb.DB conn table sort-direction]
  (let [collection   (.getCollection conn (:name table))
        base-query   (mq/empty-query collection)
        sample-query (assoc base-query
                            :sort  {:_id sort-direction}
                            :limit metadata-queries/nested-field-sample-limit)]
    (mq/exec sample-query)))
(defn- table-sample-column-info
"Sample the rows (i.e., documents) in `table` and return a map of information about the column keys we found in that
sample. The results will look something like:
......@@ -204,11 +211,7 @@
fields
(recur more-keys (update fields k (partial update-field-attrs (k row)))))))
{}
(-> (.getCollection conn (:name table))
mq/empty-query
(assoc :sort {:_id -1}
:limit metadata-queries/nested-field-sample-limit)
mq/exec))
(concat (sample-documents conn table 1) (sample-documents conn table -1)))
(catch Throwable t
(log/error (format "Error introspecting collection: %s" (:name table)) t))))
......
......@@ -203,29 +203,32 @@
:table_id (mt/id :bird_species)
{:order-by [:name]})))))))
;; Test dataset with a single `bird_species` table holding two Text fields, `name` and
;; `favorite_snack`. The first two rows have a nil `favorite_snack`; only the last two rows carry a
;; value for it, so the column is empty at the beginning of the table.
(tx/defdataset beginning-null-columns
[["bird_species"
[{:field-name "name", :base-type :type/Text}
{:field-name "favorite_snack", :base-type :type/Text}]
[["House Finch" nil]
["Mourning Dove" nil]
["Common Blackbird" "earthworms"]
["Silvereye" "cherries"]]]])
(deftest new-rows-take-precedence-when-collecting-metadata
(deftest new-rows-take-precedence-when-collecting-metadata-test
(mt/test-driver :mongo
(with-redefs [metadata-queries/nested-field-sample-limit 2]
(mt/dataset beginning-null-columns
;; do a full sync on the DB to get the correct semantic type info
(sync/sync-database! (mt/db))
(is (= [{:name "_id", :database_type "java.lang.Long", :base_type :type/Integer, :semantic_type :type/PK}
{:name "favorite_snack", :database_type "java.lang.String", :base_type :type/Text, :semantic_type :type/Category}
{:name "name", :database_type "java.lang.String", :base_type :type/Text, :semantic_type :type/Name}]
(map
(partial into {})
(db/select [Field :name :database_type :base_type :semantic_type]
:table_id (mt/id :bird_species)
{:order-by [:name]}))))))))
(binding [tdm/*remove-nil?* true]
(mt/with-temp-test-data
["bird_species"
[{:field-name "name", :base-type :type/Text}
{:field-name "favorite_snack", :base-type :type/Text}
{:field-name "max_wingspan", :base-type :type/Integer}]
[["Sharp-shinned Hawk" nil 68]
["Tropicbird" nil 112]
["House Finch" nil nil]
["Mourning Dove" nil nil]
["Common Blackbird" "earthworms" nil]
["Silvereye" "cherries" nil]]]
;; do a full sync on the DB to get the correct semantic type info
(sync/sync-database! (mt/db))
(is (= #{{:name "_id", :database_type "java.lang.Long", :base_type :type/Integer, :semantic_type :type/PK}
{:name "favorite_snack", :database_type "java.lang.String", :base_type :type/Text, :semantic_type :type/Category}
{:name "name", :database_type "java.lang.String", :base_type :type/Text, :semantic_type :type/Name}
{:name "max_wingspan", :database_type "java.lang.Long", :base_type :type/Integer, :semantic_type nil}}
(into #{}
(map (partial into {}))
(db/select [Field :name :database_type :base_type :semantic_type]
:table_id (mt/id :bird_species)
{:order-by [:name]})))))))))
(deftest table-rows-sample-test
(mt/test-driver :mongo
......
......@@ -3,6 +3,7 @@
[cheshire.generate :as json.generate]
[clojure.java.io :as io]
[clojure.test :refer :all]
[medley.core :as m]
[metabase.driver.ddl.interface :as ddl.i]
[metabase.driver.mongo.util :refer [with-mongo-connection]]
[metabase.test.data.interface :as tx]
......@@ -50,6 +51,10 @@
(with-mongo-connection [mongo-connection (tx/dbdef->connection-details driver :server dbdef)]
(mg/drop-db (.getMongo mongo-connection) (tx/escaped-database-name dbdef))))
(def ^:dynamic *remove-nil?*
"When truthy while a dataset is being created, nil-valued fields are removed from each row's
field-name -> value map before the document is inserted, so those keys are absent from the stored
document rather than present with a nil value. Defaults to false (nil-valued fields are kept).
Intended to be rebound with `binding` around dataset creation."
false)
(defmethod tx/create-db! :mongo
[driver {:keys [table-definitions], :as dbdef} & {:keys [skip-drop-db?], :or {skip-drop-db? false}}]
(when-not skip-drop-db?
......@@ -63,7 +68,8 @@
(try
;; Insert each row
(mcoll/insert mongo-db (name table-name) (into {:_id (inc i)}
(zipmap field-names row)))
(cond->> (zipmap field-names row)
*remove-nil?* (m/remove-vals nil?))))
;; If row already exists then nothing to do
(catch com.mongodb.MongoException _)))))))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment