Skip to content
Snippets Groups Projects
Unverified Commit 1ba2d10b authored by Cam Saul's avatar Cam Saul
Browse files

Fix bug with list classification logic + tests

parent 4c0b0a30
Branches
Tags
No related merge requests found
......@@ -31,7 +31,7 @@
(defn- field-should-be-category? [distinct-count field]
;; only mark a Field as a Category if it doesn't already have a special type
(when-not (:special_type field)
(when (< distinct-count field-values/category-cardinality-threshold)
(when (<= distinct-count field-values/category-cardinality-threshold)
(log/debug (format "%s has %d distinct values. Since that is less than %d, we're marking it as a category."
(sync-util/name-for-logging field)
distinct-count
......@@ -43,12 +43,12 @@
;; only update has_field_values if it hasn't been set yet. If it's already been set then it was probably done so
;; manually by an admin, and we don't want to stomp over their choices.
(when (nil? (:has_field_values field))
(when (< distinct-count field-values/list-cardinality-threshold)
(when (<= distinct-count field-values/list-cardinality-threshold)
(log/debug (format "%s has %d distinct values. Since that is less than %d, it should have cached FieldValues."
(sync-util/name-for-logging field)
distinct-count
field-values/list-cardinality-threshold)))
true))
field-values/list-cardinality-threshold))
true)))
(s/defn infer-is-category-or-list :- (s/maybe i/FieldInstance)
......
(ns metabase.sample-dataset-test
"Tests to make sure the Sample Dataset syncs the way we would expect."
(:require [expectations :refer :all]
[metabase.models
[database :refer [Database]]
[field :refer [Field]]
[table :refer [Table]]]
[metabase.sample-data :as sample-data]
[metabase.sync :as sync]
[metabase.util :as u]
[toucan.db :as db]
[toucan.hydrate :refer [hydrate]]
[toucan.util.test :as tt]))
;;; ---------------------------------------------------- Tooling -----------------------------------------------------
;; These tools are pretty sophisticated for the amount of
(defn- sample-dataset-db []
{:details (#'sample-data/db-details)
:engine :h2
:name "Sample Dataset"})
(defmacro ^:private with-temp-sample-dataset-db
"Execute `body` with a temporary Sample Dataset DB bound to `db-binding`."
{:style/indent 1}
[[db-binding] & body]
`(tt/with-temp Database [db# (sample-dataset-db)]
(sync/sync-database! db#)
(let [~db-binding db#]
~@body)))
(defn- table
"Get the Table in a `db` with `table-name`."
[db table-name]
(db/select-one Table :name table-name, :db_id (u/get-id db)))
(defn- field
"Get the Field in a `db` with `table-name` and `field-name.`"
[db table-name field-name]
(db/select-one Field :name field-name, :table_id (u/get-id (table db table-name))))
;;; ----------------------------------------------------- Tests ------------------------------------------------------
;; Make sure the Sample Dataset is getting synced correctly. For example PEOPLE.NAME should be has_field_values = search
;; instead of `list`.
(expect
{:description "The name of the user who owns an account"
:database_type "VARCHAR"
:special_type :type/Name
:name "NAME"
:has_field_values :search
:active true
:visibility_type :normal
:preview_display true
:display_name "Name"
:fingerprint {:global {:distinct-count 2500}
:type {:type/Text {:percent-json 0.0
:percent-url 0.0
:percent-email 0.0
:average-length 13.516}}}
:base_type :type/Text}
(with-temp-sample-dataset-db [db]
(-> (field db "PEOPLE" "NAME")
;; it should be `nil` after sync but get set to `search` by the auto-inference. We only set `list` in sync and
;; setting anything else is reserved for admins, however we fill in what we think should be the appropiate value
;; with the hydration fn
(hydrate :has_field_values)
(select-keys [:name :description :database_type :special_type :has_field_values :active :visibility_type
:preview_display :display_name :fingerprint :base_type]))))
(ns metabase.sync.analyze.classifiers.category-test
"Tests for the category classifier."
(:require [expectations :refer :all]
[metabase.sync.analyze.classifiers.category :as category-classifier]))
;; make sure the logic for deciding whether a Field should be a list works as expected
(expect
nil
(#'category-classifier/field-should-be-list?
2500
{:database_type "VARCHAR"
:special_type :type/Name
:name "NAME"
:fingerprint_version 1
:has_field_values nil
:active true
:visibility_type :normal
:preview_display true
:display_name "Name"
:fingerprint {:global {:distinct-count 2500}
:type
{:type/Text
{:percent-json 0.0
:percent-url 0.0
:percent-email 0.0
:average-length 13.516}}}
:base_type :type/Text}))
......@@ -147,22 +147,20 @@
:base_type :type/Text
:fk_target_field_id true})
(merge field-defaults
{:special_type nil
:name "title"
:display_name "Title"
:database_type "VARCHAR"
:base_type :type/Text
:has_field_values "list"})]})
{:special_type nil
:name "title"
:display_name "Title"
:database_type "VARCHAR"
:base_type :type/Text})]})
(merge table-defaults
{:name "studio"
:display_name "Studio"
:fields [(merge field-defaults
{:special_type :type/Name
:name "name"
:display_name "Name"
:database_type "VARCHAR"
:base_type :type/Text
:has_field_values "list"})
{:special_type :type/Name
:name "name"
:display_name "Name"
:database_type "VARCHAR"
:base_type :type/Text})
(merge field-defaults
{:special_type :type/PK
:name "studio"
......@@ -191,19 +189,17 @@
:database_type "SERIAL"
:base_type :type/Integer})
(merge field-defaults
{:special_type nil
:name "studio"
:display_name "Studio"
:database_type "VARCHAR"
:base_type :type/Text
:has_field_values "list"})
{:special_type nil
:name "studio"
:display_name "Studio"
:database_type "VARCHAR"
:base_type :type/Text})
(merge field-defaults
{:special_type nil
:name "title"
:display_name "Title"
:database_type "VARCHAR"
:base_type :type/Text
:has_field_values "list"})]})
{:special_type nil
:name "title"
:display_name "Title"
:database_type "VARCHAR"
:base_type :type/Text})]})
(tt/with-temp* [Database [db {:engine :sync-test}]
Table [table {:name "movie"
:schema "default"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment