Skip to content
Snippets Groups Projects
Unverified Commit 331f6a96 authored by Cam Saul's avatar Cam Saul Committed by GitHub
Browse files

Merge pull request #7556 from metabase/allow-field-values-for-list-fields-over-limit

Allow saving FieldValues for list Fields if over the limit
parents 59f79c5f 9bc7204a
No related branches found
No related tags found
No related merge requests found
Showing
with 342 additions and 160 deletions
......@@ -14,15 +14,11 @@
[metabase.api.common.internal :refer :all]
[metabase.models.interface :as mi]
[puppetlabs.i18n.core :refer [trs tru]]
[ring.util
[io :as rui]
[response :as rr]]
[ring.core.protocols :as protocols]
[ring.util.response :as response]
[schema.core :as s]
[toucan.db :as db])
(:import [java.io BufferedWriter OutputStream OutputStreamWriter]
[java.nio.charset Charset StandardCharsets]))
(:import java.io.OutputStream))
(declare check-403 check-404)
......
......@@ -75,7 +75,7 @@
points_of_interest (s/maybe su/NonBlankString)
special_type (s/maybe FieldType)
visibility_type (s/maybe FieldVisibilityType)
has_field_values (s/maybe (s/enum "search" "list" "none"))}
has_field_values (s/maybe (apply s/enum (map name field/has-field-values-options)))}
(let [field (hydrate (api/write-check Field id) :dimensions)
new-special-type (keyword (get body :special_type (:special_type field)))
removed-fk? (removed-fk-special-type? (:special_type field) new-special-type)
......
......@@ -4,11 +4,11 @@
[metabase
[query-processor :as qp]
[util :as u]]
[metabase.models
[field-values :as field-values]
[table :refer [Table]]]
[metabase.models.table :refer [Table]]
[metabase.query-processor.interface :as qpi]
[metabase.query-processor.middleware.expand :as ql]
[metabase.util.schema :as su]
[schema.core :as s]
[toucan.db :as db]))
(defn- qp-query [db-id query]
......@@ -42,15 +42,32 @@
(u/pprint-to-str results))
(throw e)))))
(defn field-distinct-values
(def ^:private ^Integer absolute-max-distinct-values-limit
"The absolute maximum number of results to return for a `field-distinct-values` query. Normally Fields with 100 or
less values (at the time of this writing) get marked as `auto-list` Fields, meaning we save all their distinct
values in a FieldValues object, which powers a list widget in the FE when using the Field for filtering in the QB.
Admins can however manually mark any Field as `list`, which is effectively ordering Metabase to keep FieldValues for
the Field regardless of its cardinality.
Of course, if a User does something crazy, like mark a million-arity Field as List, we don't want Metabase to
explode trying to make their dreams a reality; we need some sort of hard limit to prevent catastrophes. So this
limit is effectively a safety to prevent Users from nuking their own instance for Fields that really shouldn't be
List Fields at all. For these very-high-cardinality Fields, we're effectively capping the number of
FieldValues that get could saved.
This number should be a balance of:
* Not being too low, which would definitly result in GitHub issues along the lines of 'My 500-distinct-value Field
that I marked as List is not showing all values in the List Widget'
* Not being too high, which would result in Metabase running out of memory dealing with too many values"
(int 5000))
(s/defn field-distinct-values
"Return the distinct values of FIELD.
This is used to create a `FieldValues` object for `:type/Category` Fields."
([field]
;; fetch up to one more value than allowed for FieldValues. e.g. if the max is 100 distinct values fetch up to 101.
;; That way we will know if we're over the limit
(field-distinct-values field (inc field-values/list-cardinality-threshold)))
([field max-results]
{:pre [(integer? max-results)]}
(field-distinct-values field absolute-max-distinct-values-limit))
([field, max-results :- su/IntGreaterThanZero]
(mapv first (field-query field (-> {}
(ql/breakout (ql/field-id (u/get-id field)))
(ql/limit max-results))))))
......
......@@ -15,7 +15,7 @@
;;; ------------------------------------------------- Type Mappings --------------------------------------------------
(def ^:const visibility-types
(def visibility-types
"Possible values for `Field.visibility_type`."
#{:normal ; Default setting. field has no visibility restrictions.
:details-only ; For long blob like columns such as JSON. field is not shown in some places on the frontend.
......@@ -23,6 +23,37 @@
:sensitive ; Strict removal of field from all places except data model listing. queries should error if someone attempts to access.
:retired}) ; For fields that no longer exist in the physical db. automatically set by Metabase. QP should error if encountered in a query.
(def has-field-values-options
"Possible options for `has_field_values`. This column is used to determine whether we keep FieldValues for a Field,
and which type of widget should be used to pick values of this Field when filtering by it in the Query Builder."
;; AUTOMATICALLY-SET VALUES, SET DURING SYNC
;;
;; `nil` -- means infer which widget to use based on logic in `with-has-field-values`; this will either return
;; `:search` or `:none`.
;;
;; This is the default state for Fields not marked `auto-list`. Admins cannot explicitly mark a Field as
;; `has_field_values` `nil`. This value is also subject to automatically change in the future if the values of a
;; Field change in such a way that it can now be marked `auto-list`. Fields marked `nil` do *not* have FieldValues
;; objects.
;;
#{;; The other automatically-set option. Automatically marked as a 'List' Field based on cardinality and other factors
;; during sync. Store a FieldValues object; use the List Widget. If this Field goes over the distinct value
;; threshold in a future sync, the Field will get switched back to `has_field_values = nil`.
:auto-list
;;
;; EXPLICITLY-SET VALUES, SET BY AN ADMIN
;;
;; Admin explicitly marked this as a 'Search' Field, which means we should *not* keep FieldValues, and should use
;; Search Widget.
:search
;; Admin explicitly marked this as a 'List' Field, which means we should keep FieldValues, and use the List
;; Widget. Unlike `auto-list`, if this Field grows past the normal cardinality constraints in the future, it will
;; remain `List` until explicitly marked otherwise.
:list
;; Admin explicitly marked that this Field shall always have a plain-text widget, neither allowing search, nor
;; showing a list of possible values. FieldValues not kept.
:none})
;;; ----------------------------------------------- Entity & Lifecycle -----------------------------------------------
......@@ -97,7 +128,7 @@
:special_type :keyword
:visibility_type :keyword
:description :clob
:has_field_values :clob
:has_field_values :keyword
:fingerprint :json})
:properties (constantly {:timestamped? true})
:pre-insert pre-insert
......@@ -149,7 +180,7 @@
{:batched-hydrate :normal_values}
[fields]
(let [id->field-values (select-field-id->instance (filter fv/field-should-have-field-values? fields)
[FieldValues :id :human_readable_values :values :field_id])]
[FieldValues :id :human_readable_values :values :field_id])]
(for [field fields]
(assoc field :values (get id->field-values (:id field) [])))))
......@@ -175,30 +206,30 @@
(or (isa? base-type :type/Text)
(isa? base-type :type/TextLike)))
(defn with-has-field-values
"Infer what the value of the `has_field_values` should be for Fields where it's not set. Admins can set this to one
of the values below, but if it's `nil` in the DB we'll infer it automatically.
(defn- infer-has-field-values
"Determine the value of `has_field_values` we should return for a `Field` As of 0.29.1 this doesn't require any DB
calls! :D"
[{has-field-values :has_field_values, :as field}]
(or
;; if `has_field_values` is set in the DB, use that value; but if it's `auto-list`, return the value as `list` to
;; avoid confusing FE code, which can remain blissfully unaware that `auto-list` is a thing
(when has-field-values
(if (= (keyword has-field-values) :auto-list)
:list
has-field-values))
;; otherwise if it does not have value set in DB we will infer it
(if (is-searchable? field)
:search
:none)))
* `list` = has an associated FieldValues object
* `search` = does not have FieldValues
* `none` = admin has explicitly disabled search behavior for this Field"
(defn with-has-field-values
"Infer what the value of the `has_field_values` should be for Fields where it's not set. See documentation for
`has-field-values-options` above for a more detailed explanation of what these values mean."
{:batched-hydrate :has_field_values}
[fields]
(let [fields-without-has-field-values-ids (set (for [field fields
:when (nil? (:has_field_values field))]
(:id field)))
fields-with-fieldvalues-ids (when (seq fields-without-has-field-values-ids)
(db/select-field :field_id FieldValues
:field_id [:in fields-without-has-field-values-ids]))]
(for [field fields]
(when field
(assoc field
:has_field_values (or
(:has_field_values field)
(cond
(contains? fields-with-fieldvalues-ids (u/get-id field)) :list
(is-searchable? field) :search
:else :none)))))))
(for [field fields]
(when field
(assoc field :has_field_values (infer-has-field-values field)))))
(defn readable-fields-only
"Efficiently checks if each field is readable and returns only readable fields"
......
(ns metabase.models.field-values
(:require [clojure.tools.logging :as log]
[metabase.util :as u]
[metabase.util.schema :as su]
[puppetlabs.i18n.core :refer [trs]]
[schema.core :as s]
[toucan
[db :as db]
[models :as models]]))
......@@ -11,9 +14,9 @@
frontend behavior such as widget choices."
(int 30))
(def ^:const ^Integer list-cardinality-threshold
"Fields with less than this many distincy values should be given a `has_field_values` value of `list`, which means the
Field should have FieldValues."
(def ^:const ^Integer auto-list-cardinality-threshold
"Fields with less than this many distincy values should be given a `has_field_values` value of `list`, which means
the Field should have FieldValues."
(int 100))
(def ^:private ^:const ^Integer entry-max-length
......@@ -22,7 +25,7 @@
(def ^:private ^:const ^Integer total-max-length
"Maximum total length for a `FieldValues` entry (combined length of all values for the field)."
(int (* list-cardinality-threshold entry-max-length)))
(int (* auto-list-cardinality-threshold entry-max-length)))
;; ## Entity + DB Multimethods
......@@ -39,16 +42,18 @@
;; ## `FieldValues` Helper Functions
(defn field-should-have-field-values?
"Should this `Field` be backed by a corresponding `FieldValues` object?"
(s/defn field-should-have-field-values? :- s/Bool
"Should this `field` be backed by a corresponding `FieldValues` object?"
{:arglists '([field])}
[{base-type :base_type, visibility-type :visibility_type, has-field-values :has_field_values, :as field}]
{:pre [visibility-type
(contains? field :base_type)
(contains? field :has_field_values)]}
(and (not (contains? #{:retired :sensitive :hidden :details-only} (keyword visibility-type)))
(not (isa? (keyword base-type) :type/DateTime))
(= has-field-values "list")))
[{base-type :base_type, visibility-type :visibility_type, has-field-values :has_field_values, :as field}
:- {:visibility_type su/KeywordOrString
:base_type (s/maybe su/KeywordOrString)
:has_field_values (s/maybe su/KeywordOrString)
s/Keyword s/Any}]
(boolean
(and (not (contains? #{:retired :sensitive :hidden :details-only} (keyword visibility-type)))
(not (isa? (keyword base-type) :type/DateTime))
(#{:list :auto-list} (keyword has-field-values)))))
(defn- values-less-than-total-max-length?
......@@ -63,32 +68,19 @@
"FieldValues are allowed for this Field."
"FieldValues are NOT allowed for this Field.")))))
(defn- cardinality-less-than-threshold?
"`true` if the number of DISTINCT-VALUES is less that `list-cardinality-threshold`.
Does some logging as well."
[distinct-values]
(let [num-values (count distinct-values)]
(u/prog1 (<= num-values list-cardinality-threshold)
(log/debug (if <>
(format "Field has %d distinct values (max %d). FieldValues are allowed for this Field."
num-values list-cardinality-threshold)
(format "Field has over %d values. FieldValues are NOT allowed for this Field."
list-cardinality-threshold))))))
(defn- distinct-values
"Fetch a sequence of distinct values for FIELD that are below the `total-max-length` threshold. If the values are past
the threshold, this returns `nil`."
"Fetch a sequence of distinct values for `field` that are below the `total-max-length` threshold. If the values are
past the threshold, this returns `nil`."
[field]
(require 'metabase.db.metadata-queries)
(let [values ((resolve 'metabase.db.metadata-queries/field-distinct-values) field)]
(when (cardinality-less-than-threshold? values)
(when (values-less-than-total-max-length? values)
values))))
(when (values-less-than-total-max-length? values)
values)))
(defn- fixup-human-readable-values
"Field values and human readable values are lists that are zipped together. If the field values have changes, the
human readable values will need to change too. This function reconstructs the human_readable_values to reflect
human readable values will need to change too. This function reconstructs the `human_readable_values` to reflect
`NEW-VALUES`. If a new field value is found, a string version of that is used"
[{old-values :values, old-hrv :human_readable_values} new-values]
(when (seq old-hrv)
......@@ -96,24 +88,41 @@
(map #(get orig-remappings % (str %)) new-values))))
(defn create-or-update-field-values!
"Create or update the FieldValues object for FIELD. If the FieldValues object already exists, then update values for
"Create or update the FieldValues object for `field`. If the FieldValues object already exists, then update values for
it; otherwise create a new FieldValues object with the newly fetched values."
[field & [human-readable-values]]
(let [field-values (FieldValues :field_id (u/get-id field))
values (distinct-values field)
field-name (or (:name field) (:id field))]
(cond
;; If this Field is marked `auto-list`, and the number of values in now over the list threshold, we need to
;; unmark it as `auto-list`. Switch it to `has_field_values` = `nil` and delete the FieldValues; this will
;; result in it getting a Search Widget in the UI when `has_field_values` is automatically inferred by the
;; `metabase.models.field/infer-has-field-values` hydration function (see that namespace for more detailed
;; discussion)
;;
;; It would be nicer if we could do this in analysis where it gets marked `:auto-list` in the first place, but
;; Fingerprints don't get updated regularly enough that we could detect the sudden increase in cardinality in a
;; way that could make this work. Thus, we are stuck doing it here :(
(and (> (count values) auto-list-cardinality-threshold)
(= :auto-list (keyword (:has_field_values field))))
(do
(log/info (trs "Field {0} was previously automatically set to show a list widget, but now has {1} values."
field-name (count values))
(trs "Switching Field to use a search widget instead."))
(db/update! 'Field (u/get-id field) :has_field_values nil)
(db/delete! FieldValues :field_id (u/get-id field)))
;; if the FieldValues object already exists then update values in it
(and field-values values)
(do
(log/debug (format "Storing updated FieldValues for Field %s..." field-name))
(log/debug (trs "Storing updated FieldValues for Field {0}..." field-name))
(db/update-non-nil-keys! FieldValues (u/get-id field-values)
:values values
:human_readable_values (fixup-human-readable-values field-values values)))
;; if FieldValues object doesn't exist create one
values
(do
(log/debug (format "Storing FieldValues for Field %s..." field-name))
(log/debug (trs "Storing FieldValues for Field {0}..." field-name))
(db/insert! FieldValues
:field_id (u/get-id field)
:values values
......
......@@ -7,7 +7,9 @@
[encryption :as encryption]]
[schema.core :as s]
[taoensso.nippy :as nippy]
[toucan.models :as models])
[toucan
[models :as models]
[util :as toucan-util]])
(:import java.sql.Blob))
;;; +----------------------------------------------------------------------------------------------------------------+
......@@ -78,6 +80,13 @@
:in validate-cron-string
:out identity)
;; Toucan ships with a Keyword type, but on columns that are marked 'TEXT' it doesn't work properly since the values
;; might need to get de-CLOB-bered first. So replace the default Toucan `:keyword` implementation with one that
;; handles those cases.
(models/add-type! :keyword
:in toucan-util/keyword->qualified-name
:out (comp keyword u/jdbc-clob->str))
;;; properties
......
......@@ -69,9 +69,13 @@
cases where more than one name Field exists for a Table, this just adds the first one it finds."
[fields]
(when-let [table-ids (seq (map :table_id fields))]
(u/key-by :table_id (db/select Field:params-columns-only
:table_id [:in table-ids]
:special_type (mdb/isa :type/Name)))))
(u/key-by :table_id (-> (db/select Field:params-columns-only
:table_id [:in table-ids]
:special_type (mdb/isa :type/Name))
;; run `metabase.models.field/infer-has-field-values` on these Fields so their values of
;; `has_field_values` will be consistent with what the FE expects. (e.g. we'll return
;; `list` instead of `auto-list`.)
(hydrate :has_field_values)))))
(defn add-name-field
"For all `fields` that are `:type/PK` Fields, look for a `:type/Name` Field belonging to the same Table. For each
......
......@@ -11,7 +11,9 @@
determined by the cardinality of the Field, like Category status. Thus it is entirely possibly for a Field to be
both a Category and a `list` Field."
(:require [clojure.tools.logging :as log]
[metabase.models.field-values :as field-values]
[metabase.models
[field :as field]
[field-values :as field-values]]
[metabase.sync
[interface :as i]
[util :as sync-util]]
......@@ -28,7 +30,8 @@
(isa? special-type :type/PK)
(isa? special-type :type/FK)))
(defn- field-should-be-category? [distinct-count field]
(s/defn ^:private field-should-be-category? :- (s/maybe s/Bool)
[distinct-count :- s/Int, field :- su/Map]
;; only mark a Field as a Category if it doesn't already have a special type
(when-not (:special_type field)
(when (<= distinct-count field-values/category-cardinality-threshold)
......@@ -38,19 +41,20 @@
field-values/category-cardinality-threshold))
true)))
(defn- field-should-be-list? [distinct-count field]
{:pre [(contains? field :has_field_values)]}
(s/defn ^:private field-should-be-auto-list? :- (s/maybe s/Bool)
"Based on `distinct-count`, should we mark this `field` as `has_field_values` = `auto-list`?"
[distinct-count :- s/Int, field :- {:has_field_values (s/maybe (apply s/enum field/has-field-values-options))
s/Keyword s/Any}]
;; only update has_field_values if it hasn't been set yet. If it's already been set then it was probably done so
;; manually by an admin, and we don't want to stomp over their choices.
(when (nil? (:has_field_values field))
(when (<= distinct-count field-values/list-cardinality-threshold)
(when (<= distinct-count field-values/auto-list-cardinality-threshold)
(log/debug (format "%s has %d distinct values. Since that is less than %d, it should have cached FieldValues."
(sync-util/name-for-logging field)
distinct-count
field-values/list-cardinality-threshold))
field-values/auto-list-cardinality-threshold))
true)))
(s/defn infer-is-category-or-list :- (s/maybe i/FieldInstance)
"Classifier that attempts to determine whether FIELD ought to be marked as a Category based on its distinct count."
[field :- i/FieldInstance, fingerprint :- (s/maybe i/Fingerprint)]
......@@ -58,5 +62,5 @@
(when-not (cannot-be-category-or-list? (:base_type field) (:special_type field))
(when-let [distinct-count (get-in fingerprint [:global :distinct-count])]
(cond-> field
(field-should-be-category? distinct-count field) (assoc :special_type :type/Category)
(field-should-be-list? distinct-count field) (assoc :has_field_values "list"))))))
(field-should-be-category? distinct-count field) (assoc :special_type :type/Category)
(field-should-be-auto-list? distinct-count field) (assoc :has_field_values :auto-list))))))
......@@ -7,7 +7,7 @@
"Generate a fingerprint of global information for Fields of all types."
[values :- i/FieldSample]
;; TODO - this logic isn't as nice as the old logic that actually called the DB
;; We used to do (queries/field-distinct-count field field-values/list-cardinality-threshold)
;; We used to do (queries/field-distinct-count field field-values/auto-list-cardinality-threshold)
;; Consider whether we are so married to the idea of only generating fingerprints from samples that we
;; are ok with inaccurate counts like the one we'll surely be getting here
{:distinct-count (count (distinct values))})
......@@ -16,19 +16,19 @@
;;; ---------------------------------------- field-should-have-field-values? -----------------------------------------
(expect (field-should-have-field-values? {:has_field_values "list"
(expect (field-should-have-field-values? {:has_field_values :list
:visibility_type :normal
:base_type :type/Text}))
(expect false (field-should-have-field-values? {:has_field_values "list"
(expect false (field-should-have-field-values? {:has_field_values :list
:visibility_type :sensitive
:base_type :type/Text}))
(expect false (field-should-have-field-values? {:has_field_values "list"
(expect false (field-should-have-field-values? {:has_field_values :list
:visibility_type :hidden
:base_type :type/Text}))
(expect false (field-should-have-field-values? {:has_field_values "list"
(expect false (field-should-have-field-values? {:has_field_values :list
:visibility_type :details-only
:base_type :type/Text}))
......@@ -36,11 +36,11 @@
:visibility_type :normal
:base_type :type/Text}))
(expect (field-should-have-field-values? {:has_field_values "list"
(expect (field-should-have-field-values? {:has_field_values :list
:visibility_type :normal
:base_type :type/Text}))
(expect (field-should-have-field-values? {:has_field_values "list"
(expect (field-should-have-field-values? {:has_field_values :list
:special_type :type/Category
:visibility_type :normal
:base_type "type/Boolean"}))
......@@ -48,32 +48,32 @@
;; retired/sensitive/hidden/details-only fields should always be excluded
(expect false (field-should-have-field-values? {:base_type :type/Boolean
:has_field_values "list"
:has_field_values :list
:visibility_type :retired}))
(expect false (field-should-have-field-values? {:base_type :type/Boolean
:has_field_values "list"
:has_field_values :list
:visibility_type :sensitive}))
(expect false (field-should-have-field-values? {:base_type :type/Boolean
:has_field_values "list"
:has_field_values :list
:visibility_type :hidden}))
(expect false (field-should-have-field-values? {:base_type :type/Boolean
:has_field_values "list"
:has_field_values :list
:visibility_type :details-only}))
;; date/time based fields should always be excluded
(expect false (field-should-have-field-values? {:base_type :type/Date
:has_field_values "list"
:has_field_values :list
:visibility_type :normal}))
(expect false (field-should-have-field-values? {:base_type :type/DateTime
:has_field_values "list"
:has_field_values :list
:visibility_type :normal}))
(expect false (field-should-have-field-values? {:base_type :type/Time
:has_field_values "list"
:has_field_values :list
:visibility_type :normal}))
......
......@@ -4,8 +4,7 @@
[metabase.api.public-test :as public-test]
[metabase.models
[card :refer [Card]]
[field :refer [Field]]
[params :as params]]
[field :refer [Field]]]
[metabase.test.data :as data]
[toucan
[db :as db]
......@@ -24,7 +23,7 @@
:display_name "Name"
:base_type :type/Text
:special_type :type/Name
:has_field_values "list"}}
:has_field_values :list}}
(-> (db/select-one [Field :name :table_id :special_type], :id (data/id :venues :id))
(hydrate :name_field)))
......@@ -70,7 +69,7 @@
:display_name "Name"
:base_type :type/Text
:special_type :type/Name
:has_field_values "list"}
:has_field_values :list}
:dimensions []}}
(tt/with-temp Card [card {:dataset_query
{:database (data/id)
......@@ -96,7 +95,7 @@
:display_name "Name"
:base_type :type/Text
:special_type :type/Name
:has_field_values "list"}
:has_field_values :list}
:dimensions []}}
(public-test/with-sharing-enabled-and-temp-dashcard-referencing :venues :id [dashboard]
(-> (hydrate dashboard :param_fields)
......
......@@ -3,25 +3,34 @@
(:require [expectations :refer :all]
[metabase.sync.analyze.classifiers.category :as category-classifier]))
(defn- field-with-distinct-count [distinct-count]
{:database_type "VARCHAR"
:special_type :type/Name
:name "NAME"
:fingerprint_version 1
:has_field_values nil
:active true
:visibility_type :normal
:preview_display true
:display_name "Name"
:fingerprint {:global {:distinct-count distinct-count}
:type
{:type/Text
{:percent-json 0.0
:percent-url 0.0
:percent-email 0.0
:average-length 13.516}}}
:base_type :type/Text})
;; make sure the logic for deciding whether a Field should be a list works as expected
(expect
nil
(#'category-classifier/field-should-be-list?
(#'category-classifier/field-should-be-auto-list?
2500
{:database_type "VARCHAR"
:special_type :type/Name
:name "NAME"
:fingerprint_version 1
:has_field_values nil
:active true
:visibility_type :normal
:preview_display true
:display_name "Name"
:fingerprint {:global {:distinct-count 2500}
:type
{:type/Text
{:percent-json 0.0
:percent-url 0.0
:percent-email 0.0
:average-length 13.516}}}
:base_type :type/Text}))
(field-with-distinct-count 2500)))
(expect
true
(#'category-classifier/field-should-be-auto-list?
99
(field-with-distinct-count 99)))
(ns metabase.sync.field-values-test
"Tests around the way Metabase syncs FieldValues, and sets the values of `field.has_field_values`."
(:require [clojure.java.jdbc :as jdbc]
[clojure.string :as str]
[expectations :refer :all]
[metabase
[db :as mdb]
[driver :as driver]
[sync :as sync :refer :all]]
[metabase.driver.generic-sql :as sql]
[metabase.models
[database :refer [Database]]
[field :refer [Field]]
[field-values :as field-values :refer [FieldValues]]]
[metabase.test
[data :as data]
[util :as tu]]
[toucan.db :as db]
[toucan.util.test :as tt]))
;;; --------------------------------------------------- Helper Fns ---------------------------------------------------
(defn- insert-range-sql
"Generate SQL to insert a row for each number in `rang`."
[rang]
(str "INSERT INTO blueberries_consumed (num) VALUES "
(str/join ", " (for [n rang]
(str "(" n ")")))))
(def ^:private ^:dynamic *conn* nil)
(defn- do-with-blueberries-db
"An empty canvas upon which you may paint your dreams.
Creates a database with a single table, `blueberries_consumed`, with one column, `num`; binds this DB to
`data/*get-db*` so you can use `data/db` and `data/id` to access it."
{:style/indent 0}
[f]
(let [details {:db (str "mem:" (tu/random-name) ";DB_CLOSE_DELAY=10")}]
(binding [mdb/*allow-potentailly-unsafe-connections* true]
(tt/with-temp Database [db {:engine :h2, :details details}]
(jdbc/with-db-connection [conn (sql/connection-details->spec (driver/engine->driver :h2) details)]
(jdbc/execute! conn ["CREATE TABLE blueberries_consumed (num INTEGER NOT NULL);"])
(binding [data/*get-db* (constantly db)]
(binding [*conn* conn]
(f))))))))
(defmacro ^:private with-blueberries-db {:style/indent 0} [& body]
`(do-with-blueberries-db (fn [] ~@body)))
(defn- insert-blueberries-and-sync!
"With the temp blueberries db from above, insert a `range` of values and re-sync the DB.
(insert-blueberries-and-sync! [0 1 2 3]) ; insert 4 rows"
[rang]
(jdbc/execute! *conn* [(insert-range-sql rang)])
(sync-database! (data/db)))
;;; ---------------------------------------------- The Tests Themselves ----------------------------------------------
;; A Field with 50 values should get marked as `auto-list` on initial sync, because it should be 'list', but was
;; marked automatically, as opposed to explicitly (`list`)
(expect
:auto-list
(with-blueberries-db
;; insert 50 rows & sync
(insert-blueberries-and-sync! (range 50))
;; has_field_values should be auto-list
(db/select-one-field :has_field_values Field :id (data/id :blueberries_consumed :num))))
;; ... and it should also have some FieldValues
(expect
#metabase.models.field_values.FieldValuesInstance
{:values [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49]
:human_readable_values {}}
(with-blueberries-db
(insert-blueberries-and-sync! (range 50))
(db/select-one [FieldValues :values :human_readable_values], :field_id (data/id :blueberries_consumed :num))))
;; ok, but if the number grows past the threshold & we sync again it should get unmarked as auto-list and set back to
;; `nil` (#3215)
(expect
nil
(with-blueberries-db
;; insert 50 bloobs & sync. has_field_values should be auto-list
(insert-blueberries-and-sync! (range 50))
(assert (= (db/select-one-field :has_field_values Field :id (data/id :blueberries_consumed :num))
:auto-list))
;; now insert enough bloobs to put us over the limit and re-sync.
(insert-blueberries-and-sync! (range 50 (+ 100 field-values/auto-list-cardinality-threshold)))
;; has_field_values should have been set to nil.
(db/select-one-field :has_field_values Field :id (data/id :blueberries_consumed :num))))
;; ...its FieldValues should also get deleted.
(expect
nil
(with-blueberries-db
;; do the same steps as the test above...
(insert-blueberries-and-sync! (range 50))
(insert-blueberries-and-sync! (range 50 (+ 100 field-values/auto-list-cardinality-threshold)))
;; ///and FieldValues should also have been deleted.
(db/select-one [FieldValues :values :human_readable_values], :field_id (data/id :blueberries_consumed :num))))
;; If we had explicitly marked the Field as `list` (instead of `auto-list`), adding extra values shouldn't change
;; anything!
(expect
:list
(with-blueberries-db
;; insert 50 bloobs & sync
(insert-blueberries-and-sync! (range 50))
;; change has_field_values to list
(db/update! Field (data/id :blueberries_consumed :num) :has_field_values "list")
;; insert more bloobs & re-sync
(insert-blueberries-and-sync! (range 50 (+ 100 field-values/auto-list-cardinality-threshold)))
;; has_field_values shouldn't change
(db/select-one-field :has_field_values Field :id (data/id :blueberries_consumed :num))))
;; it should still have FieldValues, and have new ones for the new Values. It should have 200 values even though this
;; is past the normal limit of 100 values!
(expect
#metabase.models.field_values.FieldValuesInstance
{:values [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
189 190 191 192 193 194 195 196 197 198 199]
:human_readable_values {}}
(with-blueberries-db
;; follow the same steps as the test above...
(insert-blueberries-and-sync! (range 50))
(db/update! Field (data/id :blueberries_consumed :num) :has_field_values "list")
(insert-blueberries-and-sync! (range 50 (+ 100 field-values/auto-list-cardinality-threshold)))
;; ... and FieldValues should still be there, but this time updated to include the new values!
(db/select-one [FieldValues :values :human_readable_values], :field_id (data/id :blueberries_consumed :num))))
(ns metabase.sync-database-test
"Tests for sync behavior that use a imaginary `SyncTestDriver`. These are kept around mainly because they've already
been written. For newer sync tests see `metabase.sync.*` test namespaces."
(:require [clojure.java.jdbc :as jdbc]
[clojure.string :as str]
[expectations :refer :all]
(:require [expectations :refer :all]
[metabase
[db :as mdb]
[driver :as driver]
[sync :refer :all]
[util :as u]]
[metabase.driver.generic-sql :as sql]
[metabase.models
[database :refer [Database]]
[field :refer [Field]]
......@@ -22,7 +18,6 @@
[toucan.db :as db]
[toucan.util.test :as tt]))
(def ^:private ^:const sync-test-tables
{"movie" {:name "movie"
:schema "default"
......@@ -357,38 +352,8 @@
(do (sync-table! (Table (data/id :venues)))
(get-field-values))]))
;; Make sure that if a Field's cardinality passes `list-cardinality-threshold` (currently 100) the corresponding
;; FieldValues entry will be deleted (#3215)
(defn- insert-range-sql
"Generate SQL to insert a row for each number in `rang`."
[rang]
(str "INSERT INTO blueberries_consumed (num) VALUES "
(str/join ", " (for [n rang]
(str "(" n ")")))))
(expect
false
(let [details {:db (str "mem:" (tu/random-name) ";DB_CLOSE_DELAY=10")}]
(binding [mdb/*allow-potentailly-unsafe-connections* true]
(tt/with-temp Database [db {:engine :h2, :details details}]
(jdbc/with-db-connection [conn (sql/connection-details->spec (driver/engine->driver :h2) details)]
(let [exec! #(doseq [statement %]
(jdbc/execute! conn [statement]))]
;; create the `blueberries_consumed` table and insert 50 values
(exec! ["CREATE TABLE blueberries_consumed (num INTEGER NOT NULL);"
(insert-range-sql (range 50))])
(sync-database! db)
(let [table-id (db/select-one-id Table :db_id (u/get-id db))
field-id (db/select-one-id Field :table_id table-id)]
;; field values should exist...
(assert (= (count (db/select-one-field :values FieldValues :field_id field-id))
50))
;; ok, now insert enough rows to push the field past the `list-cardinality-threshold` and sync again,
;; there should be no more field values
(exec! [(insert-range-sql (range 50 (+ 100 field-values/list-cardinality-threshold)))])
(sync-database! db)
(db/exists? FieldValues :field_id field-id))))))))
;; TODO - hey, what is this testing? If you wrote this test, please explain what's going on here
(defn- narrow-to-min-max [row]
(-> row
(get-in [:type :type/Number])
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment