diff --git a/src/metabase/api/common.clj b/src/metabase/api/common.clj index 0371d49afc174d1eb10fa4c345d2e29a21782ca3..9162a54c4a36d647ea92e8ff2ce27f1ffa439914 100644 --- a/src/metabase/api/common.clj +++ b/src/metabase/api/common.clj @@ -14,15 +14,11 @@ [metabase.api.common.internal :refer :all] [metabase.models.interface :as mi] [puppetlabs.i18n.core :refer [trs tru]] - [ring.util - [io :as rui] - [response :as rr]] [ring.core.protocols :as protocols] [ring.util.response :as response] [schema.core :as s] [toucan.db :as db]) - (:import [java.io BufferedWriter OutputStream OutputStreamWriter] - [java.nio.charset Charset StandardCharsets])) + (:import java.io.OutputStream)) (declare check-403 check-404) diff --git a/src/metabase/api/field.clj b/src/metabase/api/field.clj index 8a5d17741cc7fe0a31bc50b16764bb596c29442b..a1ea3eec15b4ba298387c1acaa1c1e597624608d 100644 --- a/src/metabase/api/field.clj +++ b/src/metabase/api/field.clj @@ -75,7 +75,7 @@ points_of_interest (s/maybe su/NonBlankString) special_type (s/maybe FieldType) visibility_type (s/maybe FieldVisibilityType) - has_field_values (s/maybe (s/enum "search" "list" "none"))} + has_field_values (s/maybe (apply s/enum (map name field/has-field-values-options)))} (let [field (hydrate (api/write-check Field id) :dimensions) new-special-type (keyword (get body :special_type (:special_type field))) removed-fk? (removed-fk-special-type? (:special_type field) new-special-type) diff --git a/src/metabase/db/metadata_queries.clj b/src/metabase/db/metadata_queries.clj index caae266a05e7b0e6b30295ec7c1d21dd1e25143a..7dc23248e6e32ab127ba3eec332ef2284a06f591 100644 --- a/src/metabase/db/metadata_queries.clj +++ b/src/metabase/db/metadata_queries.clj @@ -4,11 +4,11 @@ [metabase [query-processor :as qp] [util :as u]] - [metabase.models - [field-values :as field-values] - [table :refer [Table]]] + [metabase.models.table :refer [Table]] [metabase.query-processor.interface :as qpi] [metabase.query-processor.middleware.expand :as ql] + [metabase.util.schema :as su] + [schema.core :as s] [toucan.db :as db])) (defn- qp-query [db-id query] @@ -42,15 +42,32 @@ (u/pprint-to-str results)) (throw e))))) -(defn field-distinct-values +(def ^:private ^Integer absolute-max-distinct-values-limit + "The absolute maximum number of results to return for a `field-distinct-values` query. Normally Fields with 100 or + less values (at the time of this writing) get marked as `auto-list` Fields, meaning we save all their distinct + values in a FieldValues object, which powers a list widget in the FE when using the Field for filtering in the QB. + Admins can however manually mark any Field as `list`, which is effectively ordering Metabase to keep FieldValues for + the Field regardless of its cardinality. + + Of course, if a User does something crazy, like mark a million-arity Field as List, we don't want Metabase to + explode trying to make their dreams a reality; we need some sort of hard limit to prevent catastrophes. So this + limit is effectively a safety to prevent Users from nuking their own instance for Fields that really shouldn't be + List Fields at all. For these very-high-cardinality Fields, we're effectively capping the number of + FieldValues that get could saved. + + This number should be a balance of: + + * Not being too low, which would definitly result in GitHub issues along the lines of 'My 500-distinct-value Field + that I marked as List is not showing all values in the List Widget' + * Not being too high, which would result in Metabase running out of memory dealing with too many values" + (int 5000)) + +(s/defn field-distinct-values "Return the distinct values of FIELD. This is used to create a `FieldValues` object for `:type/Category` Fields." ([field] - ;; fetch up to one more value than allowed for FieldValues. e.g. if the max is 100 distinct values fetch up to 101. - ;; That way we will know if we're over the limit - (field-distinct-values field (inc field-values/list-cardinality-threshold))) - ([field max-results] - {:pre [(integer? max-results)]} + (field-distinct-values field absolute-max-distinct-values-limit)) + ([field, max-results :- su/IntGreaterThanZero] (mapv first (field-query field (-> {} (ql/breakout (ql/field-id (u/get-id field))) (ql/limit max-results)))))) diff --git a/src/metabase/models/field.clj b/src/metabase/models/field.clj index 51cdcbb016cf0985f94ed7f11fbe87ea4509f70a..09ae4b590779315749401fab31743bd0b81500d0 100644 --- a/src/metabase/models/field.clj +++ b/src/metabase/models/field.clj @@ -15,7 +15,7 @@ ;;; ------------------------------------------------- Type Mappings -------------------------------------------------- -(def ^:const visibility-types +(def visibility-types "Possible values for `Field.visibility_type`." #{:normal ; Default setting. field has no visibility restrictions. :details-only ; For long blob like columns such as JSON. field is not shown in some places on the frontend. @@ -23,6 +23,37 @@ :sensitive ; Strict removal of field from all places except data model listing. queries should error if someone attempts to access. :retired}) ; For fields that no longer exist in the physical db. automatically set by Metabase. QP should error if encountered in a query. +(def has-field-values-options + "Possible options for `has_field_values`. This column is used to determine whether we keep FieldValues for a Field, + and which type of widget should be used to pick values of this Field when filtering by it in the Query Builder." + ;; AUTOMATICALLY-SET VALUES, SET DURING SYNC + ;; + ;; `nil` -- means infer which widget to use based on logic in `with-has-field-values`; this will either return + ;; `:search` or `:none`. + ;; + ;; This is the default state for Fields not marked `auto-list`. Admins cannot explicitly mark a Field as + ;; `has_field_values` `nil`. This value is also subject to automatically change in the future if the values of a + ;; Field change in such a way that it can now be marked `auto-list`. Fields marked `nil` do *not* have FieldValues + ;; objects. + ;; + #{;; The other automatically-set option. Automatically marked as a 'List' Field based on cardinality and other factors + ;; during sync. Store a FieldValues object; use the List Widget. If this Field goes over the distinct value + ;; threshold in a future sync, the Field will get switched back to `has_field_values = nil`. + :auto-list + ;; + ;; EXPLICITLY-SET VALUES, SET BY AN ADMIN + ;; + ;; Admin explicitly marked this as a 'Search' Field, which means we should *not* keep FieldValues, and should use + ;; Search Widget. + :search + ;; Admin explicitly marked this as a 'List' Field, which means we should keep FieldValues, and use the List + ;; Widget. Unlike `auto-list`, if this Field grows past the normal cardinality constraints in the future, it will + ;; remain `List` until explicitly marked otherwise. + :list + ;; Admin explicitly marked that this Field shall always have a plain-text widget, neither allowing search, nor + ;; showing a list of possible values. FieldValues not kept. + :none}) + ;;; ----------------------------------------------- Entity & Lifecycle ----------------------------------------------- @@ -97,7 +128,7 @@ :special_type :keyword :visibility_type :keyword :description :clob - :has_field_values :clob + :has_field_values :keyword :fingerprint :json}) :properties (constantly {:timestamped? true}) :pre-insert pre-insert @@ -149,7 +180,7 @@ {:batched-hydrate :normal_values} [fields] (let [id->field-values (select-field-id->instance (filter fv/field-should-have-field-values? fields) - [FieldValues :id :human_readable_values :values :field_id])] + [FieldValues :id :human_readable_values :values :field_id])] (for [field fields] (assoc field :values (get id->field-values (:id field) []))))) @@ -175,30 +206,30 @@ (or (isa? base-type :type/Text) (isa? base-type :type/TextLike))) -(defn with-has-field-values - "Infer what the value of the `has_field_values` should be for Fields where it's not set. Admins can set this to one - of the values below, but if it's `nil` in the DB we'll infer it automatically. +(defn- infer-has-field-values + "Determine the value of `has_field_values` we should return for a `Field` As of 0.29.1 this doesn't require any DB + calls! :D" + [{has-field-values :has_field_values, :as field}] + (or + ;; if `has_field_values` is set in the DB, use that value; but if it's `auto-list`, return the value as `list` to + ;; avoid confusing FE code, which can remain blissfully unaware that `auto-list` is a thing + (when has-field-values + (if (= (keyword has-field-values) :auto-list) + :list + has-field-values)) + ;; otherwise if it does not have value set in DB we will infer it + (if (is-searchable? field) + :search + :none))) - * `list` = has an associated FieldValues object - * `search` = does not have FieldValues - * `none` = admin has explicitly disabled search behavior for this Field" +(defn with-has-field-values + "Infer what the value of the `has_field_values` should be for Fields where it's not set. See documentation for + `has-field-values-options` above for a more detailed explanation of what these values mean." {:batched-hydrate :has_field_values} [fields] - (let [fields-without-has-field-values-ids (set (for [field fields - :when (nil? (:has_field_values field))] - (:id field))) - fields-with-fieldvalues-ids (when (seq fields-without-has-field-values-ids) - (db/select-field :field_id FieldValues - :field_id [:in fields-without-has-field-values-ids]))] - (for [field fields] - (when field - (assoc field - :has_field_values (or - (:has_field_values field) - (cond - (contains? fields-with-fieldvalues-ids (u/get-id field)) :list - (is-searchable? field) :search - :else :none))))))) + (for [field fields] + (when field + (assoc field :has_field_values (infer-has-field-values field))))) (defn readable-fields-only "Efficiently checks if each field is readable and returns only readable fields" diff --git a/src/metabase/models/field_values.clj b/src/metabase/models/field_values.clj index 703435d1293ea62eaf164dfe65034aed42bb5ce7..3f425ee713bbee014f5a16859627aba164eec1f7 100644 --- a/src/metabase/models/field_values.clj +++ b/src/metabase/models/field_values.clj @@ -1,6 +1,9 @@ (ns metabase.models.field-values (:require [clojure.tools.logging :as log] [metabase.util :as u] + [metabase.util.schema :as su] + [puppetlabs.i18n.core :refer [trs]] + [schema.core :as s] [toucan [db :as db] [models :as models]])) @@ -11,9 +14,9 @@ frontend behavior such as widget choices." (int 30)) -(def ^:const ^Integer list-cardinality-threshold - "Fields with less than this many distincy values should be given a `has_field_values` value of `list`, which means the - Field should have FieldValues." +(def ^:const ^Integer auto-list-cardinality-threshold + "Fields with less than this many distincy values should be given a `has_field_values` value of `list`, which means + the Field should have FieldValues." (int 100)) (def ^:private ^:const ^Integer entry-max-length @@ -22,7 +25,7 @@ (def ^:private ^:const ^Integer total-max-length "Maximum total length for a `FieldValues` entry (combined length of all values for the field)." - (int (* list-cardinality-threshold entry-max-length))) + (int (* auto-list-cardinality-threshold entry-max-length))) ;; ## Entity + DB Multimethods @@ -39,16 +42,18 @@ ;; ## `FieldValues` Helper Functions -(defn field-should-have-field-values? - "Should this `Field` be backed by a corresponding `FieldValues` object?" +(s/defn field-should-have-field-values? :- s/Bool + "Should this `field` be backed by a corresponding `FieldValues` object?" {:arglists '([field])} - [{base-type :base_type, visibility-type :visibility_type, has-field-values :has_field_values, :as field}] - {:pre [visibility-type - (contains? field :base_type) - (contains? field :has_field_values)]} - (and (not (contains? #{:retired :sensitive :hidden :details-only} (keyword visibility-type))) - (not (isa? (keyword base-type) :type/DateTime)) - (= has-field-values "list"))) + [{base-type :base_type, visibility-type :visibility_type, has-field-values :has_field_values, :as field} + :- {:visibility_type su/KeywordOrString + :base_type (s/maybe su/KeywordOrString) + :has_field_values (s/maybe su/KeywordOrString) + s/Keyword s/Any}] + (boolean + (and (not (contains? #{:retired :sensitive :hidden :details-only} (keyword visibility-type))) + (not (isa? (keyword base-type) :type/DateTime)) + (#{:list :auto-list} (keyword has-field-values))))) (defn- values-less-than-total-max-length? @@ -63,32 +68,19 @@ "FieldValues are allowed for this Field." "FieldValues are NOT allowed for this Field."))))) -(defn- cardinality-less-than-threshold? - "`true` if the number of DISTINCT-VALUES is less that `list-cardinality-threshold`. - Does some logging as well." - [distinct-values] - (let [num-values (count distinct-values)] - (u/prog1 (<= num-values list-cardinality-threshold) - (log/debug (if <> - (format "Field has %d distinct values (max %d). FieldValues are allowed for this Field." - num-values list-cardinality-threshold) - (format "Field has over %d values. FieldValues are NOT allowed for this Field." - list-cardinality-threshold)))))) - (defn- distinct-values - "Fetch a sequence of distinct values for FIELD that are below the `total-max-length` threshold. If the values are past - the threshold, this returns `nil`." + "Fetch a sequence of distinct values for `field` that are below the `total-max-length` threshold. If the values are + past the threshold, this returns `nil`." [field] (require 'metabase.db.metadata-queries) (let [values ((resolve 'metabase.db.metadata-queries/field-distinct-values) field)] - (when (cardinality-less-than-threshold? values) - (when (values-less-than-total-max-length? values) - values)))) + (when (values-less-than-total-max-length? values) + values))) (defn- fixup-human-readable-values "Field values and human readable values are lists that are zipped together. If the field values have changes, the - human readable values will need to change too. This function reconstructs the human_readable_values to reflect + human readable values will need to change too. This function reconstructs the `human_readable_values` to reflect `NEW-VALUES`. If a new field value is found, a string version of that is used" [{old-values :values, old-hrv :human_readable_values} new-values] (when (seq old-hrv) @@ -96,24 +88,41 @@ (map #(get orig-remappings % (str %)) new-values)))) (defn create-or-update-field-values! - "Create or update the FieldValues object for FIELD. If the FieldValues object already exists, then update values for + "Create or update the FieldValues object for `field`. If the FieldValues object already exists, then update values for it; otherwise create a new FieldValues object with the newly fetched values." [field & [human-readable-values]] (let [field-values (FieldValues :field_id (u/get-id field)) values (distinct-values field) field-name (or (:name field) (:id field))] (cond + ;; If this Field is marked `auto-list`, and the number of values in now over the list threshold, we need to + ;; unmark it as `auto-list`. Switch it to `has_field_values` = `nil` and delete the FieldValues; this will + ;; result in it getting a Search Widget in the UI when `has_field_values` is automatically inferred by the + ;; `metabase.models.field/infer-has-field-values` hydration function (see that namespace for more detailed + ;; discussion) + ;; + ;; It would be nicer if we could do this in analysis where it gets marked `:auto-list` in the first place, but + ;; Fingerprints don't get updated regularly enough that we could detect the sudden increase in cardinality in a + ;; way that could make this work. Thus, we are stuck doing it here :( + (and (> (count values) auto-list-cardinality-threshold) + (= :auto-list (keyword (:has_field_values field)))) + (do + (log/info (trs "Field {0} was previously automatically set to show a list widget, but now has {1} values." + field-name (count values)) + (trs "Switching Field to use a search widget instead.")) + (db/update! 'Field (u/get-id field) :has_field_values nil) + (db/delete! FieldValues :field_id (u/get-id field))) ;; if the FieldValues object already exists then update values in it (and field-values values) (do - (log/debug (format "Storing updated FieldValues for Field %s..." field-name)) + (log/debug (trs "Storing updated FieldValues for Field {0}..." field-name)) (db/update-non-nil-keys! FieldValues (u/get-id field-values) :values values :human_readable_values (fixup-human-readable-values field-values values))) ;; if FieldValues object doesn't exist create one values (do - (log/debug (format "Storing FieldValues for Field %s..." field-name)) + (log/debug (trs "Storing FieldValues for Field {0}..." field-name)) (db/insert! FieldValues :field_id (u/get-id field) :values values diff --git a/src/metabase/models/interface.clj b/src/metabase/models/interface.clj index 3b3dc7eb8ae41e0b5d9eb8975e8b97017267f66f..55ac4a67666bee3283e3411ca2a9afaaff9b4ba0 100644 --- a/src/metabase/models/interface.clj +++ b/src/metabase/models/interface.clj @@ -7,7 +7,9 @@ [encryption :as encryption]] [schema.core :as s] [taoensso.nippy :as nippy] - [toucan.models :as models]) + [toucan + [models :as models] + [util :as toucan-util]]) (:import java.sql.Blob)) ;;; +----------------------------------------------------------------------------------------------------------------+ @@ -78,6 +80,13 @@ :in validate-cron-string :out identity) +;; Toucan ships with a Keyword type, but on columns that are marked 'TEXT' it doesn't work properly since the values +;; might need to get de-CLOB-bered first. So replace the default Toucan `:keyword` implementation with one that +;; handles those cases. +(models/add-type! :keyword + :in toucan-util/keyword->qualified-name + :out (comp keyword u/jdbc-clob->str)) + ;;; properties diff --git a/src/metabase/models/params.clj b/src/metabase/models/params.clj index 19390eb6e02d6e6ae0b5c2d70e1f039ca11e82df..ce540903d2c7be95ebe40ab8049abf9ac2ef05ed 100644 --- a/src/metabase/models/params.clj +++ b/src/metabase/models/params.clj @@ -69,9 +69,13 @@ cases where more than one name Field exists for a Table, this just adds the first one it finds." [fields] (when-let [table-ids (seq (map :table_id fields))] - (u/key-by :table_id (db/select Field:params-columns-only - :table_id [:in table-ids] - :special_type (mdb/isa :type/Name))))) + (u/key-by :table_id (-> (db/select Field:params-columns-only + :table_id [:in table-ids] + :special_type (mdb/isa :type/Name)) + ;; run `metabase.models.field/infer-has-field-values` on these Fields so their values of + ;; `has_field_values` will be consistent with what the FE expects. (e.g. we'll return + ;; `list` instead of `auto-list`.) + (hydrate :has_field_values))))) (defn add-name-field "For all `fields` that are `:type/PK` Fields, look for a `:type/Name` Field belonging to the same Table. For each diff --git a/src/metabase/sync/analyze/classifiers/category.clj b/src/metabase/sync/analyze/classifiers/category.clj index 705d6268cc8a8c37d306bb6af8a35b38ee14b1b5..7c7a49373808884c643bd73ea3899d7de7ee1a80 100644 --- a/src/metabase/sync/analyze/classifiers/category.clj +++ b/src/metabase/sync/analyze/classifiers/category.clj @@ -11,7 +11,9 @@ determined by the cardinality of the Field, like Category status. Thus it is entirely possibly for a Field to be both a Category and a `list` Field." (:require [clojure.tools.logging :as log] - [metabase.models.field-values :as field-values] + [metabase.models + [field :as field] + [field-values :as field-values]] [metabase.sync [interface :as i] [util :as sync-util]] @@ -28,7 +30,8 @@ (isa? special-type :type/PK) (isa? special-type :type/FK))) -(defn- field-should-be-category? [distinct-count field] +(s/defn ^:private field-should-be-category? :- (s/maybe s/Bool) + [distinct-count :- s/Int, field :- su/Map] ;; only mark a Field as a Category if it doesn't already have a special type (when-not (:special_type field) (when (<= distinct-count field-values/category-cardinality-threshold) @@ -38,19 +41,20 @@ field-values/category-cardinality-threshold)) true))) -(defn- field-should-be-list? [distinct-count field] - {:pre [(contains? field :has_field_values)]} +(s/defn ^:private field-should-be-auto-list? :- (s/maybe s/Bool) + "Based on `distinct-count`, should we mark this `field` as `has_field_values` = `auto-list`?" + [distinct-count :- s/Int, field :- {:has_field_values (s/maybe (apply s/enum field/has-field-values-options)) + s/Keyword s/Any}] ;; only update has_field_values if it hasn't been set yet. If it's already been set then it was probably done so ;; manually by an admin, and we don't want to stomp over their choices. (when (nil? (:has_field_values field)) - (when (<= distinct-count field-values/list-cardinality-threshold) + (when (<= distinct-count field-values/auto-list-cardinality-threshold) (log/debug (format "%s has %d distinct values. Since that is less than %d, it should have cached FieldValues." (sync-util/name-for-logging field) distinct-count - field-values/list-cardinality-threshold)) + field-values/auto-list-cardinality-threshold)) true))) - (s/defn infer-is-category-or-list :- (s/maybe i/FieldInstance) "Classifier that attempts to determine whether FIELD ought to be marked as a Category based on its distinct count." [field :- i/FieldInstance, fingerprint :- (s/maybe i/Fingerprint)] @@ -58,5 +62,5 @@ (when-not (cannot-be-category-or-list? (:base_type field) (:special_type field)) (when-let [distinct-count (get-in fingerprint [:global :distinct-count])] (cond-> field - (field-should-be-category? distinct-count field) (assoc :special_type :type/Category) - (field-should-be-list? distinct-count field) (assoc :has_field_values "list")))))) + (field-should-be-category? distinct-count field) (assoc :special_type :type/Category) + (field-should-be-auto-list? distinct-count field) (assoc :has_field_values :auto-list)))))) diff --git a/src/metabase/sync/analyze/fingerprint/global.clj b/src/metabase/sync/analyze/fingerprint/global.clj index 3fa55fd999ce3cf03b3f9346c86d9358553d2449..81b1252df06bfc7e80203e58002f429da09d94d3 100644 --- a/src/metabase/sync/analyze/fingerprint/global.clj +++ b/src/metabase/sync/analyze/fingerprint/global.clj @@ -7,7 +7,7 @@ "Generate a fingerprint of global information for Fields of all types." [values :- i/FieldSample] ;; TODO - this logic isn't as nice as the old logic that actually called the DB - ;; We used to do (queries/field-distinct-count field field-values/list-cardinality-threshold) + ;; We used to do (queries/field-distinct-count field field-values/auto-list-cardinality-threshold) ;; Consider whether we are so married to the idea of only generating fingerprints from samples that we ;; are ok with inaccurate counts like the one we'll surely be getting here {:distinct-count (count (distinct values))}) diff --git a/test/metabase/models/field_values_test.clj b/test/metabase/models/field_values_test.clj index fb1e94c0d67a97b88a2664ccbd79cc0974ef82a7..e00c0525bee4701b18649340dee82185f4dbd876 100644 --- a/test/metabase/models/field_values_test.clj +++ b/test/metabase/models/field_values_test.clj @@ -16,19 +16,19 @@ ;;; ---------------------------------------- field-should-have-field-values? ----------------------------------------- -(expect (field-should-have-field-values? {:has_field_values "list" +(expect (field-should-have-field-values? {:has_field_values :list :visibility_type :normal :base_type :type/Text})) -(expect false (field-should-have-field-values? {:has_field_values "list" +(expect false (field-should-have-field-values? {:has_field_values :list :visibility_type :sensitive :base_type :type/Text})) -(expect false (field-should-have-field-values? {:has_field_values "list" +(expect false (field-should-have-field-values? {:has_field_values :list :visibility_type :hidden :base_type :type/Text})) -(expect false (field-should-have-field-values? {:has_field_values "list" +(expect false (field-should-have-field-values? {:has_field_values :list :visibility_type :details-only :base_type :type/Text})) @@ -36,11 +36,11 @@ :visibility_type :normal :base_type :type/Text})) -(expect (field-should-have-field-values? {:has_field_values "list" +(expect (field-should-have-field-values? {:has_field_values :list :visibility_type :normal :base_type :type/Text})) -(expect (field-should-have-field-values? {:has_field_values "list" +(expect (field-should-have-field-values? {:has_field_values :list :special_type :type/Category :visibility_type :normal :base_type "type/Boolean"})) @@ -48,32 +48,32 @@ ;; retired/sensitive/hidden/details-only fields should always be excluded (expect false (field-should-have-field-values? {:base_type :type/Boolean - :has_field_values "list" + :has_field_values :list :visibility_type :retired})) (expect false (field-should-have-field-values? {:base_type :type/Boolean - :has_field_values "list" + :has_field_values :list :visibility_type :sensitive})) (expect false (field-should-have-field-values? {:base_type :type/Boolean - :has_field_values "list" + :has_field_values :list :visibility_type :hidden})) (expect false (field-should-have-field-values? {:base_type :type/Boolean - :has_field_values "list" + :has_field_values :list :visibility_type :details-only})) ;; date/time based fields should always be excluded (expect false (field-should-have-field-values? {:base_type :type/Date - :has_field_values "list" + :has_field_values :list :visibility_type :normal})) (expect false (field-should-have-field-values? {:base_type :type/DateTime - :has_field_values "list" + :has_field_values :list :visibility_type :normal})) (expect false (field-should-have-field-values? {:base_type :type/Time - :has_field_values "list" + :has_field_values :list :visibility_type :normal})) diff --git a/test/metabase/models/params_test.clj b/test/metabase/models/params_test.clj index 743fd0d451850336c5cc42c2d4d1348975c3f0b5..c6c9e45898cc934de1d8377ccd96fd35214aa099 100644 --- a/test/metabase/models/params_test.clj +++ b/test/metabase/models/params_test.clj @@ -4,8 +4,7 @@ [metabase.api.public-test :as public-test] [metabase.models [card :refer [Card]] - [field :refer [Field]] - [params :as params]] + [field :refer [Field]]] [metabase.test.data :as data] [toucan [db :as db] @@ -24,7 +23,7 @@ :display_name "Name" :base_type :type/Text :special_type :type/Name - :has_field_values "list"}} + :has_field_values :list}} (-> (db/select-one [Field :name :table_id :special_type], :id (data/id :venues :id)) (hydrate :name_field))) @@ -70,7 +69,7 @@ :display_name "Name" :base_type :type/Text :special_type :type/Name - :has_field_values "list"} + :has_field_values :list} :dimensions []}} (tt/with-temp Card [card {:dataset_query {:database (data/id) @@ -96,7 +95,7 @@ :display_name "Name" :base_type :type/Text :special_type :type/Name - :has_field_values "list"} + :has_field_values :list} :dimensions []}} (public-test/with-sharing-enabled-and-temp-dashcard-referencing :venues :id [dashboard] (-> (hydrate dashboard :param_fields) diff --git a/test/metabase/sync/analyze/classifiers/category_test.clj b/test/metabase/sync/analyze/classifiers/category_test.clj index f258bdbd2a2246558794220c1f7d1f2ac1880cfa..96b6819fab128bc926b08f03d9e712da0bb87248 100644 --- a/test/metabase/sync/analyze/classifiers/category_test.clj +++ b/test/metabase/sync/analyze/classifiers/category_test.clj @@ -3,25 +3,34 @@ (:require [expectations :refer :all] [metabase.sync.analyze.classifiers.category :as category-classifier])) +(defn- field-with-distinct-count [distinct-count] + {:database_type "VARCHAR" + :special_type :type/Name + :name "NAME" + :fingerprint_version 1 + :has_field_values nil + :active true + :visibility_type :normal + :preview_display true + :display_name "Name" + :fingerprint {:global {:distinct-count distinct-count} + :type + {:type/Text + {:percent-json 0.0 + :percent-url 0.0 + :percent-email 0.0 + :average-length 13.516}}} + :base_type :type/Text}) + ;; make sure the logic for deciding whether a Field should be a list works as expected (expect nil - (#'category-classifier/field-should-be-list? + (#'category-classifier/field-should-be-auto-list? 2500 - {:database_type "VARCHAR" - :special_type :type/Name - :name "NAME" - :fingerprint_version 1 - :has_field_values nil - :active true - :visibility_type :normal - :preview_display true - :display_name "Name" - :fingerprint {:global {:distinct-count 2500} - :type - {:type/Text - {:percent-json 0.0 - :percent-url 0.0 - :percent-email 0.0 - :average-length 13.516}}} - :base_type :type/Text})) + (field-with-distinct-count 2500))) + +(expect + true + (#'category-classifier/field-should-be-auto-list? + 99 + (field-with-distinct-count 99))) diff --git a/test/metabase/sync/field_values_test.clj b/test/metabase/sync/field_values_test.clj new file mode 100644 index 0000000000000000000000000000000000000000..05e37bdf268c336d3914bc4fafdd88ecf4ebc544 --- /dev/null +++ b/test/metabase/sync/field_values_test.clj @@ -0,0 +1,139 @@ +(ns metabase.sync.field-values-test + "Tests around the way Metabase syncs FieldValues, and sets the values of `field.has_field_values`." + (:require [clojure.java.jdbc :as jdbc] + [clojure.string :as str] + [expectations :refer :all] + [metabase + [db :as mdb] + [driver :as driver] + [sync :as sync :refer :all]] + [metabase.driver.generic-sql :as sql] + [metabase.models + [database :refer [Database]] + [field :refer [Field]] + [field-values :as field-values :refer [FieldValues]]] + [metabase.test + [data :as data] + [util :as tu]] + [toucan.db :as db] + [toucan.util.test :as tt])) + +;;; --------------------------------------------------- Helper Fns --------------------------------------------------- + +(defn- insert-range-sql + "Generate SQL to insert a row for each number in `rang`." + [rang] + (str "INSERT INTO blueberries_consumed (num) VALUES " + (str/join ", " (for [n rang] + (str "(" n ")"))))) + +(def ^:private ^:dynamic *conn* nil) + +(defn- do-with-blueberries-db + "An empty canvas upon which you may paint your dreams. + + Creates a database with a single table, `blueberries_consumed`, with one column, `num`; binds this DB to + `data/*get-db*` so you can use `data/db` and `data/id` to access it." + {:style/indent 0} + [f] + (let [details {:db (str "mem:" (tu/random-name) ";DB_CLOSE_DELAY=10")}] + (binding [mdb/*allow-potentailly-unsafe-connections* true] + (tt/with-temp Database [db {:engine :h2, :details details}] + (jdbc/with-db-connection [conn (sql/connection-details->spec (driver/engine->driver :h2) details)] + (jdbc/execute! conn ["CREATE TABLE blueberries_consumed (num INTEGER NOT NULL);"]) + (binding [data/*get-db* (constantly db)] + (binding [*conn* conn] + (f)))))))) + +(defmacro ^:private with-blueberries-db {:style/indent 0} [& body] + `(do-with-blueberries-db (fn [] ~@body))) + +(defn- insert-blueberries-and-sync! + "With the temp blueberries db from above, insert a `range` of values and re-sync the DB. + + (insert-blueberries-and-sync! [0 1 2 3]) ; insert 4 rows" + [rang] + (jdbc/execute! *conn* [(insert-range-sql rang)]) + (sync-database! (data/db))) + + +;;; ---------------------------------------------- The Tests Themselves ---------------------------------------------- + +;; A Field with 50 values should get marked as `auto-list` on initial sync, because it should be 'list', but was +;; marked automatically, as opposed to explicitly (`list`) +(expect + :auto-list + (with-blueberries-db + ;; insert 50 rows & sync + (insert-blueberries-and-sync! (range 50)) + ;; has_field_values should be auto-list + (db/select-one-field :has_field_values Field :id (data/id :blueberries_consumed :num)))) + +;; ... and it should also have some FieldValues +(expect + #metabase.models.field_values.FieldValuesInstance + {:values [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 + 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49] + :human_readable_values {}} + (with-blueberries-db + (insert-blueberries-and-sync! (range 50)) + (db/select-one [FieldValues :values :human_readable_values], :field_id (data/id :blueberries_consumed :num)))) + +;; ok, but if the number grows past the threshold & we sync again it should get unmarked as auto-list and set back to +;; `nil` (#3215) +(expect + nil + (with-blueberries-db + ;; insert 50 bloobs & sync. has_field_values should be auto-list + (insert-blueberries-and-sync! (range 50)) + (assert (= (db/select-one-field :has_field_values Field :id (data/id :blueberries_consumed :num)) + :auto-list)) + ;; now insert enough bloobs to put us over the limit and re-sync. + (insert-blueberries-and-sync! (range 50 (+ 100 field-values/auto-list-cardinality-threshold))) + ;; has_field_values should have been set to nil. + (db/select-one-field :has_field_values Field :id (data/id :blueberries_consumed :num)))) + +;; ...its FieldValues should also get deleted. +(expect + nil + (with-blueberries-db + ;; do the same steps as the test above... + (insert-blueberries-and-sync! (range 50)) + (insert-blueberries-and-sync! (range 50 (+ 100 field-values/auto-list-cardinality-threshold))) + ;; ///and FieldValues should also have been deleted. + (db/select-one [FieldValues :values :human_readable_values], :field_id (data/id :blueberries_consumed :num)))) + +;; If we had explicitly marked the Field as `list` (instead of `auto-list`), adding extra values shouldn't change +;; anything! +(expect + :list + (with-blueberries-db + ;; insert 50 bloobs & sync + (insert-blueberries-and-sync! (range 50)) + ;; change has_field_values to list + (db/update! Field (data/id :blueberries_consumed :num) :has_field_values "list") + ;; insert more bloobs & re-sync + (insert-blueberries-and-sync! (range 50 (+ 100 field-values/auto-list-cardinality-threshold))) + ;; has_field_values shouldn't change + (db/select-one-field :has_field_values Field :id (data/id :blueberries_consumed :num)))) + +;; it should still have FieldValues, and have new ones for the new Values. It should have 200 values even though this +;; is past the normal limit of 100 values! +(expect + #metabase.models.field_values.FieldValuesInstance + {:values [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 + 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 + 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 + 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 + 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 + 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 + 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 + 189 190 191 192 193 194 195 196 197 198 199] + :human_readable_values {}} + (with-blueberries-db + ;; follow the same steps as the test above... + (insert-blueberries-and-sync! (range 50)) + (db/update! Field (data/id :blueberries_consumed :num) :has_field_values "list") + (insert-blueberries-and-sync! (range 50 (+ 100 field-values/auto-list-cardinality-threshold))) + ;; ... and FieldValues should still be there, but this time updated to include the new values! + (db/select-one [FieldValues :values :human_readable_values], :field_id (data/id :blueberries_consumed :num)))) diff --git a/test/metabase/sync_database_test.clj b/test/metabase/sync_database_test.clj index ec5ea00bc892350c0fb9957244bedee16b66c525..5f34d68e4eb73d9b9e6bd5d8e81a1b6a67aa5fab 100644 --- a/test/metabase/sync_database_test.clj +++ b/test/metabase/sync_database_test.clj @@ -1,15 +1,11 @@ (ns metabase.sync-database-test "Tests for sync behavior that use a imaginary `SyncTestDriver`. These are kept around mainly because they've already been written. For newer sync tests see `metabase.sync.*` test namespaces." - (:require [clojure.java.jdbc :as jdbc] - [clojure.string :as str] - [expectations :refer :all] + (:require [expectations :refer :all] [metabase - [db :as mdb] [driver :as driver] [sync :refer :all] [util :as u]] - [metabase.driver.generic-sql :as sql] [metabase.models [database :refer [Database]] [field :refer [Field]] @@ -22,7 +18,6 @@ [toucan.db :as db] [toucan.util.test :as tt])) - (def ^:private ^:const sync-test-tables {"movie" {:name "movie" :schema "default" @@ -357,38 +352,8 @@ (do (sync-table! (Table (data/id :venues))) (get-field-values))])) -;; Make sure that if a Field's cardinality passes `list-cardinality-threshold` (currently 100) the corresponding -;; FieldValues entry will be deleted (#3215) -(defn- insert-range-sql - "Generate SQL to insert a row for each number in `rang`." - [rang] - (str "INSERT INTO blueberries_consumed (num) VALUES " - (str/join ", " (for [n rang] - (str "(" n ")"))))) - -(expect - false - (let [details {:db (str "mem:" (tu/random-name) ";DB_CLOSE_DELAY=10")}] - (binding [mdb/*allow-potentailly-unsafe-connections* true] - (tt/with-temp Database [db {:engine :h2, :details details}] - (jdbc/with-db-connection [conn (sql/connection-details->spec (driver/engine->driver :h2) details)] - (let [exec! #(doseq [statement %] - (jdbc/execute! conn [statement]))] - ;; create the `blueberries_consumed` table and insert 50 values - (exec! ["CREATE TABLE blueberries_consumed (num INTEGER NOT NULL);" - (insert-range-sql (range 50))]) - (sync-database! db) - (let [table-id (db/select-one-id Table :db_id (u/get-id db)) - field-id (db/select-one-id Field :table_id table-id)] - ;; field values should exist... - (assert (= (count (db/select-one-field :values FieldValues :field_id field-id)) - 50)) - ;; ok, now insert enough rows to push the field past the `list-cardinality-threshold` and sync again, - ;; there should be no more field values - (exec! [(insert-range-sql (range 50 (+ 100 field-values/list-cardinality-threshold)))]) - (sync-database! db) - (db/exists? FieldValues :field_id field-id)))))))) +;; TODO - hey, what is this testing? If you wrote this test, please explain what's going on here (defn- narrow-to-min-max [row] (-> row (get-in [:type :type/Number])