Skip to content
Snippets Groups Projects
Commit 17f9f04b authored by Simon Belak
Browse files

More robust field values heuristics

parent 19bd8dcf
No related branches found
No related tags found
No related merge requests found
......@@ -115,6 +115,9 @@
(db/update! 'Field (u/get-id field) :has_field_values nil)
(db/delete! FieldValues :field_id (u/get-id field)))
(= (:values field-values) values)
(log/debug (trs "FieldValues for Field {0} remain unchanged. Skipping..." field-name))
;; if the FieldValues object already exists then update values in it
(and field-values values)
(do
......
......@@ -21,28 +21,24 @@
[schema.core :as s]))
(s/defn ^:private cannot-be-category-or-list? :- s/Bool
[base-type :- su/FieldType, special-type :- (s/maybe su/FieldType)]
(or (isa? base-type :type/DateTime)
(isa? base-type :type/Collection)
(defn- cannot-be-category-or-list?
[{:keys [base_type special_type]}]
(or (isa? base_type :type/DateTime)
(isa? base_type :type/Collection)
;; Don't let IDs become list Fields (they already can't become categories, because they already have a special
;; type). It just doesn't make sense to cache a sequence of numbers since they aren't inherently meaningful
(isa? special-type :type/PK)
(isa? special-type :type/FK)))
(defn- not-all-nil?
[fingerprint]
(or (some-> fingerprint :type :type/Number :min some?)
(some-> fingerprint :type :type/Text :average-length pos?)))
(isa? special_type :type/PK)
(isa? special_type :type/FK)))
(s/defn ^:private field-should-be-category? :- (s/maybe s/Bool)
[fingerprint :- (s/maybe i/Fingerprint), field :- su/Map]
(let [distinct-count (get-in fingerprint [:global :distinct-count])]
(let [distinct-count (get-in fingerprint [:global :distinct-count])
nil% (get-in fingerprint [:global :nil%])]
;; Only mark a Field as a Category if it doesn't already have a special type.
(when (and (nil? (:special_type field))
(or (not-all-nil? fingerprint)
(or (some-> nil% (< 1))
(isa? (:base_type field) :type/Boolean))
(<= distinct-count field-values/category-cardinality-threshold))
(some-> distinct-count (<= field-values/category-cardinality-threshold)))
(log/debug (format "%s has %d distinct values. Since that is less than %d, we're marking it as a category."
(sync-util/name-for-logging field)
distinct-count
......@@ -51,12 +47,13 @@
(s/defn ^:private field-should-be-auto-list? :- (s/maybe s/Bool)
"Based on `distinct-count`, should we mark this `field` as `has_field_values` = `auto-list`?"
[distinct-count :- s/Int, field :- {:has_field_values (s/maybe (apply s/enum field/has-field-values-options))
[fingerprint :- (s/maybe i/Fingerprint), field :- {:has_field_values (s/maybe (apply s/enum field/has-field-values-options))
s/Keyword s/Any}]
;; only update has_field_values if it hasn't been set yet. If it's already been set then it was probably done so
;; manually by an admin, and we don't want to stomp over their choices.
(when (nil? (:has_field_values field))
(when (<= distinct-count field-values/auto-list-cardinality-threshold)
(let [distinct-count (get-in fingerprint [:global :distinct-count])]
(when (and (nil? (:has_field_values field))
(some-> distinct-count (<= field-values/auto-list-cardinality-threshold)))
(log/debug (format "%s has %d distinct values. Since that is less than %d, it should have cached FieldValues."
(sync-util/name-for-logging field)
distinct-count
......@@ -66,9 +63,8 @@
(s/defn infer-is-category-or-list :- (s/maybe i/FieldInstance)
"Classifier that attempts to determine whether FIELD ought to be marked as a Category based on its distinct count."
[field :- i/FieldInstance, fingerprint :- (s/maybe i/Fingerprint)]
(when fingerprint
(when-not (cannot-be-category-or-list? (:base_type field) (:special_type field))
(when-let [distinct-count (get-in fingerprint [:global :distinct-count])]
(cond-> field
(field-should-be-category? fingerprint field) (assoc :special_type :type/Category)
(field-should-be-auto-list? distinct-count field) (assoc :has_field_values :auto-list))))))
(when (and fingerprint
(not (cannot-be-category-or-list? field)))
(cond-> field
(field-should-be-category? fingerprint field) (assoc :special_type :type/Category)
(field-should-be-auto-list? fingerprint field) (assoc :has_field_values :auto-list))))
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment