Skip to content
Snippets Groups Projects
Commit 9580c50f authored by Cam Saül's avatar Cam Saül Committed by GitHub
Browse files

Merge pull request #4677 from metabase/fix-3215

Make sure FieldValues get deleted if cardinality passes threshold
parents a7ec0074 80b29e40
No related branches found
No related tags found
No related merge requests found
......@@ -78,6 +78,7 @@
(create-field-values-if-needed! field))))
;; TODO - not sure this is used anymore
(defendpoint POST "/:id/value_map_update"
"Update the human-readable values for a `Field` whose special type is `category`/`city`/`state`/`country`
or whose base type is `type/Boolean`."
......
......@@ -54,15 +54,14 @@
{:pre [(integer? field-id)
(field-should-have-field-values? field)]}
(if-let [field-values (FieldValues :field_id field-id)]
(db/update! FieldValues (:id field-values)
(db/update! FieldValues (u/get-id field-values)
:values ((resolve 'metabase.db.metadata-queries/field-distinct-values) field))
(create-field-values! field)))
(defn create-field-values-if-needed!
"Create `FieldValues` for a `Field` if they *should* exist but don't already exist.
Returns the existing or newly created `FieldValues` for `Field`."
{:arglists '([field]
[field human-readable-values])}
{:arglists '([field] [field human-readable-values])}
[{field-id :id :as field} & [human-readable-values]]
{:pre [(integer? field-id)]}
(when (field-should-have-field-values? field)
......@@ -72,10 +71,9 @@
(defn save-field-values!
"Save the `FieldValues` for FIELD-ID, creating them if needed, otherwise updating them."
[field-id values]
{:pre [(integer? field-id)
(coll? values)]}
{:pre [(integer? field-id) (coll? values)]}
(if-let [field-values (FieldValues :field_id field-id)]
(db/update! FieldValues (:id field-values), :values values)
(db/update! FieldValues (u/get-id field-values), :values values)
(db/insert! FieldValues :field_id field-id, :values values)))
(defn clear-field-values!
......
......@@ -14,21 +14,23 @@
[metabase.sync-database.interface :as i]
[metabase.util :as u]))
(def ^:private ^:const percent-valid-url-threshold
(def ^:private ^:const ^Float percent-valid-url-threshold
"Fields that have at least this percent of values that are valid URLs should be given a special type of `:type/URL`."
0.95)
(def ^:private ^:const low-cardinality-threshold
(def ^:private ^:const ^Integer low-cardinality-threshold
"Fields with less than this many distinct values should automatically be given a special type of `:type/Category`."
300)
(def ^:private ^:const field-values-entry-max-length
(def ^:private ^:const ^Integer field-values-entry-max-length
"The maximum character length for a stored `FieldValues` entry."
100)
(def ^:private ^:const ^Integer field-values-total-max-length
"Maximum total length for a FieldValues entry (combined length of all values for the field)."
(* low-cardinality-threshold field-values-entry-max-length))
(def ^:private ^:const average-length-no-preview-threshold
(def ^:private ^:const ^Integer average-length-no-preview-threshold
"Fields whose values' average length is greater than this amount should be marked as `preview_display = false`."
50)
......@@ -52,6 +54,12 @@
(not (isa? (:base_type field) :type/Collection))
(not (= (:base_type field) :type/*)))))
(defn- field-values-below-low-cardinality-threshold? [non-nil-values]
(and (<= (count non-nil-values) low-cardinality-threshold)
;; very simple check to see if total length of field-values exceeds (total values * max per value)
(let [total-length (reduce + (map (comp count str) non-nil-values))]
(<= total-length field-values-total-max-length))))
(defn test:cardinality-and-extract-field-values
"Extract field-values for FIELD. If number of values exceeds `low-cardinality-threshold` then we return an empty set of values."
[field field-stats]
......@@ -59,10 +67,7 @@
;; for example, :type/Category fields with more than MAX values don't need to be rescanned all the time
(let [non-nil-values (filter identity (queries/field-distinct-values field (inc low-cardinality-threshold)))
;; only return the list if we didn't exceed our MAX values and if the the total character count of our values is reasable (#2332)
distinct-values (when-not (or (< low-cardinality-threshold (count non-nil-values))
;; very simple check to see if total length of field-values exceeds (total values * max per value)
(< (* low-cardinality-threshold
field-values-entry-max-length) (reduce + (map (comp count str) non-nil-values))))
distinct-values (when (field-values-below-low-cardinality-threshold? non-nil-values)
non-nil-values)]
(cond-> (assoc field-stats :values distinct-values)
(and (nil? (:special_type field))
......
(ns metabase.sync-database-test
(:require [expectations :refer :all]
(:require [clojure.java.jdbc :as jdbc]
[clojure.string :as str]
[expectations :refer :all]
(toucan [db :as db]
[hydrate :refer [hydrate]])
[toucan.util.test :as tt]
[metabase.driver :as driver]
(metabase [db :as mdb]
[driver :as driver])
[metabase.driver.generic-sql :as sql]
(metabase.models [database :refer [Database]]
[field :refer [Field]]
[field-values :refer [FieldValues]]
[raw-table :refer [RawTable]]
[table :refer [Table]])
[metabase.sync-database :refer :all]
(metabase.test [data :refer :all]
[util :refer [resolve-private-vars] :as tu])
metabase.sync-database.analyze
[metabase.test.data :refer :all]
[metabase.test.data.interface :as i]
[metabase.test.util :refer [resolve-private-vars] :as tu]
[metabase.util :as u]))
(def ^:private ^:const sync-test-tables
......@@ -317,3 +323,32 @@
;; 3. Now re-sync the table and make sure the value is back
(do (sync-table! @venues-table)
(get-field-values))]))
;;; -------------------- Make sure that if a Field's cardinality passes `metabase.sync-database.analyze/low-cardinality-threshold` (currently 300) (#3215) --------------------
(expect
false
(let [details {:db (str "mem:" (tu/random-name) ";DB_CLOSE_DELAY=10")}]
(binding [mdb/*allow-potentailly-unsafe-connections* true]
(tt/with-temp Database [db {:engine :h2, :details details}]
(let [driver (driver/engine->driver :h2)
spec (sql/connection-details->spec driver details)
exec! #(doseq [statement %]
(println "[H2]" statement) ; NOCOMMIT
(println (jdbc/execute! spec [statement])))
insert-range #(str "INSERT INTO blueberries_consumed (num) VALUES "
(str/join ", " (for [n %]
(str "(" n ")"))))]
;; create the `blueberries_consumed` table and insert a 100 values
(exec! ["CREATE TABLE blueberries_consumed (num INTEGER NOT NULL);"
(insert-range (range 100))])
(sync-database! db, :full-sync? true)
(let [table-id (db/select-one-id Table :db_id (u/get-id db))
field-id (db/select-one-id Field :table_id table-id)]
;; field values should exist...
(assert (= (count (db/select-one-field :values FieldValues :field_id field-id))
100))
;; ok, now insert enough rows to push the field past the `low-cardinality-threshold` and sync again, there should be no more field values
(exec! [(insert-range (range 100 (+ 100 @(resolve 'metabase.sync-database.analyze/low-cardinality-threshold))))])
(sync-database! db, :full-sync? true)
(db/exists? FieldValues :field_id field-id)))))))
......@@ -30,8 +30,9 @@
(defn escaped-name
"Return escaped version of database name suitable for use as a filename / database name / etc."
^String [^DatabaseDefinition database-definition]
(str/replace (:database-name database-definition) #"\s+" "_"))
^String [^DatabaseDefinition {:keys [database-name]}]
{:pre [(string? database-name)]}
(str/replace database-name #"\s+" "_"))
(defn db-qualified-table-name
"Return a combined table name qualified with the name of its database, suitable for use as an identifier.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment