diff --git a/docs/operations-guide/running-metabase-on-heroku.md b/docs/operations-guide/running-metabase-on-heroku.md index 4ef364c065b57c15af571904e7feae51328a7030..7895a2926f22f3595444fd1f319ad2bd62d9b2c9 100644 --- a/docs/operations-guide/running-metabase-on-heroku.md +++ b/docs/operations-guide/running-metabase-on-heroku.md @@ -11,7 +11,9 @@ If you've got a Heroku account then all there is to do is follow this one-click [](http://downloads.metabase.com/launch-heroku.html) -This will launch a Heroku deployment using a github repository that Metabase maintains. +This will launch a Heroku deployment using a GitHub repository that Metabase maintains. + +It should only take a few minutes for Metabase to start. You can check on the progress by viewing the logs at [https://dashboard.heroku.com/apps/YOUR_APPLICATION_NAME/logs](https://dashboard.heroku.com/apps/YOUR_APPLICATION_NAME/logs) or using the Heroku command line tool with the `heroku logs -t -a YOUR_APPLICATION_NAME` command. ### Upgrading beyond the `Free` tier @@ -27,6 +29,7 @@ Heroku is very kind and offers a free tier to be used for very small/non-critica * Heroku’s 30 second timeouts on all web requests can cause a few issues if you happen to have longer running database queries. Most people don’t run into this but be aware that it’s possible. * When using the `free` tier, if you don’t access the application for a while Heroku will sleep your Metabase environment. This prevents things like Pulses and Metabase background tasks from running when scheduled and at times makes the app appear to be slow when really it's just Heroku reloading your app. You can resolve this by upgrading to the `hobby` tier or higher. + * Sometimes Metabase may run out of memory and you will see messages like `Error R14 (Memory quota exceeded)` in the Heroku logs. If this happens regularly we recommend upgrading to the `standard-2x` tier dyno. Now that you’ve installed Metabase, it’s time to [set it up and connect it to your database](../setting-up-metabase.md). diff --git a/frontend/src/metabase/lib/query_time.js b/frontend/src/metabase/lib/query_time.js index 3f009e4ba9f2f6828c989aefacfe8b23b94ed269..d9baad1e73c313ddb5a23f5e85c6f84a633f1c93 100644 --- a/frontend/src/metabase/lib/query_time.js +++ b/frontend/src/metabase/lib/query_time.js @@ -3,6 +3,7 @@ import inflection from "inflection"; import { mbqlEq } from "metabase/lib/query/util"; import { formatTimeWithUnit } from "metabase/lib/formatting"; +import { parseTimestamp } from "metabase/lib/time"; export const DATETIME_UNITS = [ // "default", @@ -139,7 +140,7 @@ export function generateTimeIntervalDescription(n, unit) { export function generateTimeValueDescription(value, bucketing) { if (typeof value === "string") { - let m = moment(value); + const m = parseTimestamp(value, bucketing); if (bucketing) { return formatTimeWithUnit(value, bucketing); } else if (m.hours() || m.minutes()) { diff --git a/frontend/src/metabase/qb/lib/actions.js b/frontend/src/metabase/qb/lib/actions.js index 95d7108f314a4cb458fcefbb30b9a882e610880b..678956f2ee672b9326b4efe1a9ad58aff3e58e3d 100644 --- a/frontend/src/metabase/qb/lib/actions.js +++ b/frontend/src/metabase/qb/lib/actions.js @@ -104,13 +104,13 @@ export const filter = (card, operator, column, value) => { return newCard; }; -const drillFilter = (card, value, column) => { +export const drillFilter = (card, value, column) => { let filter; if (isDate(column)) { filter = [ "=", ["datetime-field", getFieldRefFromColumn(column), "as", column.unit], - parseTimestamp(value, column.unit).toISOString(), + parseTimestamp(value, column.unit).format(), ]; } else { const range = rangeForValue(value, column); diff --git a/frontend/test/__support__/sample_dataset_fixture.js b/frontend/test/__support__/sample_dataset_fixture.js index d810cee0e8589a40cbe8f4e67b97be380c5ae580..7b80db62bddaed590757c6e978a6339daaab700a 100644 --- a/frontend/test/__support__/sample_dataset_fixture.js +++ b/frontend/test/__support__/sample_dataset_fixture.js @@ -1484,7 +1484,7 @@ export const clickedDateTimeValue = { ...metadata.fields[ORDERS_CREATED_DATE_FIELD_ID], source: "fields", }, - value: "2018-01-01T00:00:00.000Z", + value: "2018-01-01T00:00:00Z", }; export const clickedMetric = { diff --git a/frontend/test/internal/__snapshots__/components.unit.spec.js.snap b/frontend/test/internal/__snapshots__/components.unit.spec.js.snap index 107f6ce66a041cabea027ba99c51e4f819cdc149..2106e4e76d6aa0df4c171ba28cb14749ae181fa5 100644 --- a/frontend/test/internal/__snapshots__/components.unit.spec.js.snap +++ b/frontend/test/internal/__snapshots__/components.unit.spec.js.snap @@ -436,6 +436,26 @@ exports[`EntityMenu should render "Share menu" correctly 1`] = ` </div> `; +exports[`ProgressBar should render "Animated" correctly 1`] = ` +<div + className="xt xu xv xn" +> + <div + className="xw xx xt xy xz x10 x11 x12 x13 x1d x1e x16 x17 x18 x1f x1a x1b x1g" + /> +</div> +`; + +exports[`ProgressBar should render "Default" correctly 1`] = ` +<div + className="xt xu xv xn" +> + <div + className="xw xx xt xy xz x10 x11 x12 x13 x14 x15 x16 x17 x18 x19 x1a x1b x1c" + /> +</div> +`; + exports[`Select should render "Default" correctly 1`] = ` <a className="no-decoration" diff --git a/frontend/test/modes/drills/UnderlyingRecordsDrill.unit.spec.js b/frontend/test/modes/drills/UnderlyingRecordsDrill.unit.spec.js index f1429e0838d570245b49db99f460cf72c235fd95..11ee419aea2a93b79a28ebb7e28d84e590387aa9 100644 --- a/frontend/test/modes/drills/UnderlyingRecordsDrill.unit.spec.js +++ b/frontend/test/modes/drills/UnderlyingRecordsDrill.unit.spec.js @@ -47,7 +47,7 @@ describe("UnderlyingRecordsDrill", () => { expect(UnderlyingRecordsDrill({ question })).toHaveLength(0); }); it("should be return correct new card for breakout by month", () => { - const value = "2018-01-01T00:00:00.000Z"; + const value = "2018-01-01T00:00:00Z"; const actions = UnderlyingRecordsDrill( getActionPropsForTimeseriesClick("month", value), ); diff --git a/frontend/test/modes/lib/actions.unit.spec.js b/frontend/test/modes/lib/actions.unit.spec.js new file mode 100644 index 0000000000000000000000000000000000000000..cbf0c0f81d5e2b05df4b7e7bd6c89d5cc59587ac --- /dev/null +++ b/frontend/test/modes/lib/actions.unit.spec.js @@ -0,0 +1,31 @@ +/* eslint-disable flowtype/require-valid-file-annotation */ + +import { drillFilter } from "metabase/qb/lib/actions"; + +describe("actions", () => { + describe("drillFilter", () => { + it("should add the filter with the same timezone", () => { + const newCard = drillFilter( + { + dataset_query: { + type: "query", + query: {}, + }, + }, + "2018-04-27T00:00:00.000+02:00", + { + base_type: "type/DateTime", + id: 123, + unit: "day", + }, + ); + expect(newCard.dataset_query.query).toEqual({ + filter: [ + "=", + ["datetime-field", ["field-id", 123], "as", "day"], + "2018-04-27T00:00:00+02:00", + ], + }); + }); + }); +}); diff --git a/src/metabase/api/common.clj b/src/metabase/api/common.clj index 0371d49afc174d1eb10fa4c345d2e29a21782ca3..9162a54c4a36d647ea92e8ff2ce27f1ffa439914 100644 --- a/src/metabase/api/common.clj +++ b/src/metabase/api/common.clj @@ -14,15 +14,11 @@ [metabase.api.common.internal :refer :all] [metabase.models.interface :as mi] [puppetlabs.i18n.core :refer [trs tru]] - [ring.util - [io :as rui] - [response :as rr]] [ring.core.protocols :as protocols] [ring.util.response :as response] [schema.core :as s] [toucan.db :as db]) - (:import [java.io BufferedWriter OutputStream OutputStreamWriter] - [java.nio.charset Charset StandardCharsets])) + (:import java.io.OutputStream)) (declare check-403 check-404) diff --git a/src/metabase/api/field.clj b/src/metabase/api/field.clj index 8a5d17741cc7fe0a31bc50b16764bb596c29442b..a1ea3eec15b4ba298387c1acaa1c1e597624608d 100644 --- a/src/metabase/api/field.clj +++ b/src/metabase/api/field.clj @@ -75,7 +75,7 @@ points_of_interest (s/maybe su/NonBlankString) special_type (s/maybe FieldType) visibility_type (s/maybe FieldVisibilityType) - has_field_values (s/maybe (s/enum "search" "list" "none"))} + has_field_values (s/maybe (apply s/enum (map name field/has-field-values-options)))} (let [field (hydrate (api/write-check Field id) :dimensions) new-special-type (keyword (get body :special_type (:special_type field))) removed-fk? (removed-fk-special-type? (:special_type field) new-special-type) diff --git a/src/metabase/db/metadata_queries.clj b/src/metabase/db/metadata_queries.clj index caae266a05e7b0e6b30295ec7c1d21dd1e25143a..7dc23248e6e32ab127ba3eec332ef2284a06f591 100644 --- a/src/metabase/db/metadata_queries.clj +++ b/src/metabase/db/metadata_queries.clj @@ -4,11 +4,11 @@ [metabase [query-processor :as qp] [util :as u]] - [metabase.models - [field-values :as field-values] - [table :refer [Table]]] + [metabase.models.table :refer [Table]] [metabase.query-processor.interface :as qpi] [metabase.query-processor.middleware.expand :as ql] + [metabase.util.schema :as su] + [schema.core :as s] [toucan.db :as db])) (defn- qp-query [db-id query] @@ -42,15 +42,32 @@ (u/pprint-to-str results)) (throw e))))) -(defn field-distinct-values +(def ^:private ^Integer absolute-max-distinct-values-limit + "The absolute maximum number of results to return for a `field-distinct-values` query. Normally Fields with 100 or + less values (at the time of this writing) get marked as `auto-list` Fields, meaning we save all their distinct + values in a FieldValues object, which powers a list widget in the FE when using the Field for filtering in the QB. + Admins can however manually mark any Field as `list`, which is effectively ordering Metabase to keep FieldValues for + the Field regardless of its cardinality. + + Of course, if a User does something crazy, like mark a million-arity Field as List, we don't want Metabase to + explode trying to make their dreams a reality; we need some sort of hard limit to prevent catastrophes. So this + limit is effectively a safety to prevent Users from nuking their own instance for Fields that really shouldn't be + List Fields at all. For these very-high-cardinality Fields, we're effectively capping the number of + FieldValues that get could saved. + + This number should be a balance of: + + * Not being too low, which would definitly result in GitHub issues along the lines of 'My 500-distinct-value Field + that I marked as List is not showing all values in the List Widget' + * Not being too high, which would result in Metabase running out of memory dealing with too many values" + (int 5000)) + +(s/defn field-distinct-values "Return the distinct values of FIELD. This is used to create a `FieldValues` object for `:type/Category` Fields." ([field] - ;; fetch up to one more value than allowed for FieldValues. e.g. if the max is 100 distinct values fetch up to 101. - ;; That way we will know if we're over the limit - (field-distinct-values field (inc field-values/list-cardinality-threshold))) - ([field max-results] - {:pre [(integer? max-results)]} + (field-distinct-values field absolute-max-distinct-values-limit)) + ([field, max-results :- su/IntGreaterThanZero] (mapv first (field-query field (-> {} (ql/breakout (ql/field-id (u/get-id field))) (ql/limit max-results)))))) diff --git a/src/metabase/driver/sparksql.clj b/src/metabase/driver/sparksql.clj index 0e15195ae021d1cdfc13a32172a02835a4c4c614..419dba5a3ba5e0d1fb82a810bba656cc18c16047 100644 --- a/src/metabase/driver/sparksql.clj +++ b/src/metabase/driver/sparksql.clj @@ -223,4 +223,7 @@ :string-length-fn (u/drop-first-arg hive-like/string-length-fn) :unix-timestamp->timestamp (u/drop-first-arg hive-like/unix-timestamp->timestamp)})) -(driver/register-driver! :sparksql (SparkSQLDriver.)) +(defn -init-driver + "Register the SparkSQL driver." + [] + (driver/register-driver! :sparksql (SparkSQLDriver.))) diff --git a/src/metabase/models/field.clj b/src/metabase/models/field.clj index 51cdcbb016cf0985f94ed7f11fbe87ea4509f70a..09ae4b590779315749401fab31743bd0b81500d0 100644 --- a/src/metabase/models/field.clj +++ b/src/metabase/models/field.clj @@ -15,7 +15,7 @@ ;;; ------------------------------------------------- Type Mappings -------------------------------------------------- -(def ^:const visibility-types +(def visibility-types "Possible values for `Field.visibility_type`." #{:normal ; Default setting. field has no visibility restrictions. :details-only ; For long blob like columns such as JSON. field is not shown in some places on the frontend. @@ -23,6 +23,37 @@ :sensitive ; Strict removal of field from all places except data model listing. queries should error if someone attempts to access. :retired}) ; For fields that no longer exist in the physical db. automatically set by Metabase. QP should error if encountered in a query. +(def has-field-values-options + "Possible options for `has_field_values`. This column is used to determine whether we keep FieldValues for a Field, + and which type of widget should be used to pick values of this Field when filtering by it in the Query Builder." + ;; AUTOMATICALLY-SET VALUES, SET DURING SYNC + ;; + ;; `nil` -- means infer which widget to use based on logic in `with-has-field-values`; this will either return + ;; `:search` or `:none`. + ;; + ;; This is the default state for Fields not marked `auto-list`. Admins cannot explicitly mark a Field as + ;; `has_field_values` `nil`. This value is also subject to automatically change in the future if the values of a + ;; Field change in such a way that it can now be marked `auto-list`. Fields marked `nil` do *not* have FieldValues + ;; objects. + ;; + #{;; The other automatically-set option. Automatically marked as a 'List' Field based on cardinality and other factors + ;; during sync. Store a FieldValues object; use the List Widget. If this Field goes over the distinct value + ;; threshold in a future sync, the Field will get switched back to `has_field_values = nil`. + :auto-list + ;; + ;; EXPLICITLY-SET VALUES, SET BY AN ADMIN + ;; + ;; Admin explicitly marked this as a 'Search' Field, which means we should *not* keep FieldValues, and should use + ;; Search Widget. + :search + ;; Admin explicitly marked this as a 'List' Field, which means we should keep FieldValues, and use the List + ;; Widget. Unlike `auto-list`, if this Field grows past the normal cardinality constraints in the future, it will + ;; remain `List` until explicitly marked otherwise. + :list + ;; Admin explicitly marked that this Field shall always have a plain-text widget, neither allowing search, nor + ;; showing a list of possible values. FieldValues not kept. + :none}) + ;;; ----------------------------------------------- Entity & Lifecycle ----------------------------------------------- @@ -97,7 +128,7 @@ :special_type :keyword :visibility_type :keyword :description :clob - :has_field_values :clob + :has_field_values :keyword :fingerprint :json}) :properties (constantly {:timestamped? true}) :pre-insert pre-insert @@ -149,7 +180,7 @@ {:batched-hydrate :normal_values} [fields] (let [id->field-values (select-field-id->instance (filter fv/field-should-have-field-values? fields) - [FieldValues :id :human_readable_values :values :field_id])] + [FieldValues :id :human_readable_values :values :field_id])] (for [field fields] (assoc field :values (get id->field-values (:id field) []))))) @@ -175,30 +206,30 @@ (or (isa? base-type :type/Text) (isa? base-type :type/TextLike))) -(defn with-has-field-values - "Infer what the value of the `has_field_values` should be for Fields where it's not set. Admins can set this to one - of the values below, but if it's `nil` in the DB we'll infer it automatically. +(defn- infer-has-field-values + "Determine the value of `has_field_values` we should return for a `Field` As of 0.29.1 this doesn't require any DB + calls! :D" + [{has-field-values :has_field_values, :as field}] + (or + ;; if `has_field_values` is set in the DB, use that value; but if it's `auto-list`, return the value as `list` to + ;; avoid confusing FE code, which can remain blissfully unaware that `auto-list` is a thing + (when has-field-values + (if (= (keyword has-field-values) :auto-list) + :list + has-field-values)) + ;; otherwise if it does not have value set in DB we will infer it + (if (is-searchable? field) + :search + :none))) - * `list` = has an associated FieldValues object - * `search` = does not have FieldValues - * `none` = admin has explicitly disabled search behavior for this Field" +(defn with-has-field-values + "Infer what the value of the `has_field_values` should be for Fields where it's not set. See documentation for + `has-field-values-options` above for a more detailed explanation of what these values mean." {:batched-hydrate :has_field_values} [fields] - (let [fields-without-has-field-values-ids (set (for [field fields - :when (nil? (:has_field_values field))] - (:id field))) - fields-with-fieldvalues-ids (when (seq fields-without-has-field-values-ids) - (db/select-field :field_id FieldValues - :field_id [:in fields-without-has-field-values-ids]))] - (for [field fields] - (when field - (assoc field - :has_field_values (or - (:has_field_values field) - (cond - (contains? fields-with-fieldvalues-ids (u/get-id field)) :list - (is-searchable? field) :search - :else :none))))))) + (for [field fields] + (when field + (assoc field :has_field_values (infer-has-field-values field))))) (defn readable-fields-only "Efficiently checks if each field is readable and returns only readable fields" diff --git a/src/metabase/models/field_values.clj b/src/metabase/models/field_values.clj index 703435d1293ea62eaf164dfe65034aed42bb5ce7..3f425ee713bbee014f5a16859627aba164eec1f7 100644 --- a/src/metabase/models/field_values.clj +++ b/src/metabase/models/field_values.clj @@ -1,6 +1,9 @@ (ns metabase.models.field-values (:require [clojure.tools.logging :as log] [metabase.util :as u] + [metabase.util.schema :as su] + [puppetlabs.i18n.core :refer [trs]] + [schema.core :as s] [toucan [db :as db] [models :as models]])) @@ -11,9 +14,9 @@ frontend behavior such as widget choices." (int 30)) -(def ^:const ^Integer list-cardinality-threshold - "Fields with less than this many distincy values should be given a `has_field_values` value of `list`, which means the - Field should have FieldValues." +(def ^:const ^Integer auto-list-cardinality-threshold + "Fields with less than this many distincy values should be given a `has_field_values` value of `list`, which means + the Field should have FieldValues." (int 100)) (def ^:private ^:const ^Integer entry-max-length @@ -22,7 +25,7 @@ (def ^:private ^:const ^Integer total-max-length "Maximum total length for a `FieldValues` entry (combined length of all values for the field)." - (int (* list-cardinality-threshold entry-max-length))) + (int (* auto-list-cardinality-threshold entry-max-length))) ;; ## Entity + DB Multimethods @@ -39,16 +42,18 @@ ;; ## `FieldValues` Helper Functions -(defn field-should-have-field-values? - "Should this `Field` be backed by a corresponding `FieldValues` object?" +(s/defn field-should-have-field-values? :- s/Bool + "Should this `field` be backed by a corresponding `FieldValues` object?" {:arglists '([field])} - [{base-type :base_type, visibility-type :visibility_type, has-field-values :has_field_values, :as field}] - {:pre [visibility-type - (contains? field :base_type) - (contains? field :has_field_values)]} - (and (not (contains? #{:retired :sensitive :hidden :details-only} (keyword visibility-type))) - (not (isa? (keyword base-type) :type/DateTime)) - (= has-field-values "list"))) + [{base-type :base_type, visibility-type :visibility_type, has-field-values :has_field_values, :as field} + :- {:visibility_type su/KeywordOrString + :base_type (s/maybe su/KeywordOrString) + :has_field_values (s/maybe su/KeywordOrString) + s/Keyword s/Any}] + (boolean + (and (not (contains? #{:retired :sensitive :hidden :details-only} (keyword visibility-type))) + (not (isa? (keyword base-type) :type/DateTime)) + (#{:list :auto-list} (keyword has-field-values))))) (defn- values-less-than-total-max-length? @@ -63,32 +68,19 @@ "FieldValues are allowed for this Field." "FieldValues are NOT allowed for this Field."))))) -(defn- cardinality-less-than-threshold? - "`true` if the number of DISTINCT-VALUES is less that `list-cardinality-threshold`. - Does some logging as well." - [distinct-values] - (let [num-values (count distinct-values)] - (u/prog1 (<= num-values list-cardinality-threshold) - (log/debug (if <> - (format "Field has %d distinct values (max %d). FieldValues are allowed for this Field." - num-values list-cardinality-threshold) - (format "Field has over %d values. FieldValues are NOT allowed for this Field." - list-cardinality-threshold)))))) - (defn- distinct-values - "Fetch a sequence of distinct values for FIELD that are below the `total-max-length` threshold. If the values are past - the threshold, this returns `nil`." + "Fetch a sequence of distinct values for `field` that are below the `total-max-length` threshold. If the values are + past the threshold, this returns `nil`." [field] (require 'metabase.db.metadata-queries) (let [values ((resolve 'metabase.db.metadata-queries/field-distinct-values) field)] - (when (cardinality-less-than-threshold? values) - (when (values-less-than-total-max-length? values) - values)))) + (when (values-less-than-total-max-length? values) + values))) (defn- fixup-human-readable-values "Field values and human readable values are lists that are zipped together. If the field values have changes, the - human readable values will need to change too. This function reconstructs the human_readable_values to reflect + human readable values will need to change too. This function reconstructs the `human_readable_values` to reflect `NEW-VALUES`. If a new field value is found, a string version of that is used" [{old-values :values, old-hrv :human_readable_values} new-values] (when (seq old-hrv) @@ -96,24 +88,41 @@ (map #(get orig-remappings % (str %)) new-values)))) (defn create-or-update-field-values! - "Create or update the FieldValues object for FIELD. If the FieldValues object already exists, then update values for + "Create or update the FieldValues object for `field`. If the FieldValues object already exists, then update values for it; otherwise create a new FieldValues object with the newly fetched values." [field & [human-readable-values]] (let [field-values (FieldValues :field_id (u/get-id field)) values (distinct-values field) field-name (or (:name field) (:id field))] (cond + ;; If this Field is marked `auto-list`, and the number of values in now over the list threshold, we need to + ;; unmark it as `auto-list`. Switch it to `has_field_values` = `nil` and delete the FieldValues; this will + ;; result in it getting a Search Widget in the UI when `has_field_values` is automatically inferred by the + ;; `metabase.models.field/infer-has-field-values` hydration function (see that namespace for more detailed + ;; discussion) + ;; + ;; It would be nicer if we could do this in analysis where it gets marked `:auto-list` in the first place, but + ;; Fingerprints don't get updated regularly enough that we could detect the sudden increase in cardinality in a + ;; way that could make this work. Thus, we are stuck doing it here :( + (and (> (count values) auto-list-cardinality-threshold) + (= :auto-list (keyword (:has_field_values field)))) + (do + (log/info (trs "Field {0} was previously automatically set to show a list widget, but now has {1} values." + field-name (count values)) + (trs "Switching Field to use a search widget instead.")) + (db/update! 'Field (u/get-id field) :has_field_values nil) + (db/delete! FieldValues :field_id (u/get-id field))) ;; if the FieldValues object already exists then update values in it (and field-values values) (do - (log/debug (format "Storing updated FieldValues for Field %s..." field-name)) + (log/debug (trs "Storing updated FieldValues for Field {0}..." field-name)) (db/update-non-nil-keys! FieldValues (u/get-id field-values) :values values :human_readable_values (fixup-human-readable-values field-values values))) ;; if FieldValues object doesn't exist create one values (do - (log/debug (format "Storing FieldValues for Field %s..." field-name)) + (log/debug (trs "Storing FieldValues for Field {0}..." field-name)) (db/insert! FieldValues :field_id (u/get-id field) :values values diff --git a/src/metabase/models/interface.clj b/src/metabase/models/interface.clj index 3b3dc7eb8ae41e0b5d9eb8975e8b97017267f66f..55ac4a67666bee3283e3411ca2a9afaaff9b4ba0 100644 --- a/src/metabase/models/interface.clj +++ b/src/metabase/models/interface.clj @@ -7,7 +7,9 @@ [encryption :as encryption]] [schema.core :as s] [taoensso.nippy :as nippy] - [toucan.models :as models]) + [toucan + [models :as models] + [util :as toucan-util]]) (:import java.sql.Blob)) ;;; +----------------------------------------------------------------------------------------------------------------+ @@ -78,6 +80,13 @@ :in validate-cron-string :out identity) +;; Toucan ships with a Keyword type, but on columns that are marked 'TEXT' it doesn't work properly since the values +;; might need to get de-CLOB-bered first. So replace the default Toucan `:keyword` implementation with one that +;; handles those cases. +(models/add-type! :keyword + :in toucan-util/keyword->qualified-name + :out (comp keyword u/jdbc-clob->str)) + ;;; properties diff --git a/src/metabase/models/params.clj b/src/metabase/models/params.clj index 19390eb6e02d6e6ae0b5c2d70e1f039ca11e82df..ce540903d2c7be95ebe40ab8049abf9ac2ef05ed 100644 --- a/src/metabase/models/params.clj +++ b/src/metabase/models/params.clj @@ -69,9 +69,13 @@ cases where more than one name Field exists for a Table, this just adds the first one it finds." [fields] (when-let [table-ids (seq (map :table_id fields))] - (u/key-by :table_id (db/select Field:params-columns-only - :table_id [:in table-ids] - :special_type (mdb/isa :type/Name))))) + (u/key-by :table_id (-> (db/select Field:params-columns-only + :table_id [:in table-ids] + :special_type (mdb/isa :type/Name)) + ;; run `metabase.models.field/infer-has-field-values` on these Fields so their values of + ;; `has_field_values` will be consistent with what the FE expects. (e.g. we'll return + ;; `list` instead of `auto-list`.) + (hydrate :has_field_values))))) (defn add-name-field "For all `fields` that are `:type/PK` Fields, look for a `:type/Name` Field belonging to the same Table. For each diff --git a/src/metabase/sync/analyze/classifiers/category.clj b/src/metabase/sync/analyze/classifiers/category.clj index 705d6268cc8a8c37d306bb6af8a35b38ee14b1b5..7c7a49373808884c643bd73ea3899d7de7ee1a80 100644 --- a/src/metabase/sync/analyze/classifiers/category.clj +++ b/src/metabase/sync/analyze/classifiers/category.clj @@ -11,7 +11,9 @@ determined by the cardinality of the Field, like Category status. Thus it is entirely possibly for a Field to be both a Category and a `list` Field." (:require [clojure.tools.logging :as log] - [metabase.models.field-values :as field-values] + [metabase.models + [field :as field] + [field-values :as field-values]] [metabase.sync [interface :as i] [util :as sync-util]] @@ -28,7 +30,8 @@ (isa? special-type :type/PK) (isa? special-type :type/FK))) -(defn- field-should-be-category? [distinct-count field] +(s/defn ^:private field-should-be-category? :- (s/maybe s/Bool) + [distinct-count :- s/Int, field :- su/Map] ;; only mark a Field as a Category if it doesn't already have a special type (when-not (:special_type field) (when (<= distinct-count field-values/category-cardinality-threshold) @@ -38,19 +41,20 @@ field-values/category-cardinality-threshold)) true))) -(defn- field-should-be-list? [distinct-count field] - {:pre [(contains? field :has_field_values)]} +(s/defn ^:private field-should-be-auto-list? :- (s/maybe s/Bool) + "Based on `distinct-count`, should we mark this `field` as `has_field_values` = `auto-list`?" + [distinct-count :- s/Int, field :- {:has_field_values (s/maybe (apply s/enum field/has-field-values-options)) + s/Keyword s/Any}] ;; only update has_field_values if it hasn't been set yet. If it's already been set then it was probably done so ;; manually by an admin, and we don't want to stomp over their choices. (when (nil? (:has_field_values field)) - (when (<= distinct-count field-values/list-cardinality-threshold) + (when (<= distinct-count field-values/auto-list-cardinality-threshold) (log/debug (format "%s has %d distinct values. Since that is less than %d, it should have cached FieldValues." (sync-util/name-for-logging field) distinct-count - field-values/list-cardinality-threshold)) + field-values/auto-list-cardinality-threshold)) true))) - (s/defn infer-is-category-or-list :- (s/maybe i/FieldInstance) "Classifier that attempts to determine whether FIELD ought to be marked as a Category based on its distinct count." [field :- i/FieldInstance, fingerprint :- (s/maybe i/Fingerprint)] @@ -58,5 +62,5 @@ (when-not (cannot-be-category-or-list? (:base_type field) (:special_type field)) (when-let [distinct-count (get-in fingerprint [:global :distinct-count])] (cond-> field - (field-should-be-category? distinct-count field) (assoc :special_type :type/Category) - (field-should-be-list? distinct-count field) (assoc :has_field_values "list")))))) + (field-should-be-category? distinct-count field) (assoc :special_type :type/Category) + (field-should-be-auto-list? distinct-count field) (assoc :has_field_values :auto-list)))))) diff --git a/src/metabase/sync/analyze/fingerprint/global.clj b/src/metabase/sync/analyze/fingerprint/global.clj index 3fa55fd999ce3cf03b3f9346c86d9358553d2449..81b1252df06bfc7e80203e58002f429da09d94d3 100644 --- a/src/metabase/sync/analyze/fingerprint/global.clj +++ b/src/metabase/sync/analyze/fingerprint/global.clj @@ -7,7 +7,7 @@ "Generate a fingerprint of global information for Fields of all types." [values :- i/FieldSample] ;; TODO - this logic isn't as nice as the old logic that actually called the DB - ;; We used to do (queries/field-distinct-count field field-values/list-cardinality-threshold) + ;; We used to do (queries/field-distinct-count field field-values/auto-list-cardinality-threshold) ;; Consider whether we are so married to the idea of only generating fingerprints from samples that we ;; are ok with inaccurate counts like the one we'll surely be getting here {:distinct-count (count (distinct values))}) diff --git a/test/metabase/automagic_dashboards/core_test.clj b/test/metabase/automagic_dashboards/core_test.clj index b19692f38655c4286ac0758029dc72f2a1bb09f4..6b57bfd03aa0ac9bb14ee7eca4629e530fccbb08 100644 --- a/test/metabase/automagic_dashboards/core_test.clj +++ b/test/metabase/automagic_dashboards/core_test.clj @@ -60,7 +60,7 @@ Table (assoc :entity_type nil) (#'magic/->root)) - (#'magic/matching-rules (rules/load-rules "table")) + (#'magic/matching-rules (rules/get-rules ["table"])) (map (comp first :applies_to)))) diff --git a/test/metabase/models/field_values_test.clj b/test/metabase/models/field_values_test.clj index fb1e94c0d67a97b88a2664ccbd79cc0974ef82a7..e00c0525bee4701b18649340dee82185f4dbd876 100644 --- a/test/metabase/models/field_values_test.clj +++ b/test/metabase/models/field_values_test.clj @@ -16,19 +16,19 @@ ;;; ---------------------------------------- field-should-have-field-values? ----------------------------------------- -(expect (field-should-have-field-values? {:has_field_values "list" +(expect (field-should-have-field-values? {:has_field_values :list :visibility_type :normal :base_type :type/Text})) -(expect false (field-should-have-field-values? {:has_field_values "list" +(expect false (field-should-have-field-values? {:has_field_values :list :visibility_type :sensitive :base_type :type/Text})) -(expect false (field-should-have-field-values? {:has_field_values "list" +(expect false (field-should-have-field-values? {:has_field_values :list :visibility_type :hidden :base_type :type/Text})) -(expect false (field-should-have-field-values? {:has_field_values "list" +(expect false (field-should-have-field-values? {:has_field_values :list :visibility_type :details-only :base_type :type/Text})) @@ -36,11 +36,11 @@ :visibility_type :normal :base_type :type/Text})) -(expect (field-should-have-field-values? {:has_field_values "list" +(expect (field-should-have-field-values? {:has_field_values :list :visibility_type :normal :base_type :type/Text})) -(expect (field-should-have-field-values? {:has_field_values "list" +(expect (field-should-have-field-values? {:has_field_values :list :special_type :type/Category :visibility_type :normal :base_type "type/Boolean"})) @@ -48,32 +48,32 @@ ;; retired/sensitive/hidden/details-only fields should always be excluded (expect false (field-should-have-field-values? {:base_type :type/Boolean - :has_field_values "list" + :has_field_values :list :visibility_type :retired})) (expect false (field-should-have-field-values? {:base_type :type/Boolean - :has_field_values "list" + :has_field_values :list :visibility_type :sensitive})) (expect false (field-should-have-field-values? {:base_type :type/Boolean - :has_field_values "list" + :has_field_values :list :visibility_type :hidden})) (expect false (field-should-have-field-values? {:base_type :type/Boolean - :has_field_values "list" + :has_field_values :list :visibility_type :details-only})) ;; date/time based fields should always be excluded (expect false (field-should-have-field-values? {:base_type :type/Date - :has_field_values "list" + :has_field_values :list :visibility_type :normal})) (expect false (field-should-have-field-values? {:base_type :type/DateTime - :has_field_values "list" + :has_field_values :list :visibility_type :normal})) (expect false (field-should-have-field-values? {:base_type :type/Time - :has_field_values "list" + :has_field_values :list :visibility_type :normal})) diff --git a/test/metabase/models/params_test.clj b/test/metabase/models/params_test.clj index 743fd0d451850336c5cc42c2d4d1348975c3f0b5..c6c9e45898cc934de1d8377ccd96fd35214aa099 100644 --- a/test/metabase/models/params_test.clj +++ b/test/metabase/models/params_test.clj @@ -4,8 +4,7 @@ [metabase.api.public-test :as public-test] [metabase.models [card :refer [Card]] - [field :refer [Field]] - [params :as params]] + [field :refer [Field]]] [metabase.test.data :as data] [toucan [db :as db] @@ -24,7 +23,7 @@ :display_name "Name" :base_type :type/Text :special_type :type/Name - :has_field_values "list"}} + :has_field_values :list}} (-> (db/select-one [Field :name :table_id :special_type], :id (data/id :venues :id)) (hydrate :name_field))) @@ -70,7 +69,7 @@ :display_name "Name" :base_type :type/Text :special_type :type/Name - :has_field_values "list"} + :has_field_values :list} :dimensions []}} (tt/with-temp Card [card {:dataset_query {:database (data/id) @@ -96,7 +95,7 @@ :display_name "Name" :base_type :type/Text :special_type :type/Name - :has_field_values "list"} + :has_field_values :list} :dimensions []}} (public-test/with-sharing-enabled-and-temp-dashcard-referencing :venues :id [dashboard] (-> (hydrate dashboard :param_fields) diff --git a/test/metabase/sync/analyze/classifiers/category_test.clj b/test/metabase/sync/analyze/classifiers/category_test.clj index f258bdbd2a2246558794220c1f7d1f2ac1880cfa..96b6819fab128bc926b08f03d9e712da0bb87248 100644 --- a/test/metabase/sync/analyze/classifiers/category_test.clj +++ b/test/metabase/sync/analyze/classifiers/category_test.clj @@ -3,25 +3,34 @@ (:require [expectations :refer :all] [metabase.sync.analyze.classifiers.category :as category-classifier])) +(defn- field-with-distinct-count [distinct-count] + {:database_type "VARCHAR" + :special_type :type/Name + :name "NAME" + :fingerprint_version 1 + :has_field_values nil + :active true + :visibility_type :normal + :preview_display true + :display_name "Name" + :fingerprint {:global {:distinct-count distinct-count} + :type + {:type/Text + {:percent-json 0.0 + :percent-url 0.0 + :percent-email 0.0 + :average-length 13.516}}} + :base_type :type/Text}) + ;; make sure the logic for deciding whether a Field should be a list works as expected (expect nil - (#'category-classifier/field-should-be-list? + (#'category-classifier/field-should-be-auto-list? 2500 - {:database_type "VARCHAR" - :special_type :type/Name - :name "NAME" - :fingerprint_version 1 - :has_field_values nil - :active true - :visibility_type :normal - :preview_display true - :display_name "Name" - :fingerprint {:global {:distinct-count 2500} - :type - {:type/Text - {:percent-json 0.0 - :percent-url 0.0 - :percent-email 0.0 - :average-length 13.516}}} - :base_type :type/Text})) + (field-with-distinct-count 2500))) + +(expect + true + (#'category-classifier/field-should-be-auto-list? + 99 + (field-with-distinct-count 99))) diff --git a/test/metabase/sync/field_values_test.clj b/test/metabase/sync/field_values_test.clj new file mode 100644 index 0000000000000000000000000000000000000000..05e37bdf268c336d3914bc4fafdd88ecf4ebc544 --- /dev/null +++ b/test/metabase/sync/field_values_test.clj @@ -0,0 +1,139 @@ +(ns metabase.sync.field-values-test + "Tests around the way Metabase syncs FieldValues, and sets the values of `field.has_field_values`." + (:require [clojure.java.jdbc :as jdbc] + [clojure.string :as str] + [expectations :refer :all] + [metabase + [db :as mdb] + [driver :as driver] + [sync :as sync :refer :all]] + [metabase.driver.generic-sql :as sql] + [metabase.models + [database :refer [Database]] + [field :refer [Field]] + [field-values :as field-values :refer [FieldValues]]] + [metabase.test + [data :as data] + [util :as tu]] + [toucan.db :as db] + [toucan.util.test :as tt])) + +;;; --------------------------------------------------- Helper Fns --------------------------------------------------- + +(defn- insert-range-sql + "Generate SQL to insert a row for each number in `rang`." + [rang] + (str "INSERT INTO blueberries_consumed (num) VALUES " + (str/join ", " (for [n rang] + (str "(" n ")"))))) + +(def ^:private ^:dynamic *conn* nil) + +(defn- do-with-blueberries-db + "An empty canvas upon which you may paint your dreams. + + Creates a database with a single table, `blueberries_consumed`, with one column, `num`; binds this DB to + `data/*get-db*` so you can use `data/db` and `data/id` to access it." + {:style/indent 0} + [f] + (let [details {:db (str "mem:" (tu/random-name) ";DB_CLOSE_DELAY=10")}] + (binding [mdb/*allow-potentailly-unsafe-connections* true] + (tt/with-temp Database [db {:engine :h2, :details details}] + (jdbc/with-db-connection [conn (sql/connection-details->spec (driver/engine->driver :h2) details)] + (jdbc/execute! conn ["CREATE TABLE blueberries_consumed (num INTEGER NOT NULL);"]) + (binding [data/*get-db* (constantly db)] + (binding [*conn* conn] + (f)))))))) + +(defmacro ^:private with-blueberries-db {:style/indent 0} [& body] + `(do-with-blueberries-db (fn [] ~@body))) + +(defn- insert-blueberries-and-sync! + "With the temp blueberries db from above, insert a `range` of values and re-sync the DB. + + (insert-blueberries-and-sync! [0 1 2 3]) ; insert 4 rows" + [rang] + (jdbc/execute! *conn* [(insert-range-sql rang)]) + (sync-database! (data/db))) + + +;;; ---------------------------------------------- The Tests Themselves ---------------------------------------------- + +;; A Field with 50 values should get marked as `auto-list` on initial sync, because it should be 'list', but was +;; marked automatically, as opposed to explicitly (`list`) +(expect + :auto-list + (with-blueberries-db + ;; insert 50 rows & sync + (insert-blueberries-and-sync! (range 50)) + ;; has_field_values should be auto-list + (db/select-one-field :has_field_values Field :id (data/id :blueberries_consumed :num)))) + +;; ... and it should also have some FieldValues +(expect + #metabase.models.field_values.FieldValuesInstance + {:values [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 + 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49] + :human_readable_values {}} + (with-blueberries-db + (insert-blueberries-and-sync! (range 50)) + (db/select-one [FieldValues :values :human_readable_values], :field_id (data/id :blueberries_consumed :num)))) + +;; ok, but if the number grows past the threshold & we sync again it should get unmarked as auto-list and set back to +;; `nil` (#3215) +(expect + nil + (with-blueberries-db + ;; insert 50 bloobs & sync. has_field_values should be auto-list + (insert-blueberries-and-sync! (range 50)) + (assert (= (db/select-one-field :has_field_values Field :id (data/id :blueberries_consumed :num)) + :auto-list)) + ;; now insert enough bloobs to put us over the limit and re-sync. + (insert-blueberries-and-sync! (range 50 (+ 100 field-values/auto-list-cardinality-threshold))) + ;; has_field_values should have been set to nil. + (db/select-one-field :has_field_values Field :id (data/id :blueberries_consumed :num)))) + +;; ...its FieldValues should also get deleted. +(expect + nil + (with-blueberries-db + ;; do the same steps as the test above... + (insert-blueberries-and-sync! (range 50)) + (insert-blueberries-and-sync! (range 50 (+ 100 field-values/auto-list-cardinality-threshold))) + ;; ///and FieldValues should also have been deleted. + (db/select-one [FieldValues :values :human_readable_values], :field_id (data/id :blueberries_consumed :num)))) + +;; If we had explicitly marked the Field as `list` (instead of `auto-list`), adding extra values shouldn't change +;; anything! +(expect + :list + (with-blueberries-db + ;; insert 50 bloobs & sync + (insert-blueberries-and-sync! (range 50)) + ;; change has_field_values to list + (db/update! Field (data/id :blueberries_consumed :num) :has_field_values "list") + ;; insert more bloobs & re-sync + (insert-blueberries-and-sync! (range 50 (+ 100 field-values/auto-list-cardinality-threshold))) + ;; has_field_values shouldn't change + (db/select-one-field :has_field_values Field :id (data/id :blueberries_consumed :num)))) + +;; it should still have FieldValues, and have new ones for the new Values. It should have 200 values even though this +;; is past the normal limit of 100 values! +(expect + #metabase.models.field_values.FieldValuesInstance + {:values [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 + 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 + 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 + 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 + 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 + 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 + 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 + 189 190 191 192 193 194 195 196 197 198 199] + :human_readable_values {}} + (with-blueberries-db + ;; follow the same steps as the test above... + (insert-blueberries-and-sync! (range 50)) + (db/update! Field (data/id :blueberries_consumed :num) :has_field_values "list") + (insert-blueberries-and-sync! (range 50 (+ 100 field-values/auto-list-cardinality-threshold))) + ;; ... and FieldValues should still be there, but this time updated to include the new values! + (db/select-one [FieldValues :values :human_readable_values], :field_id (data/id :blueberries_consumed :num)))) diff --git a/test/metabase/sync_database_test.clj b/test/metabase/sync_database_test.clj index ec5ea00bc892350c0fb9957244bedee16b66c525..5f34d68e4eb73d9b9e6bd5d8e81a1b6a67aa5fab 100644 --- a/test/metabase/sync_database_test.clj +++ b/test/metabase/sync_database_test.clj @@ -1,15 +1,11 @@ (ns metabase.sync-database-test "Tests for sync behavior that use a imaginary `SyncTestDriver`. These are kept around mainly because they've already been written. For newer sync tests see `metabase.sync.*` test namespaces." - (:require [clojure.java.jdbc :as jdbc] - [clojure.string :as str] - [expectations :refer :all] + (:require [expectations :refer :all] [metabase - [db :as mdb] [driver :as driver] [sync :refer :all] [util :as u]] - [metabase.driver.generic-sql :as sql] [metabase.models [database :refer [Database]] [field :refer [Field]] @@ -22,7 +18,6 @@ [toucan.db :as db] [toucan.util.test :as tt])) - (def ^:private ^:const sync-test-tables {"movie" {:name "movie" :schema "default" @@ -357,38 +352,8 @@ (do (sync-table! (Table (data/id :venues))) (get-field-values))])) -;; Make sure that if a Field's cardinality passes `list-cardinality-threshold` (currently 100) the corresponding -;; FieldValues entry will be deleted (#3215) -(defn- insert-range-sql - "Generate SQL to insert a row for each number in `rang`." - [rang] - (str "INSERT INTO blueberries_consumed (num) VALUES " - (str/join ", " (for [n rang] - (str "(" n ")"))))) - -(expect - false - (let [details {:db (str "mem:" (tu/random-name) ";DB_CLOSE_DELAY=10")}] - (binding [mdb/*allow-potentailly-unsafe-connections* true] - (tt/with-temp Database [db {:engine :h2, :details details}] - (jdbc/with-db-connection [conn (sql/connection-details->spec (driver/engine->driver :h2) details)] - (let [exec! #(doseq [statement %] - (jdbc/execute! conn [statement]))] - ;; create the `blueberries_consumed` table and insert 50 values - (exec! ["CREATE TABLE blueberries_consumed (num INTEGER NOT NULL);" - (insert-range-sql (range 50))]) - (sync-database! db) - (let [table-id (db/select-one-id Table :db_id (u/get-id db)) - field-id (db/select-one-id Field :table_id table-id)] - ;; field values should exist... - (assert (= (count (db/select-one-field :values FieldValues :field_id field-id)) - 50)) - ;; ok, now insert enough rows to push the field past the `list-cardinality-threshold` and sync again, - ;; there should be no more field values - (exec! [(insert-range-sql (range 50 (+ 100 field-values/list-cardinality-threshold)))]) - (sync-database! db) - (db/exists? FieldValues :field_id field-id)))))))) +;; TODO - hey, what is this testing? If you wrote this test, please explain what's going on here (defn- narrow-to-min-max [row] (-> row (get-in [:type :type/Number])