From 1ec961f64b1b796446c1d6979bec004ce5453f69 Mon Sep 17 00:00:00 2001 From: Ryan Senior <ryan@metabase.com> Date: Mon, 27 Mar 2017 19:12:40 -0500 Subject: [PATCH] Added min/max metadata to numerics and a default binning strategy This commit adds new min/max metadata to all numeric columns. That metadata is used in the automatic binning of breakout fields. This commit also extends MBQL to indicate that a breakout column (or columns) should be binned using a "default" strategy. The query must also include the maximum number of bins. Using the number of bins with the min/max values, the bin width is computed and used in the SQL query to group data. --- resources/migrations/000_migrations.yaml | 20 +++ src/metabase/db/metadata_queries.clj | 8 ++ .../driver/generic_sql/query_processor.clj | 62 ++++++--- src/metabase/query_processor/annotate.clj | 4 +- src/metabase/query_processor/expand.clj | 4 + src/metabase/query_processor/interface.clj | 12 +- src/metabase/query_processor/resolve.clj | 24 ++-- src/metabase/sync_database/analyze.clj | 24 +++- src/metabase/sync_database/interface.clj | 4 +- test/metabase/api/database_test.clj | 8 +- test/metabase/api/field_test.clj | 4 +- test/metabase/api/table_test.clj | 20 ++- test/metabase/driver/generic_sql_test.clj | 16 ++- .../query_processor/expand_resolve_test.clj | 120 +++++++++++------- test/metabase/query_processor_test.clj | 45 +++++-- .../query_processor_test/breakout_test.clj | 27 +++- .../query_processor_test/expressions_test.clj | 2 +- .../field_visibility_test.clj | 20 ++- .../sync_database/sync_dynamic_test.clj | 4 +- test/metabase/sync_database/sync_test.clj | 4 +- test/metabase/sync_database_test.clj | 15 ++- test/metabase/test/mock/moviedb.clj | 4 +- .../test/mock/schema_per_customer.clj | 4 +- test/metabase/test/mock/toucanery.clj | 4 +- 24 files changed, 332 insertions(+), 127 deletions(-) diff --git a/resources/migrations/000_migrations.yaml b/resources/migrations/000_migrations.yaml index 6c2af3c3761..b63f579201e 100644 --- a/resources/migrations/000_migrations.yaml +++ b/resources/migrations/000_migrations.yaml @@ -3646,3 +3646,23 @@ databaseChangeLog: columns: - column: name: dashboard_id + - changeSet: + id: 56 + author: senior + changes: + - addColumn: + tableName: metabase_field + columns: + - column: + name: min_value + type: float + constraints: + nullable: true + - addColumn: + tableName: metabase_field + columns: + - column: + name: max_value + type: float + constraints: + nullable: true diff --git a/src/metabase/db/metadata_queries.clj b/src/metabase/db/metadata_queries.clj index 1d0ff0b4529..def6feb7ac2 100644 --- a/src/metabase/db/metadata_queries.clj +++ b/src/metabase/db/metadata_queries.clj @@ -54,3 +54,11 @@ [{field-id :id :as field}] (-> (field-query field (ql/aggregation {} (ql/count (ql/field-id field-id)))) first first int)) + +(defn field-max-min + "Returns a pair [max min] for the given `FIELD`" + [{field-id :id :as field}] + (->> (field-query field (ql/aggregation {} [(ql/min (ql/field-id field-id)) + (ql/max (ql/field-id field-id))])) + first + (map double))) diff --git a/src/metabase/driver/generic_sql/query_processor.clj b/src/metabase/driver/generic_sql/query_processor.clj index 73e8c0623f9..9de08ad7677 100644 --- a/src/metabase/driver/generic_sql/query_processor.clj +++ b/src/metabase/driver/generic_sql/query_processor.clj @@ -17,7 +17,7 @@ [metabase.util.honeysql-extensions :as hx]) (:import clojure.lang.Keyword java.sql.SQLException - [metabase.query_processor.interface AgFieldRef DateTimeField DateTimeValue Expression ExpressionRef Field RelativeDateTimeValue Value])) + [metabase.query_processor.interface AgFieldRef BinnedField DateTimeField DateTimeValue Expression ExpressionRef Field RelativeDateTimeValue Value])) (def ^:dynamic *query* "The outer query currently being processed." @@ -33,11 +33,18 @@ ;;; ## Formatting +(defn qualified-alias + "Convert the given `FIELD` to a stringified alias" + [field] + (some->> field + (sql/field->alias (driver)) + hx/qualify-and-escape-dots)) + (defn as "Generate a FORM `AS` FIELD alias using the name information of FIELD." [form field] - (if-let [alias (sql/field->alias (driver) field)] - [form (hx/qualify-and-escape-dots alias)] + (if-let [alias (qualified-alias field)] + [form alias] form)) ;; TODO - Consider moving this into query processor interface and making it a method on `ExpressionRef` instead ? @@ -81,6 +88,17 @@ (formatted [{unit :unit, field :field}] (sql/date (driver) unit (formatted field))) + BinnedField + (formatted [{{:keys [min-value max-value] :as field} :field bins :num-bins}] + (let [bin-width (Math/ceil (/ (- (Math/floor max-value) + (Math/floor min-value)) + bins))] + (-> field + formatted + (hx// bin-width) + hx/floor + (hx/* bin-width)))) + ;; e.g. the ["aggregation" 0] fields we allow in order-by AgFieldRef (formatted [{index :index}] @@ -158,17 +176,18 @@ form (recur form more))))) - (defn apply-breakout "Apply a `breakout` clause to HONEYSQL-FORM. Default implementation of `apply-breakout` for SQL drivers." - [_ honeysql-form {breakout-fields :breakout, fields-fields :fields}] - (-> honeysql-form - ;; Group by all the breakout fields - ((partial apply h/group) (map formatted breakout-fields)) - ;; Add fields form only for fields that weren't specified in :fields clause -- we don't want to include it twice, or HoneySQL will barf - ((partial apply h/merge-select) (for [field breakout-fields - :when (not (contains? (set fields-fields) field))] - (as (formatted field) field))))) + [_ honeysql-form {breakout-fields :breakout, fields-fields :fields :as query}] + (as-> honeysql-form new-hsql + (apply h/merge-select new-hsql (for [field breakout-fields + :when (not (contains? (set fields-fields) field))] + (as (formatted field) field))) + (apply h/group new-hsql (map (fn [field] + (if (instance? BinnedField field) + (qualified-alias field) + (formatted field))) + breakout-fields)))) (defn apply-fields "Apply a `fields` clause to HONEYSQL-FORM. Default implementation of `apply-fields` for SQL drivers." @@ -227,14 +246,17 @@ (defn apply-order-by "Apply `order-by` clause to HONEYSQL-FORM. Default implementation of `apply-order-by` for SQL drivers." - [_ honeysql-form {subclauses :order-by}] - (loop [honeysql-form honeysql-form, [{:keys [field direction]} & more] subclauses] - (let [honeysql-form (h/merge-order-by honeysql-form [(formatted field) (case direction - :ascending :asc - :descending :desc)])] - (if (seq more) - (recur honeysql-form more) - honeysql-form)))) + [_ honeysql-form {subclauses :order-by breakout-fields :breakout}] + (let [[{:keys [special-type] :as first-breakout-field}] breakout-fields] + (loop [honeysql-form honeysql-form, [{:keys [field direction]} & more] subclauses] + (let [honeysql-form (h/merge-order-by honeysql-form (if (instance? BinnedField field) + (qualified-alias field) + [(formatted field) (case direction + :ascending :asc + :descending :desc)]))] + (if (seq more) + (recur honeysql-form more) + honeysql-form))))) (defn apply-page "Apply `page` clause to HONEYSQL-FORM. Default implementation of `apply-page` for SQL drivers." diff --git a/src/metabase/query_processor/annotate.clj b/src/metabase/query_processor/annotate.clj index 8200086f5f8..5468c76ea29 100644 --- a/src/metabase/query_processor/annotate.clj +++ b/src/metabase/query_processor/annotate.clj @@ -196,7 +196,9 @@ :schema-name :schema_name :special-type :special_type :table-id :table_id - :visibility-type :visibility_type}) + :visibility-type :visibility_type + :min-value :min_value + :max-value :max_value}) (dissoc :position :clause-position :parent :parent-id :table-name)))) (defn- fk-field->dest-fn diff --git a/src/metabase/query_processor/expand.clj b/src/metabase/query_processor/expand.clj index a6a1d3c7bc8..ee61ef2c85c 100644 --- a/src/metabase/query_processor/expand.clj +++ b/src/metabase/query_processor/expand.clj @@ -181,6 +181,10 @@ ;;; ## breakout & fields +(s/defn ^:ql ^:always-validate binning-strategy :- FieldPlaceholder + "Reference to a `BinnedField`. This is just a `Field` reference with an associated `STRATEGY-NAME` and `STRATEGY-PARAM`" + ([f strategy-name strategy-param] (assoc (field f) :binning-strategy strategy-param))) + (defn- fields-list-clause ([k query] query) ([k query & fields] (assoc query k (mapv field fields)))) diff --git a/src/metabase/query_processor/interface.clj b/src/metabase/query_processor/interface.clj index 05397e945ad..858425dc314 100644 --- a/src/metabase/query_processor/interface.clj +++ b/src/metabase/query_processor/interface.clj @@ -87,7 +87,9 @@ description :- (s/maybe su/NonBlankString) parent-id :- (s/maybe su/IntGreaterThanZero) ;; Field once its resolved; FieldPlaceholder before that - parent :- s/Any] + parent :- s/Any + min-value :- (s/maybe s/Num) + max-value :- (s/maybe s/Num)] clojure.lang.Named (getName [_] field-name) ; (name <field>) returns the *unqualified* name of the field, #obvi @@ -128,6 +130,11 @@ clojure.lang.Named (getName [_] (name field))) +(s/defrecord BinnedField [field :- Field + num-bins :- s/Int] + clojure.lang.Named + (getName [_] (name field))) + (s/defrecord ExpressionRef [expression-name :- su/NonBlankString] clojure.lang.Named (getName [_] expression-name) @@ -174,7 +181,8 @@ fk-field-id :- (s/maybe (s/constrained su/IntGreaterThanZero (fn [_] (or (assert-driver-supports :foreign-keys) true)) "foreign-keys is not supported by this driver.")) - datetime-unit :- (s/maybe (apply s/enum datetime-field-units))]) + datetime-unit :- (s/maybe (apply s/enum datetime-field-units)) + binning-strategy :- (s/maybe s/Int)]) (s/defrecord AgFieldRef [index :- s/Int]) diff --git a/src/metabase/query_processor/resolve.clj b/src/metabase/query_processor/resolve.clj index 1d1747d15df..72f9df2b849 100644 --- a/src/metabase/query_processor/resolve.clj +++ b/src/metabase/query_processor/resolve.clj @@ -28,7 +28,9 @@ :visibility_type :visibility-type :base_type :base-type :table_id :table-id - :parent_id :parent-id})) + :parent_id :parent-id + :min_value :min-value + :max_value :max-value})) ;;; # ------------------------------------------------------------ IRESOLVE PROTOCOL ------------------------------------------------------------ @@ -98,17 +100,19 @@ ;;; ## ------------------------------------------------------------ FIELD PLACEHOLDER ------------------------------------------------------------ -(defn- field-ph-resolve-field [{:keys [field-id datetime-unit fk-field-id], :as this} field-id->field] +(defn- field-ph-resolve-field [{:keys [field-id datetime-unit fk-field-id binning-strategy], :as this} field-id->field] (if-let [{:keys [base-type special-type], :as field} (some-> (field-id->field field-id) i/map->Field (assoc :fk-field-id fk-field-id))] - ;; try to resolve the Field with the ones available in field-id->field - (let [datetime-field? (or (isa? base-type :type/DateTime) - (isa? special-type :type/DateTime))] - (if-not datetime-field? - field - (i/map->DateTimeField {:field field - :unit (or datetime-unit :day)}))) ; default to `:day` if a unit wasn't specified + (cond + (or (isa? base-type :type/DateTime) + (isa? special-type :type/DateTime)) + (i/map->DateTimeField {:field field + :unit (or datetime-unit :day)}) ; default to `:day` if a unit wasn't specified + binning-strategy + (i/map->BinnedField {:field field + :num-bins binning-strategy}) + :else field) ;; If that fails just return ourselves as-is this)) @@ -192,7 +196,7 @@ ;; If there are no more Field IDs to resolve we're done. expanded-query-dict ;; Otherwise fetch + resolve the Fields in question - (let [fields (->> (u/key-by :id (db/select [field/Field :name :display_name :base_type :special_type :visibility_type :table_id :parent_id :description :id] + (let [fields (->> (u/key-by :id (db/select [field/Field :name :display_name :base_type :special_type :visibility_type :table_id :parent_id :description :id :max_value :min_value] :visibility_type [:not= "sensitive"] :id [:in field-ids])) (m/map-vals rename-mb-field-keys) diff --git a/src/metabase/sync_database/analyze.clj b/src/metabase/sync_database/analyze.clj index 87985f8b622..58e303f8071 100644 --- a/src/metabase/sync_database/analyze.clj +++ b/src/metabase/sync_database/analyze.clj @@ -75,6 +75,17 @@ (and (nil? (:special_type field)) (pos? (count distinct-values))) (assoc :special-type :type/Category)))) +(defn numeric-type? [{:keys [base_type] :as field}] + (isa? base_type :type/Number)) + +(defn test:add-min-max + "Add min/max values for numeric types" + [field field-stats] + (let [[max-val min-val] (queries/field-max-min field)] + (assoc field-stats + :min-value min-val + :max-value max-val))) + (defn- test:no-preview-display "If FIELD's is textual and its average length is too great, mark it so it isn't displayed in the UI." [driver field field-stats] @@ -198,8 +209,9 @@ :fields (for [{:keys [id] :as field} (table/fields table)] (let [new-field? (contains? new-field-ids id)] (cond->> {:id id} - (test-for-cardinality? field new-field?) (test:cardinality-and-extract-field-values field) - new-field? (test:new-field driver field))))}))) + (test-for-cardinality? field new-field?) (test:cardinality-and-extract-field-values field) + (numeric-type? field) (test:add-min-max field) + new-field? (test:new-field driver field))))}))) (defn generic-analyze-table "An implementation of `analyze-table` using the defaults (`default-field-avg-length` and `field-percent-urls`)." @@ -221,13 +233,15 @@ (db/update! table/Table table-id, :rows (:row_count table-stats))) ;; update individual fields - (doseq [{:keys [id preview-display special-type values]} (:fields table-stats)] + (doseq [{:keys [id preview-display special-type values min-value max-value]} (:fields table-stats)] ;; set Field metadata we may have detected - (when (and id (or preview-display special-type)) + (when (and id (or preview-display special-type min-value max-value)) (db/update-non-nil-keys! field/Field id ;; if a field marked `preview-display` as false then set the visibility type to `:details-only` (see models.field/visibility-types) :visibility_type (when (false? preview-display) :details-only) - :special_type special-type)) + :special_type special-type + :min_value min-value + :max_value max-value)) ;; handle field values, setting them if applicable otherwise clearing them (if (and id values (pos? (count (filter identity values)))) (field-values/save-field-values! id values) diff --git a/src/metabase/sync_database/interface.clj b/src/metabase/sync_database/interface.clj index bc931754bb0..dcd2b134a9c 100644 --- a/src/metabase/sync_database/interface.clj +++ b/src/metabase/sync_database/interface.clj @@ -9,7 +9,9 @@ (s/optional-key :fields) [{:id su/IntGreaterThanZero (s/optional-key :special-type) su/FieldType (s/optional-key :preview-display) s/Bool - (s/optional-key :values) [s/Any]}]}) + (s/optional-key :values) [s/Any] + (s/optional-key :min-value) s/Num + (s/optional-key :max-value) s/Num}]}) (def DescribeDatabase "Schema for the expected output of `describe-database`." diff --git a/test/metabase/api/database_test.clj b/test/metabase/api/database_test.clj index ab97f890d3d..5a94d7de692 100644 --- a/test/metabase/api/database_test.clj +++ b/test/metabase/api/database_test.clj @@ -278,7 +278,9 @@ :visibility_type "normal" :fk_target_field_id $ :parent_id nil - :values $}) + :values $ + :min_value 1.0 + :max_value 75.0}) (match-$ (hydrate/hydrate (Field (id :categories :name)) :values) {:description nil :table_id (id :categories) @@ -300,7 +302,9 @@ :visibility_type "normal" :fk_target_field_id $ :parent_id nil - :values $})] + :values $ + :min_value nil + :max_value nil})] :segments [] :metrics [] :rows 75 diff --git a/test/metabase/api/field_test.clj b/test/metabase/api/field_test.clj index 951da88272f..6564bdde4dd 100644 --- a/test/metabase/api/field_test.clj +++ b/test/metabase/api/field_test.clj @@ -69,7 +69,9 @@ :created_at $ :base_type "type/Text" :fk_target_field_id nil - :parent_id nil}) + :parent_id nil + :min_value nil + :max_value nil}) ((user->client :rasta) :get 200 (format "field/%d" (id :users :name)))) diff --git a/test/metabase/api/table_test.clj b/test/metabase/api/table_test.clj index f323184f32e..2074cad07d3 100644 --- a/test/metabase/api/table_test.clj +++ b/test/metabase/api/table_test.clj @@ -73,7 +73,9 @@ :visibility_type "normal" :caveats nil :points_of_interest nil - :parent_id nil}) + :parent_id nil + :min_value nil + :max_value nil}) ;; ## GET /api/table @@ -147,7 +149,9 @@ :base_type "type/BigInteger" :fk_target_field_id $ :raw_column_id $ - :last_analyzed $})) + :last_analyzed $ + :min_value 1.0 + :max_value 75.0})) (merge defaults (match-$ (Field (id :categories :name)) {:special_type "type/Name" :name "NAME" @@ -207,7 +211,9 @@ :visibility_type "normal" :fk_target_field_id $ :raw_column_id $ - :last_analyzed $})) + :last_analyzed $ + :min_value 1.0 + :max_value 15.0})) (merge defaults (match-$ (Field (id :users :last_login)) {:special_type nil :name "LAST_LOGIN" @@ -286,7 +292,9 @@ :base_type "type/BigInteger" :fk_target_field_id $ :raw_column_id $ - :last_analyzed $})) + :last_analyzed $ + :min_value 1.0 + :max_value 15.0})) (merge defaults (match-$ (Field (id :users :last_login)) {:special_type nil :name "LAST_LOGIN" @@ -420,6 +428,8 @@ :created_at $ :updated_at $ :last_analyzed $ + :min_value 1.0 + :max_value 15.0 :table (merge (dissoc (table-defaults) :segments :field_values :metrics) (match-$ (Table (id :checkins)) {:schema "PUBLIC" @@ -445,6 +455,8 @@ :created_at $ :updated_at $ :last_analyzed $ + :min_value 1.0 + :max_value 15.0 :table (merge (dissoc (table-defaults) :db :segments :field_values :metrics) (match-$ (Table (id :users)) {:schema "PUBLIC" diff --git a/test/metabase/driver/generic_sql_test.clj b/test/metabase/driver/generic_sql_test.clj index b78b80cc98f..eb46a9b808d 100644 --- a/test/metabase/driver/generic_sql_test.clj +++ b/test/metabase/driver/generic_sql_test.clj @@ -68,15 +68,19 @@ ;; ANALYZE-TABLE - (expect {:row_count 100, - :fields [{:id (id :venues :category_id)} - {:id (id :venues :id)} - {:id (id :venues :latitude)} - {:id (id :venues :longitude)} + :fields [{:id (id :venues :category_id) + :min-value 2.0, :max-value 74.0} + {:id (id :venues :id) + :min-value 1.0, :max-value 100.0} + {:id (id :venues :latitude) + :min-value 10.0646, :max-value 40.7794} + {:id (id :venues :longitude) + :min-value -165.374, :max-value -73.9533} {:id (id :venues :name), :values (db/select-one-field :values 'FieldValues, :field_id (id :venues :name))} - {:id (id :venues :price), :values [1 2 3 4]}]} + {:id (id :venues :price), :values [1 2 3 4] + :min-value 1.0, :max-value 4.0}]} (driver/analyze-table (H2Driver.) @venues-table (set (mapv :id (table/fields @venues-table))))) (resolve-private-vars metabase.driver.generic-sql field-avg-length field-values-lazy-seq table-rows-seq) diff --git a/test/metabase/query_processor/expand_resolve_test.clj b/test/metabase/query_processor/expand_resolve_test.clj index cb381f32c16..3288ace2a20 100644 --- a/test/metabase/query_processor/expand_resolve_test.clj +++ b/test/metabase/query_processor/expand_resolve_test.clj @@ -27,12 +27,14 @@ :type :query :query {:source-table (id :venues) :filter {:filter-type :> - :field {:field-id (id :venues :price) - :fk-field-id nil - :datetime-unit nil} - :value {:field-placeholder {:field-id (id :venues :price) - :fk-field-id nil - :datetime-unit nil} + :field {:field-id (id :venues :price) + :fk-field-id nil + :datetime-unit nil + :binning-strategy nil} + :value {:field-placeholder {:field-id (id :venues :price) + :fk-field-id nil + :datetime-unit nil + :binning-strategy nil} :value 1}}}} ;; resolved form {:database (id) @@ -54,7 +56,9 @@ :position nil :description nil :parent-id nil - :parent nil} + :parent nil + :min-value 1.0 + :max-value 4.0} :value {:value 1 :field {:field-id (id :venues :price) :fk-field-id nil @@ -69,12 +73,14 @@ :position nil :description nil :parent-id nil - :parent nil}}} + :parent nil + :min-value 1.0 + :max-value 4.0}}} :join-tables nil} :fk-field-ids #{} :table-ids #{(id :venues)}}] (let [expanded-form (ql/expand (wrap-inner-query (query venues - (ql/filter (ql/and (ql/> $price 1))))))] + (ql/filter (ql/and (ql/> $price 1))))))] (mapv obj->map [expanded-form (resolve/resolve expanded-form)]))) @@ -86,12 +92,14 @@ :type :query :query {:source-table (id :venues) :filter {:filter-type := - :field {:field-id (id :categories :name) - :fk-field-id (id :venues :category_id) - :datetime-unit nil} - :value {:field-placeholder {:field-id (id :categories :name) - :fk-field-id (id :venues :category_id) - :datetime-unit nil} + :field {:field-id (id :categories :name) + :fk-field-id (id :venues :category_id) + :datetime-unit nil + :binning-strategy nil} + :value {:field-placeholder {:field-id (id :categories :name) + :fk-field-id (id :venues :category_id) + :datetime-unit nil + :binning-strategy nil} :value "abc"}}}} ;; resolved form {:database (id) @@ -113,7 +121,9 @@ :position nil :description nil :parent-id nil - :parent nil} + :parent nil + :min-value nil + :max-value nil} :value {:value "abc" :field {:field-id (id :categories :name) :fk-field-id (id :venues :category_id) @@ -128,7 +138,9 @@ :position nil :description nil :parent-id nil - :parent nil}}} + :parent nil + :min-value nil + :max-value nil}}} :join-tables [{:source-field {:field-id (id :venues :category_id) :field-name "CATEGORY_ID"} :pk-field {:field-id (id :categories :id) @@ -140,8 +152,8 @@ :fk-field-ids #{(id :venues :category_id)} :table-ids #{(id :categories)}}] (let [expanded-form (ql/expand (wrap-inner-query (query venues - (ql/filter (ql/= $category_id->categories.name - "abc")))))] + (ql/filter (ql/= $category_id->categories.name + "abc")))))] (mapv obj->map [expanded-form (resolve/resolve expanded-form)]))) @@ -153,12 +165,14 @@ :type :query :query {:source-table (id :checkins) :filter {:filter-type :> - :field {:field-id (id :users :last_login) - :fk-field-id (id :checkins :user_id) - :datetime-unit :year} - :value {:field-placeholder {:field-id (id :users :last_login) - :fk-field-id (id :checkins :user_id) - :datetime-unit :year} + :field {:field-id (id :users :last_login) + :fk-field-id (id :checkins :user_id) + :datetime-unit :year + :binning-strategy nil} + :value {:field-placeholder {:field-id (id :users :last_login) + :fk-field-id (id :checkins :user_id) + :datetime-unit :year + :binning-strategy nil} :value "1980-01-01"}}}} ;; resolved form {:database (id) @@ -180,7 +194,9 @@ :position nil :description nil :parent-id nil - :parent nil} + :parent nil + :min-value nil + :max-value nil} :unit :year} :value {:value (u/->Timestamp "1980-01-01") :field {:field {:field-id (id :users :last_login) @@ -196,7 +212,9 @@ :position nil :description nil :parent-id nil - :parent nil} + :parent nil + :min-value nil + :max-value nil} :unit :year}}} :join-tables [{:source-field {:field-id (id :checkins :user_id) :field-name "USER_ID"} @@ -225,32 +243,36 @@ :custom-name nil :field {:field-id (id :venues :price) :fk-field-id (id :checkins :venue_id) - :datetime-unit nil}}] + :datetime-unit nil + :binning-strategy nil}}] :breakout [{:field-id (id :checkins :date) :fk-field-id nil - :datetime-unit :day-of-week}]}} + :datetime-unit :day-of-week + :binning-strategy nil}]}} ;; resolved form {:database (id) :type :query :query {:source-table {:schema "PUBLIC" :name "CHECKINS" :id (id :checkins)} - :aggregation [{:aggregation-type :sum - :custom-name nil - :field {:description nil - :base-type :type/Integer - :parent nil - :table-id (id :venues) - :special-type :type/Category - :field-name "PRICE" - :field-display-name "Price" - :parent-id nil - :visibility-type :normal - :position nil - :field-id (id :venues :price) - :fk-field-id (id :checkins :venue_id) - :table-name "VENUES__via__VENUE_ID" - :schema-name nil}}] + :aggregation [{:aggregation-type :sum + :custom-name nil + :field {:description nil + :base-type :type/Integer + :parent nil + :table-id (id :venues) + :special-type :type/Category + :field-name "PRICE" + :field-display-name "Price" + :parent-id nil + :visibility-type :normal + :position nil + :field-id (id :venues :price) + :fk-field-id (id :checkins :venue_id) + :table-name "VENUES__via__VENUE_ID" + :schema-name nil + :min-value 1.0 + :max-value 4.0}}] :breakout [{:field {:description nil :base-type :type/Date :parent nil @@ -264,7 +286,9 @@ :field-id (id :checkins :date) :fk-field-id nil :table-name "CHECKINS" - :schema-name "PUBLIC"} + :schema-name "PUBLIC" + :min-value nil + :max-value nil} :unit :day-of-week}] :join-tables [{:source-field {:field-id (id :checkins :venue_id) :field-name "VENUE_ID"} @@ -277,7 +301,7 @@ :fk-field-ids #{(id :checkins :venue_id)} :table-ids #{(id :venues) (id :checkins)}}] (let [expanded-form (ql/expand (wrap-inner-query (query checkins - (ql/aggregation (ql/sum $venue_id->venues.price)) - (ql/breakout (ql/datetime-field $checkins.date :day-of-week)))))] + (ql/aggregation (ql/sum $venue_id->venues.price)) + (ql/breakout (ql/datetime-field $checkins.date :day-of-week)))))] (mapv obj->map [expanded-form (resolve/resolve expanded-form)]))) diff --git a/test/metabase/query_processor_test.clj b/test/metabase/query_processor_test.clj index dec254d8d3b..87fb329b7ce 100644 --- a/test/metabase/query_processor_test.clj +++ b/test/metabase/query_processor_test.clj @@ -90,11 +90,13 @@ :visibility_type :normal :schema_name (data/default-schema) :source :fields - :fk_field_id nil}) + :fk_field_id nil + :min_value nil + :max_value nil}) (defn- target-field [field] (when (data/fks-supported?) - (dissoc field :target :extra_info :schema_name :source :fk_field_id))) + (dissoc field :target :extra_info :schema_name :source :fk_field_id :min_value :max_value))) (defn categories-col "Return column information for the `categories` column named by keyword COL." @@ -125,7 +127,9 @@ :id {:special_type :type/PK :base_type (data/id-field-type) :name (data/format-name "id") - :display_name "ID"} + :display_name "ID" + :min_value 1.0 + :max_value 15.0} :name {:special_type :type/Name :base_type (data/expected-base-type->actual :type/Text) :name (data/format-name "name") @@ -153,7 +157,9 @@ :id {:special_type :type/PK :base_type (data/id-field-type) :name (data/format-name "id") - :display_name "ID"} + :display_name "ID" + :min_value 1.0 + :max_value 100.0} :category_id {:extra_info (if (data/fks-supported?) {:target_table_id (data/id :categories)} {}) @@ -163,28 +169,39 @@ :type/Category) :base_type (data/expected-base-type->actual :type/Integer) :name (data/format-name "category_id") - :display_name "Category ID"} + :display_name "Category ID" + :min_value nil + :max_value nil} :price {:special_type :type/Category :base_type (data/expected-base-type->actual :type/Integer) :name (data/format-name "price") - :display_name "Price"} + :display_name "Price" + :min_value 1.0 + :max_value 4.0} :longitude {:special_type :type/Longitude :base_type (data/expected-base-type->actual :type/Float) :name (data/format-name "longitude") - :display_name "Longitude"} + :display_name "Longitude" + :min_value nil + :max_value nil} :latitude {:special_type :type/Latitude :base_type (data/expected-base-type->actual :type/Float) :name (data/format-name "latitude") - :display_name "Latitude"} + :display_name "Latitude" + :min_value nil + :max_value nil} :name {:special_type :type/Name :base_type (data/expected-base-type->actual :type/Text) :name (data/format-name "name") - :display_name "Name"}))) + :display_name "Name" + :min_value nil + :max_value nil}))) (defn venues-cols "`cols` information for all the columns in `venues`." [] - (mapv venues-col [:id :name :category_id :latitude :longitude :price])) + (mapv (comp #(assoc % :min_value nil :max_value nil) venues-col) + [:id :name :category_id :latitude :longitude :price])) ;; #### checkins (defn checkins-col @@ -208,7 +225,9 @@ :type/Category) :base_type (data/expected-base-type->actual :type/Integer) :name (data/format-name "venue_id") - :display_name "Venue ID"} + :display_name "Venue ID" + :min_value 1.0 + :max_value 100.0} :user_id {:extra_info (if (data/fks-supported?) {:target_table_id (data/id :users)} {}) :target (target-field (users-col :id)) @@ -217,7 +236,9 @@ :type/Category) :base_type (data/expected-base-type->actual :type/Integer) :name (data/format-name "user_id") - :display_name "User ID"}))) + :display_name "User ID" + :min_value 1.0 + :max_value 15.0}))) ;;; #### aggregate columns diff --git a/test/metabase/query_processor_test/breakout_test.clj b/test/metabase/query_processor_test/breakout_test.clj index 2d8ecbf89ce..40d52a69a5f 100644 --- a/test/metabase/query_processor_test/breakout_test.clj +++ b/test/metabase/query_processor_test/breakout_test.clj @@ -2,9 +2,10 @@ "Tests for the `:breakout` clause." (:require [metabase.query-processor-test :refer :all] [metabase.query-processor.expand :as ql] - [metabase.test.data :as data])) + [metabase.test.data :as data] + [metabase.util :as u])) -;;; single column +;; single column (qp-expect-with-all-engines {:rows [[1 31] [2 70] [3 75] [4 77] [5 69] [6 70] [7 76] [8 81] [9 68] [10 78] [11 74] [12 59] [13 76] [14 62] [15 34]] :columns [(data/format-name "user_id") @@ -69,3 +70,25 @@ (ql/limit 10)) booleanize-native-form (format-rows-by [int int int]))) + +(expect-with-non-timeseries-dbs + [[10.0 1] [32.0 4] [34.0 57] [36.0 29] [40.0 9]] + (format-rows-by [(partial u/round-to-decimals 1) int] + (rows (data/run-query venues + (ql/aggregation (ql/count)) + (ql/breakout (ql/binning-strategy $latitude :default 20)))))) + +(expect-with-non-timeseries-dbs + [[10.0 1] [30.0 90] [40.0 9]] + (format-rows-by [(partial u/round-to-decimals 1) int] + (rows (data/run-query venues + (ql/aggregation (ql/count)) + (ql/breakout (ql/binning-strategy $latitude :default 3)))))) + +(expect-with-non-timeseries-dbs + [[10.0 -170.0 1] [32.0 -120.0 4] [34.0 -120.0 57] [36.0 -125.0 29] [40.0 -75.0 9]] + (format-rows-by [(partial u/round-to-decimals 1) (partial u/round-to-decimals 1) int] + (rows (data/run-query venues + (ql/aggregation (ql/count)) + (ql/breakout (ql/binning-strategy $latitude :default 20) + (ql/binning-strategy $longitude :default 20)))))) diff --git a/test/metabase/query_processor_test/expressions_test.clj b/test/metabase/query_processor_test/expressions_test.clj index 4f9c6cc7acb..800e0899fdc 100644 --- a/test/metabase/query_processor_test/expressions_test.clj +++ b/test/metabase/query_processor_test/expressions_test.clj @@ -13,7 +13,7 @@ ;; Test the expansion of the expressions clause (expect {:expressions {:my-cool-new-field (qpi/map->Expression {:operator :* - :args [{:field-id 10, :fk-field-id nil, :datetime-unit nil} + :args [{:field-id 10, :fk-field-id nil, :datetime-unit nil, :binning-strategy nil} 20.0]})}} ; 20 should be converted to a FLOAT (ql/expressions {} {:my-cool-new-field (ql/* (ql/field-id 10) 20)})) diff --git a/test/metabase/query_processor_test/field_visibility_test.clj b/test/metabase/query_processor_test/field_visibility_test.clj index e012dd68df1..f9f5b1968e1 100644 --- a/test/metabase/query_processor_test/field_visibility_test.clj +++ b/test/metabase/query_processor_test/field_visibility_test.clj @@ -18,10 +18,18 @@ [(set (venues-cols)) #{(venues-col :category_id) (venues-col :name) - (venues-col :latitude) - (venues-col :id) - (venues-col :longitude) - (assoc (venues-col :price) :visibility_type :details-only)} + (assoc (venues-col :latitude) + :min_value nil + :max_value nil) + (assoc (venues-col :id) + :min_value nil + :max_value nil) + (assoc (venues-col :longitude) + :min_value nil + :max_value nil) + (assoc (venues-col :price) :visibility_type :details-only + :min_value nil + :max_value nil)} (set (venues-cols))] [(get-col-names) (do (db/update! Field (data/id :venues :price), :visibility_type :details-only) @@ -34,7 +42,9 @@ ;;; Make sure :sensitive information fields are never returned by the QP (qp-expect-with-all-engines {:columns (->columns "id" "name" "last_login") - :cols [(users-col :id) + :cols [(assoc (users-col :id) + :min_value nil + :max_value nil) (users-col :name) (users-col :last_login)], :rows [[ 1 "Plato Yeshua"] diff --git a/test/metabase/sync_database/sync_dynamic_test.clj b/test/metabase/sync_database/sync_dynamic_test.clj index 8876096b732..efe161931a0 100644 --- a/test/metabase/sync_database/sync_dynamic_test.clj +++ b/test/metabase/sync_database/sync_dynamic_test.clj @@ -35,7 +35,9 @@ :fk_target_field_id false :last_analyzed false :created_at true - :updated_at true}) + :updated_at true + :min_value nil + :max_value nil}) ;; save-table-fields! (also covers save-nested-fields!) (expect diff --git a/test/metabase/sync_database/sync_test.clj b/test/metabase/sync_database/sync_test.clj index f41579d6f4b..970ff77ec5d 100644 --- a/test/metabase/sync_database/sync_test.clj +++ b/test/metabase/sync_database/sync_test.clj @@ -116,7 +116,9 @@ :fk_target_field_id false :last_analyzed false :created_at true - :updated_at true}) + :updated_at true + :min_value nil + :max_value nil}) ;; save-table-fields! ;; this test also covers create-field-from-field-def! and update-field-from-field-def! diff --git a/test/metabase/sync_database_test.clj b/test/metabase/sync_database_test.clj index 7a8c4628e52..cde5670128f 100644 --- a/test/metabase/sync_database_test.clj +++ b/test/metabase/sync_database_test.clj @@ -103,7 +103,9 @@ :fk_target_field_id false :created_at true :updated_at true - :last_analyzed true}) + :last_analyzed true + :min_value nil + :max_value nil}) ;; ## SYNC DATABASE @@ -325,7 +327,6 @@ (do (sync-table! @venues-table) (get-field-values))])) - ;;; -------------------- Make sure that if a Field's cardinality passes `metabase.sync-database.analyze/low-cardinality-threshold` (currently 300) (#3215) -------------------- (defn- insert-range-sql [rang] (str "INSERT INTO blueberries_consumed (num) VALUES " @@ -354,3 +355,13 @@ (exec! [(insert-range-sql (range 100 (+ 100 @(resolve 'metabase.sync-database.analyze/low-cardinality-threshold))))]) (sync-database! db, :full-sync? true) (db/exists? FieldValues :field_id field-id))))))) + +(expect + [-165.374 -73.9533 10.0646 40.7794] + (tt/with-temp* [Database [database {:details (:details (Database (id))), :engine :h2}] + Table [table {:db_id (u/get-id database), :name "VENUES"}]] + (sync-table! table) + [(db/select-one-field :min_value Field, :id (id :venues :longitude)) + (db/select-one-field :max_value Field, :id (id :venues :longitude)) + (db/select-one-field :min_value Field, :id (id :venues :latitude)) + (db/select-one-field :max_value Field, :id (id :venues :latitude))])) diff --git a/test/metabase/test/mock/moviedb.clj b/test/metabase/test/mock/moviedb.clj index f32aea0fe03..7ff287e37e4 100644 --- a/test/metabase/test/mock/moviedb.clj +++ b/test/metabase/test/mock/moviedb.clj @@ -184,7 +184,9 @@ :position 0 :visibility_type :normal :preview_display true - :created_at true}) + :created_at true + :min_value nil + :max_value nil}) (def ^:const moviedb-tables-and-fields [(merge table-defaults diff --git a/test/metabase/test/mock/schema_per_customer.clj b/test/metabase/test/mock/schema_per_customer.clj index 28e3beedaae..cb458a065df 100644 --- a/test/metabase/test/mock/schema_per_customer.clj +++ b/test/metabase/test/mock/schema_per_customer.clj @@ -282,7 +282,9 @@ :position 0 :visibility_type :normal :preview_display true - :created_at true}) + :created_at true + :min_value nil + :max_value nil}) (def ^:const schema-per-customer-tables-and-fields [(merge table-defaults diff --git a/test/metabase/test/mock/toucanery.clj b/test/metabase/test/mock/toucanery.clj index 8dd616e0fd1..9d2abe6046f 100644 --- a/test/metabase/test/mock/toucanery.clj +++ b/test/metabase/test/mock/toucanery.clj @@ -109,7 +109,9 @@ :position 0 :visibility_type :normal :preview_display true - :created_at true}) + :created_at true + :min_value nil + :max_value nil}) (def ^:const toucanery-tables-and-fields [(merge table-defaults -- GitLab