Unverified Commit 810f209d authored by metamben's avatar metamben Committed by GitHub
Browse files

Support getting the available aggregation options (#30836)

* Implement available-aggregation-operators

* Support display-info and getting columns from aggregation-operators

* Support adding aggregation operators

* Add :cum-sum to the type hierarchy
parent 991c4264
......@@ -9,12 +9,15 @@
[metabase.util :as u]))
;; Importing and re-exporting some functions defined in each implementation.
;; `to-range` is taken from the `common` alias; every other function below is
;; taken from the `internal` alias.
(shared.ns/import-fn common/to-range)
(shared.ns/import-fn internal/valid?)
(shared.ns/import-fn internal/same-day?)
(shared.ns/import-fn internal/same-month?)
(shared.ns/import-fn internal/same-year?)
(shared.ns/import-fn internal/day-diff)
(defn- prep-options
  "Passes `options` through `u/normalize-map` and merges the result over
  `internal/default-options`, so caller-supplied keys take precedence over the
  defaults."
  [options]
  (->> options
       u/normalize-map
       (merge internal/default-options)))
......@@ -10,6 +10,7 @@
[metabase.lib.ref :as lib.ref]
[metabase.lib.schema :as lib.schema]
[metabase.lib.schema.aggregation :as lib.schema.aggregation]
[metabase.lib.types.isa :as lib.types.isa]
[metabase.lib.util :as lib.util]
[metabase.shared.util.i18n :as i18n]
[metabase.util.malli :as mu]))
......@@ -190,8 +191,9 @@
((get-method lib.metadata.calculation/metadata-method :default) query stage-number clause)))
;; Aggregation clause helpers declared through `lib.common/defop`.
;; NOTE(review): each vector after the name looks like one accepted argument
;; list, so `count` and `cum-count` would accept zero or one argument -- confirm
;; against the `defop` macro.
;; `avg` is declared exactly once; a second identical declaration was redundant.
(lib.common/defop count [] [x])
(lib.common/defop cum-count [] [x])
(lib.common/defop count-where [x y])
(lib.common/defop avg [x])
(lib.common/defop distinct [x])
(lib.common/defop max [x])
(lib.common/defop median [x])
......@@ -200,6 +202,7 @@
;; More aggregation clause helpers declared through `lib.common/defop`.
(lib.common/defop share [x])
(lib.common/defop stddev [x])
(lib.common/defop sum [x])
;; `cum-sum` is declared with the same single-argument list as `sum`.
(lib.common/defop cum-sum [x])
(lib.common/defop sum-where [x y])
(lib.common/defop var [x])
......@@ -235,3 +238,75 @@
(-> (lib.metadata.calculation/metadata query stage-number aggregation)
(assoc :lib/source :source/aggregations
:lib/source-uuid (:lib/uuid (second aggregation))))))))))
;; Malli schema for an aggregation operator that may carry the columns it can
;; be applied to: the base `::lib.schema.aggregation/operator` map merged with
;; an optional `:columns` entry holding column metadata.
(def ^:private OperatorWithColumns
  [:merge
   ::lib.schema.aggregation/operator
   [:map
    [:columns {:optional true} [:sequential lib.metadata/ColumnMetadata]]]])
;; The display name of an aggregation operator is the `:display-name` entry of
;; the map returned by calling the operator's zero-argument `:display-info`
;; function. Query, stage number and display-name style are ignored.
(defmethod lib.metadata.calculation/display-name-method :mbql.aggregation/operator
  [_query _stage-number operator _display-name-style]
  (let [info-thunk (:display-info operator)]
    (-> (info-thunk) :display-name)))
;; Display info for an aggregation operator: the map returned by the operator's
;; `:display-info` function, extended with the operator's `:short` keyword and
;; its `:requires-column?` flag (exposed under the key `:requires-column`).
(defmethod lib.metadata.calculation/display-info-method :mbql.aggregation/operator
  [_query _stage-number operator]
  (let [info-thunk (:display-info operator)]
    (-> (info-thunk)
        (assoc :short (:short operator))
        (assoc :requires-column (:requires-column? operator)))))
(mu/defn aggregation-operator-columns :- [:maybe [:sequential lib.metadata/ColumnMetadata]]
  "Returns the value stored under `:columns` in `aggregation-operator`, i.e. the
  columns the operator is applicable to, or nil when the operator has no
  `:columns` entry."
  [aggregation-operator :- OperatorWithColumns]
  (get aggregation-operator :columns))
(mu/defn available-aggregation-operators :- [:maybe [:sequential OperatorWithColumns]]
  "Returns the available aggregation operators for the stage with `stage-number` of `query`.
  If `stage-number` is omitted, uses the last stage.

  An operator is offered only when its `:driver-feature` is nil or is among the
  database's `:features`. Operators that require a column get a `:columns` entry
  with the visible columns they support; an operator that requires a column but
  has no supported column is left out. Returns nil when no operator is available."
  ([query :- ::lib.schema/query]
   (available-aggregation-operators query -1))

  ([query :- ::lib.schema/query
    stage-number :- :int]
   (let [db-features  (or (:features (lib.metadata/database query)) #{})
         stage        (lib.util/query-stage query stage-number)
         columns      (lib.metadata.calculation/visible-columns query stage-number stage)
         with-columns (fn [{:keys [requires-column? supported-field] :as operator}]
                        (cond
                          ;; Operators without an argument carry no :columns.
                          (not requires-column?)
                          operator

                          ;; :any means every visible column qualifies.
                          (= supported-field :any)
                          (assoc operator :columns columns)

                          ;; Otherwise keep only columns of the supported field
                          ;; type; nil (operator dropped by `keep`) when none match.
                          :else
                          (when-let [cols (->> columns
                                               (filterv #(lib.types.isa/field-type? supported-field %))
                                               not-empty)]
                            (assoc operator :columns cols))))]
     (not-empty
      (into []
            (comp (filter (fn [op]
                            (let [feature (:driver-feature op)]
                              (or (nil? feature) (db-features feature)))))
                  (keep with-columns)
                  (map #(assoc % :lib/type :mbql.aggregation/operator)))
            lib.schema.aggregation/aggregation-operators)))))
(mu/defn aggregation-clause
  "Returns a standalone aggregation clause for an `aggregation-operator` and
  a `column`.
  For aggregations requiring an argument `column` is mandatory, otherwise
  it is optional.

  The result is a `:lib/external-op` map whose `:operator` is the operator's
  `:short` keyword; when `column` is given it is the single element of `:args`.
  Throws an `ex-info` when called without `column` for an operator whose
  `:requires-column?` is truthy."
  ([aggregation-operator]
   (if-not (:requires-column? aggregation-operator)
     {:lib/type :lib/external-op
      :operator (:short aggregation-operator)}
     (throw (ex-info (lib.util/format "aggregation operator %s requires an argument"
                                      (:short aggregation-operator))
                     {:aggregation-operator aggregation-operator}))))

  ([aggregation-operator column]
   {:lib/type :lib/external-op
    :operator (:short aggregation-operator)
    :args     [column]}))
......@@ -37,7 +37,9 @@
;; Converts a `:lib/external-op` map into a clause. `options` defaults to `{}`.
;; Every element of `args` is first converted with `->op-arg`; the resulting
;; vector `[operator-keyword options & converted-args]` is passed through
;; `lib.options/ensure-uuid` and then through `->op-arg` again.
(defmethod ->op-arg :lib/external-op
  [query stage-number {:keys [operator options args] :or {options {}}}]
  (->op-arg query stage-number (lib.options/ensure-uuid (into [(keyword operator) options]
                                                              (map #(->op-arg query stage-number %))
                                                              args))))
(defmethod ->op-arg :dispatch-type/fn
[query stage-number f]
......@@ -54,9 +54,12 @@
......@@ -288,7 +288,7 @@
[query stage-number field-ref]
(lib.temporal-bucket/available-temporal-buckets query stage-number (resolve-field-metadata query stage-number field-ref)))
(defn- fingerprint-based-default [fingerprint]
(defn- fingerprint-based-default-unit [fingerprint]
(when-let [{:keys [earliest latest]} (-> fingerprint :type :type/DateTime)]
(let [days (shared.ut/day-diff (shared.ut/coerce-to-timestamp earliest)
......@@ -310,7 +310,7 @@
(defmethod lib.temporal-bucket/available-temporal-buckets-method :metadata/field
[_query _stage-number field-metadata]
(let [effective-type ((some-fn :effective-type :base-type) field-metadata)
fingerprint-default (some-> field-metadata :fingerprint fingerprint-based-default)]
fingerprint-default (some-> field-metadata :fingerprint fingerprint-based-default-unit)]
(cond-> (cond
(isa? effective-type :type/DateTime) lib.temporal-bucket/datetime-bucket-options
(isa? effective-type :type/Date) lib.temporal-bucket/date-bucket-options
......@@ -312,3 +312,19 @@
(let [n (if (string? n) (keyword n) n)
unit (if (string? unit) (keyword unit) unit)]
(lib.core/describe-relative-datetime n unit)))
(defn ^:export available-aggregation-operators
  "Get the available aggregation operators for the stage with `stage-number` of
  the query `a-query`.
  If `stage-number` is omitted, the last stage is used.
  The operators are returned as an array."
  ([a-query]
   (available-aggregation-operators a-query -1))
  ([a-query stage-number]
   (to-array (lib.core/available-aggregation-operators a-query stage-number))))
(defn ^:export aggregation-operator-columns
  "Get the columns `aggregation-operator` can be applied to.
  The columns are valid for the stage of the query that was used in
  [[available-aggregation-operators]] to get `aggregation-operator`.
  The columns are returned as an array."
  [aggregation-operator]
  (to-array (lib.core/aggregation-operator-columns aggregation-operator)))
......@@ -3,12 +3,17 @@
[metabase.lib.hierarchy :as lib.hierarchy]
[metabase.lib.schema.expression :as expression]
[metabase.lib.schema.mbql-clause :as mbql-clause]
[metabase.shared.util.i18n :as i18n]
[metabase.util.malli.registry :as mr]))
;; count has an optional expression arg; the clause is declared as :type/Integer
(mbql-clause/define-catn-mbql-clause :count :- :type/Integer
[:expression [:? [:schema [:ref ::expression/number]]]])
;; cum-count has an optional expression arg, declared exactly like count
(mbql-clause/define-catn-mbql-clause :cum-count :- :type/Integer
[:expression [:? [:schema [:ref ::expression/number]]]])
;; avg takes a single ::expression/number arg; the clause is declared as :type/Float
(mbql-clause/define-tuple-mbql-clause :avg :- :type/Float
[:schema [:ref ::expression/number]])
......@@ -56,8 +61,13 @@
;; sum and cum-sum each take a single ::expression/number arg
(mbql-clause/define-tuple-mbql-clause :sum
[:schema [:ref ::expression/number]])
(mbql-clause/define-tuple-mbql-clause :cum-sum
[:schema [:ref ::expression/number]])
;; both clauses derive from :lib.type-of/type-is-type-of-first-arg
;; NOTE(review): presumably this makes their type follow the first arg -- confirm in lib.hierarchy users
(lib.hierarchy/derive :sum :lib.type-of/type-is-type-of-first-arg)
(lib.hierarchy/derive :cum-sum :lib.type-of/type-is-type-of-first-arg)
;; sum-where takes an ::expression/number arg followed by an ::expression/boolean arg
(mbql-clause/define-tuple-mbql-clause :sum-where
[:schema [:ref ::expression/number]]
[:schema [:ref ::expression/boolean]])
......@@ -72,6 +82,7 @@
......@@ -81,9 +92,101 @@
;; A sequence of at least one `::aggregation` clause.
(mr/def ::aggregations
[:sequential {:min 1} [:ref ::aggregation]])
(def aggregation-operators
"The list of available aggregation operators.
Each operator is a map with a `:short` keyword, a `:requires-column?` flag,
a `:driver-feature` keyword and a zero-argument `:display-info` function that
returns the translated `:display-name`, `:column-name` and `:description`.
Operators that require a column also have a `:supported-field` keyword.
The order of operators is relevant for the front end."
[{:short :count
:requires-column? false
:driver-feature :basic-aggregations
:display-info (fn []
{:display-name (i18n/tru "Count of rows")
:column-name (i18n/tru "Count")
:description (i18n/tru "Total number of rows in the answer.")})}
{:short :sum
:supported-field :metabase.lib.types.constants/summable
:requires-column? true
:driver-feature :basic-aggregations
:display-info (fn []
{:display-name (i18n/tru "Sum of ...")
:column-name (i18n/tru "Sum")
:description (i18n/tru "Sum of all the values of a column.")})}
{:short :avg
:supported-field :metabase.lib.types.constants/summable
:requires-column? true
:driver-feature :basic-aggregations
:display-info (fn []
{:display-name (i18n/tru "Average of ...")
:column-name (i18n/tru "Average")
:description (i18n/tru "Average of all the values of a column")})}
{:short :median
:supported-field :metabase.lib.types.constants/summable
:requires-column? true
:driver-feature :percentile-aggregations
:display-info (fn []
{:display-name (i18n/tru "Median of ...")
:column-name (i18n/tru "Median")
:description (i18n/tru "Median of all the values of a column")})}
{:short :distinct
:supported-field :any
:requires-column? true
:driver-feature :basic-aggregations
:display-info (fn []
{:display-name (i18n/tru "Number of distinct values of ...")
:column-name (i18n/tru "Distinct values")
:description (i18n/tru "Number of unique values of a column among all the rows in the answer.")})}
{:short :cum-sum
:supported-field :metabase.lib.types.constants/summable
:requires-column? true
:driver-feature :basic-aggregations
:display-info (fn []
{:display-name (i18n/tru "Cumulative sum of ...")
:column-name (i18n/tru "Sum")
:description (i18n/tru "Additive sum of all the values of a column.\ne.x. total revenue over time.")})}
{:short :cum-count
:requires-column? false
:driver-feature :basic-aggregations
:display-info (fn []
{:display-name (i18n/tru "Cumulative count of rows")
:column-name (i18n/tru "Count")
:description (i18n/tru "Additive count of the number of rows.\ne.x. total number of sales over time.")})}
{:short :stddev
:supported-field :metabase.lib.types.constants/summable
:requires-column? true
:driver-feature :standard-deviation-aggregations
:display-info (fn []
{:display-name (i18n/tru "Standard deviation of ...")
:column-name (i18n/tru "SD")
:description (i18n/tru "Number which expresses how much the values of a column vary among all rows in the answer.")})}
{:short :min
:supported-field :metabase.lib.types.constants/scope
:requires-column? true
:driver-feature :basic-aggregations
:display-info (fn []
{:display-name (i18n/tru "Minimum of ...")
:column-name (i18n/tru "Min")
:description (i18n/tru "Minimum value of a column")})}
{:short :max
:supported-field :metabase.lib.types.constants/scope
:requires-column? true
:driver-feature :basic-aggregations
:display-info (fn []
{:display-name (i18n/tru "Maximum of ...")
:column-name (i18n/tru "Max")
:description (i18n/tru "Maximum value of a column")})}])
;; Schema of a single aggregation operator map. `:short` must be one of the
;; `:short` keywords listed in `aggregation-operators`; `:supported-field` is
;; optional because operators that take no column do not have one.
(mr/def ::operator
  [:map
   [:lib/type [:= :mbql.aggregation/operator]]
   [:short (into [:enum] (map :short) aggregation-operators)]
   [:supported-field {:optional true} [:maybe :keyword]] ; TODO more precise type?
   [:requires-column? :boolean]
   [:driver-feature :keyword]           ; TODO more precise type?
   [:display-info fn?]])
......@@ -154,10 +154,14 @@
unique-name-fn :- fn?]
(for [expression (lib.expression/expressions-metadata query stage-number)]
(assoc expression
:lib/source :source/expressions
:lib/source-column-alias (:name expression)
:lib/desired-column-alias (unique-name-fn (:name expression))))))
(let [base-type (:base-type expression)]
(cond-> (assoc expression
:lib/source :source/expressions
:lib/source-column-alias (:name expression)
:lib/desired-column-alias (unique-name-fn (:name expression)))
(and (not (:effective-type expression))
(assoc :effective-type base-type))))))
;;; Calculate the columns to return if `:aggregations`/`:breakout`/`:fields` are unspecified.
......@@ -244,6 +244,184 @@
(is (= :type/Integer
(lib/type-of query (first (lib/aggregations-metadata query)))))))
;; Exercises the aggregation-operator API end to end on a VENUES query with two
;; expressions and one existing aggregation: listing the operators, their
;; display info and display name, the columns they apply to, and adding
;; operator-based clauses as aggregations.
(deftest ^:parallel aggregation-operator-test
  (let [query (-> (lib/query-for-table-name meta/metadata-provider "VENUES")
                  (lib/expression "double-price" (lib/* (lib/field (meta/id :venues :price)) 2))
                  (lib/expression "budget?" (lib/< (lib/field (meta/id :venues :price)) 2))
                  (lib/aggregate (lib/sum [:expression {:lib/uuid (str (random-uuid))} "double-price"])))
        summable-cols [{:display-name "Latitude",
                        :effective-type :type/Float,
                        :semantic-type :type/Latitude,
                        :lib/source :source/table-defaults}
                       {:display-name "Longitude",
                        :effective-type :type/Float,
                        :semantic-type :type/Longitude,
                        :lib/source :source/table-defaults}
                       {:display-name "Price",
                        :effective-type :type/Integer,
                        :semantic-type :type/Category,
                        :lib/source :source/table-defaults}
                       {:display-name "double-price",
                        :effective-type :type/Integer
                        :lib/source :source/expressions}]
        all-cols [{:display-name "ID"
                   :effective-type :type/BigInteger
                   :semantic-type :type/PK
                   :lib/source :source/table-defaults}
                  {:display-name "Name"
                   :effective-type :type/Text
                   :semantic-type :type/Name
                   :lib/source :source/table-defaults}
                  {:display-name "Category ID",
                   :effective-type :type/Integer,
                   :semantic-type :type/FK,
                   :lib/source :source/table-defaults}
                  {:display-name "Latitude",
                   :effective-type :type/Float,
                   :semantic-type :type/Latitude,
                   :lib/source :source/table-defaults}
                  {:display-name "Longitude",
                   :effective-type :type/Float,
                   :semantic-type :type/Longitude,
                   :lib/source :source/table-defaults}
                  {:display-name "Price",
                   :effective-type :type/Integer,
                   :semantic-type :type/Category,
                   :lib/source :source/table-defaults}
                  {:display-name "double-price"
                   :effective-type :type/Integer
                   :lib/source :source/expressions}
                  {:display-name "budget?"
                   :effective-type :type/Boolean
                   :lib/source :source/expressions}
                  {:display-name "ID",
                   :effective-type :type/BigInteger,
                   :semantic-type :type/PK,
                   :lib/source :source/implicitly-joinable}
                  {:display-name "Name",
                   :effective-type :type/Text,
                   :semantic-type :type/Name,
                   :lib/source :source/implicitly-joinable}]
        scope-cols all-cols
        aggregation-operators (lib/available-aggregation-operators query)
        count-op (first aggregation-operators)
        sum-op (second aggregation-operators)]
    (testing "available aggregation operators"
      (is (=? [{:short :count,
                :requires-column? false}
               {:short :sum,
                :requires-column? true,
                :columns summable-cols}
               {:short :avg,
                :requires-column? true,
                :columns summable-cols}
               {:short :distinct,
                :requires-column? true,
                :columns all-cols}
               {:short :cum-sum,
                :requires-column? true,
                :columns summable-cols}
               {:short :cum-count,
                :requires-column? false}
               {:short :stddev,
                :requires-column? true,
                :columns summable-cols}
               {:short :min,
                :requires-column? true,
                :columns scope-cols}
               {:short :max,
                :requires-column? true,
                :columns scope-cols}]
              aggregation-operators)))
    (testing "aggregation operator display info"
      (is (=? [{:display-name "Count of rows",
                :column-name "Count",
                :description "Total number of rows in the answer.",
                :short :count,
                :requires-column false}
               {:display-name "Sum of ...",
                :column-name "Sum",
                :description "Sum of all the values of a column.",
                :short :sum,
                :requires-column true}
               {:display-name "Average of ...",
                :column-name "Average",
                :description "Average of all the values of a column",
                :short :avg,
                :requires-column true}
               {:display-name "Number of distinct values of ...",
                :column-name "Distinct values",
                :description "Number of unique values of a column among all the rows in the answer.",
                :short :distinct,
                :requires-column true}
               {:display-name "Cumulative sum of ...",
                :column-name "Sum",
                :description "Additive sum of all the values of a column.\ne.x. total revenue over time.",
                :short :cum-sum,
                :requires-column true}
               {:display-name "Cumulative count of rows",
                :column-name "Count",
                :description "Additive count of the number of rows.\ne.x. total number of sales over time.",
                :short :cum-count,
                :requires-column false}
               {:display-name "Standard deviation of ...",
                :column-name "SD",
                :description "Number which expresses how much the values of a column vary among all rows in the answer.",
                :short :stddev,
                :requires-column true}
               {:display-name "Minimum of ...",
                :column-name "Min",
                :description "Minimum value of a column",
                :short :min,
                :requires-column true}
               {:display-name "Maximum of ...",
                :column-name "Max",
                :description "Maximum value of a column",
                :short :max,
                :requires-column true}]
              (map #(lib/display-info query %) aggregation-operators))))
    (testing "display name"
      (is (= "Count of rows" (lib/display-name query (first aggregation-operators)))))
    (testing "testing getting the available columns for an aggregation operator"
      (is (nil? (lib/aggregation-operator-columns count-op)))
      (is (=? summable-cols (lib/aggregation-operator-columns sum-op))))
    (testing "aggregation operators can be added as aggregates"
      ;; `pop` drops the last summable column ("double-price"), so `peek`
      ;; yields the "Price" column.
      (let [price-col (-> sum-op lib/aggregation-operator-columns pop peek)
            agg-query (-> query
                          (lib/aggregate (lib/aggregation-clause count-op))
                          (lib/aggregate (lib/aggregation-clause sum-op price-col)))]
        (is (=? {:lib/type :mbql/query
                 :stages
                 [{:lib/type :mbql.stage/mbql,
                   :source-table int?,
                   :expressions
                   {"double-price"
                    [:* {} [:field {:base-type :type/Integer, :effective-type :type/Integer} int?] 2]
                    "budget?"
                    [:< {} [:field {:base-type :type/Integer, :effective-type :type/Integer} int?] 2]}
                   :aggregation
                   [[:sum {} [:expression {} "double-price"]]
                    [:count {}]
                    [:sum {} [:field {:base-type :type/Integer, :effective-type :type/Integer} int?]]]}]}
                agg-query))
        (is (=? [{:lib/type :metadata/field,
                  :base-type :type/Integer,
                  :name "sum_double-price",
                  :display-name "Sum of double-price",
                  :lib/source :source/aggregations}
                 {:lib/type :metadata/field,
                  :base-type :type/Integer,
                  :name "count",
                  :display-name "Count",
                  :lib/source :source/aggregations}
                 {:settings {:is_priceless true},
                  :lib/type :metadata/field,
                  :base-type :type/Integer,
                  :name "sum_PRICE",
                  :display-name "Sum of Price",
                  :lib/source :source/aggregations}]
                (lib/aggregations-metadata agg-query)))))))
(deftest ^:parallel preserve-field-settings-metadata-test
(testing "Aggregation metadata should return the `:settings` for the field being aggregated, for some reason."
(let [query (-> (lib/query-for-table-name meta/metadata-provider "VENUES")
......@@ -83,34 +83,3 @@
:display-name "Sum"
:source_alias "Orders"}]
(lib.metadata.calculation/metadata mlv2-query))))))
;; Checks the temporal buckets offered for PRODUCTS.CREATED_AT in the sample
;; dataset, using the application-database metadata provider. Only the `:unit`
;; and `:default` keys of each bucket are compared.
(deftest ^:parallel temporal-bucketing-options-test
  (mt/dataset sample-dataset
    (let [query {:lib/type :mbql/query
                 :stages   [{:lib/type     :mbql.stage/mbql
                             :fields       [[:field
                                             {:lib/uuid (str (random-uuid))}
                                             (mt/id :products :created_at)]]
                             :source-table (mt/id :products)}]
                 :database (mt/id)}
          query (lib/query (lib.metadata.jvm/application-database-metadata-provider (mt/id))
                           query)]
      (is (= [{:unit :minute}
              {:unit :hour}
              {:unit :day}
              {:unit :week}
              {:unit :month, :default true}
              {:unit :quarter}
              {:unit :year}
              {:unit :minute-of-hour}
              {:unit :hour-of-day}
              {:unit :day-of-week}
              {:unit :day-of-month}
              {:unit :day-of-year}
              {:unit :week-of-year}
              {:unit :month-of-year}
              {:unit :quarter-of-year}]
             (->> (lib.metadata.calculation/metadata query)
                  (lib/available-temporal-buckets query)
                  (mapv #(select-keys % [:unit :default]))))))))
(ns metabase.lib.temporal-bucket-test
[clojure.test :refer [are deftest is testing]]
[metabase.lib.temporal-bucket :as lib.temporal-bucket]))
[metabase.lib.core :as lib]
[metabase.lib.metadata.calculation :as lib.metadata.calculation]
[metabase.lib.temporal-bucket :as lib.temporal-bucket]
[metabase.lib.test-metadata :as meta]))
(deftest ^:parallel describe-temporal-interval-test
(doseq [unit [:day nil]]
......@@ -127,3 +130,26 @@
(into #{} (map :unit) options)))
(is (= (assoc-in expected-defaults [0 :unit] unit)
(filter :default options)))))))))
;; Checks the temporal buckets offered for PRODUCTS.CREATED_AT against the test
;; metadata provider. Each bucket is reduced to its `:unit` and `:default` keys
;; before comparison.
(deftest ^:parallel temporal-bucketing-options-test
  (let [products-query   (-> (lib/query-for-table-name meta/metadata-provider "PRODUCTS")
                             (lib/with-fields [(lib/field "PRODUCTS" "CREATED_AT")]))
        expected-buckets [{:unit :minute}
                          {:unit :hour}
                          {:unit :day}
                          {:unit :week}
                          {:unit :month, :default true}
                          {:unit :quarter}
                          {:unit :year}
                          {:unit :minute-of-hour}
                          {:unit :hour-of-day}
                          {:unit :day-of-week}
                          {:unit :day-of-month}
                          {:unit :day-of-year}
                          {:unit :week-of-year}
                          {:unit :month-of-year}
                          {:unit :quarter-of-year}]]
    (is (= expected-buckets
           (mapv (fn [bucket] (select-keys bucket [:unit :default]))
                 (lib/available-temporal-buckets
                  products-query
                  (lib.metadata.calculation/metadata products-query)))))))
......@@ -35,10 +35,6 @@
;; #29942: missing schema for `:cum-sum` and `:cum-count` aggregations
(mbql.u/match-one legacy-query
#{:cum-sum :cum-count}
;; #29946: nested arithmetic expressions wrapping a `:field` clause
(mbql.u/match-one legacy-query
#{:+ :- :*}
