Skip to content
Snippets Groups Projects
Commit c5b93e31 authored by Ryan Senior's avatar Ryan Senior
Browse files

Add the "bin-width" binning strategy

In MBQL you can now specify a "bin-width" binning strategy along with
the desired bin width, and the BinnedField will bin using that
width.

This commit also changes the existing behavior of the "default"
binning strategy. When "default" is specified, it (for now) uses the
"num-bins" strategy with the globally set default number of
bins (configured via a setting). Specifying "num-bins" or "bin-width"
requires an extra parameter giving the number of bins or the bin
width, respectively. Only "default" uses the global setting.
parent 8f552032
Branches
Tags
No related merge requests found
......@@ -176,9 +176,12 @@
(s/defn ^:ql ^:always-validate binning-strategy :- FieldPlaceholder
"Reference to a `BinnedField`. This is just a `Field` reference with an associated `STRATEGY-NAME` and `STRATEGY-PARAM`"
([f strategy-name & [strategy-param]] (assoc (field f)
:binning-strategy (clojure.core/or strategy-param
(public-settings/breakout-bins-num)))))
([f strategy-name & [strategy-param]]
(let [strategy (qputil/normalize-token strategy-name)
field (field f)]
(if (clojure.core/= :default strategy)
(assoc field :binning-strategy :num-bins, :binning-param (public-settings/breakout-bins-num))
(assoc field :binning-strategy strategy, :binning-param strategy-param)))))
(defn- fields-list-clause
([k query] query)
......
......@@ -141,7 +141,11 @@
clojure.lang.Named
(getName [_] (name field)))
;; The set of binning strategy keywords the schemas in this file accept: `:num-bins` and `:bin-width`.
;; It is spliced into `s/enum` (via `apply`) to validate the strategy fields of the records below.
(def binning-strategies
#{:num-bins :bin-width})
(s/defrecord BinnedField [field :- Field
strategy :- (apply s/enum binning-strategies)
num-bins :- s/Int
min-value :- s/Num
max-value :- s/Num
......@@ -165,7 +169,8 @@
(fn [_] (or (assert-driver-supports :foreign-keys) true)) ; assert-driver-supports will throw Exception if driver is bound
"foreign-keys is not supported by this driver.")) ; and driver does not support foreign keys
datetime-unit :- (s/maybe (apply s/enum datetime-field-units))
binning-strategy :- (s/maybe s/Int)])
binning-strategy :- (s/maybe (apply s/enum binning-strategies))
binning-param :- (s/maybe s/Num)])
(s/defrecord AgFieldRef [index :- s/Int])
;; TODO - add a method to get matching expression from the query?
......
......@@ -31,6 +31,10 @@
(u/round-to-decimals 5 (/ (- max-value min-value)
num-bins)))
(defn- calculate-num-bins
  "Number of bins of width `bin-width` needed to cover the range from `min-value` to `max-value`,
  rounding any partial bin up to a whole bin.

  Returns an integer (a `long`). `Math/ceil` on its own yields a double (e.g. `4.0`), which does not
  satisfy the `s/Int` schema declared for the `num-bins` field of `BinnedField`, so the result is
  coerced explicitly."
  [min-value max-value bin-width]
  (long (Math/ceil (/ (- max-value min-value)
                      bin-width))))
(defn- extract-bounds
"Given query criteria, find a min/max value for the binning strategy
using the greatest user specified min value and the smallest user
......@@ -60,17 +64,23 @@
specified criteria that could impact that min/max. Throws an
Exception if no min/max values are found."
[breakouts filter-field-map]
(mapv (fn [{:keys [field num-bins] :as breakout}]
(mapv (fn [{:keys [field num-bins bin-width] :as breakout}]
(if (instance? BinnedField breakout)
(let [[min-value max-value] (extract-bounds field filter-field-map)]
(let [[min-value max-value] (extract-bounds field filter-field-map)
updated-breakout (assoc breakout :min-value min-value :max-value max-value)]
(when-not (and min-value max-value)
(throw (Exception. (format "Unable to bin field '%s' with id '%s' without a min/max value"
(get-in breakout [:field :field-name])
(get-in breakout [:field :field-id])))))
(assoc breakout
:min-value min-value
:max-value max-value
:bin-width (calculate-bin-width min-value max-value num-bins)))
(cond
(= :num-bins (:strategy updated-breakout))
(assoc updated-breakout
:bin-width (calculate-bin-width min-value max-value num-bins))
(= :bin-width (:strategy updated-breakout))
(assoc updated-breakout
:num-bins (calculate-num-bins min-value max-value bin-width))))
breakouts))
breakouts))
......
......@@ -100,7 +100,7 @@
;;; ## ------------------------------------------------------------ FIELD PLACEHOLDER ------------------------------------------------------------
(defn- field-ph-resolve-field [{:keys [field-id datetime-unit fk-field-id binning-strategy], :as this} field-id->field]
(defn- field-ph-resolve-field [{:keys [field-id datetime-unit fk-field-id binning-strategy binning-param], :as this} field-id->field]
(if-let [{:keys [base-type special-type], :as field} (some-> (field-id->field field-id)
i/map->Field
(assoc :fk-field-id fk-field-id))]
......@@ -109,9 +109,20 @@
(isa? special-type :type/DateTime))
(i/map->DateTimeField {:field field
:unit (or datetime-unit :day)}) ; default to `:day` if a unit wasn't specified
binning-strategy
(= :num-bins binning-strategy)
(i/map->BinnedField {:field field
:num-bins binning-strategy})
:strategy binning-strategy
:num-bins binning-param})
(= :bin-width binning-strategy)
(i/map->BinnedField {:field field
:strategy binning-strategy
:bin-width binning-param})
binning-strategy
(throw (Exception. (format "Unregonized binning strategy '%s'" binning-strategy)))
:else field)
;; If that fails just return ourselves as-is
this))
......
......@@ -19,6 +19,9 @@
{k (obj->map v)}))
:else o))
;; All-nil defaults for the optional keys of an expanded field placeholder map. The expected values in the
;; tests below `merge` the keys they care about (e.g. `:field-id`, `:fk-field-id`, `:datetime-unit`) on top
;; of this map, so each test only spells out what differs from the defaults.
(def ^:private field-ph-defaults
{:fk-field-id nil, :datetime-unit nil,
:binning-strategy nil, :binning-param nil})
;; basic rows query w/ filter
(expect
......@@ -27,14 +30,10 @@
:type :query
:query {:source-table (id :venues)
:filter {:filter-type :>
:field {:field-id (id :venues :price)
:fk-field-id nil
:datetime-unit nil
:binning-strategy nil}
:value {:field-placeholder {:field-id (id :venues :price)
:fk-field-id nil
:datetime-unit nil
:binning-strategy nil}
:field (merge field-ph-defaults
{:field-id (id :venues :price)})
:value {:field-placeholder (merge field-ph-defaults
{:field-id (id :venues :price)})
:value 1}}}}
;; resolved form
{:database (id)
......@@ -92,14 +91,12 @@
:type :query
:query {:source-table (id :venues)
:filter {:filter-type :=
:field {:field-id (id :categories :name)
:fk-field-id (id :venues :category_id)
:datetime-unit nil
:binning-strategy nil}
:value {:field-placeholder {:field-id (id :categories :name)
:fk-field-id (id :venues :category_id)
:datetime-unit nil
:binning-strategy nil}
:field (merge field-ph-defaults
{:field-id (id :categories :name)
:fk-field-id (id :venues :category_id)})
:value {:field-placeholder (merge field-ph-defaults
{:field-id (id :categories :name)
:fk-field-id (id :venues :category_id)})
:value "abc"}}}}
;; resolved form
{:database (id)
......@@ -165,14 +162,14 @@
:type :query
:query {:source-table (id :checkins)
:filter {:filter-type :>
:field {:field-id (id :users :last_login)
:fk-field-id (id :checkins :user_id)
:datetime-unit :year
:binning-strategy nil}
:value {:field-placeholder {:field-id (id :users :last_login)
:fk-field-id (id :checkins :user_id)
:datetime-unit :year
:binning-strategy nil}
:field (merge field-ph-defaults
{:field-id (id :users :last_login)
:fk-field-id (id :checkins :user_id)
:datetime-unit :year})
:value {:field-placeholder (merge field-ph-defaults
{:field-id (id :users :last_login)
:fk-field-id (id :checkins :user_id)
:datetime-unit :year})
:value "1980-01-01"}}}}
;; resolved form
{:database (id)
......@@ -241,14 +238,12 @@
:query {:source-table (id :checkins)
:aggregation [{:aggregation-type :sum
:custom-name nil
:field {:field-id (id :venues :price)
:fk-field-id (id :checkins :venue_id)
:datetime-unit nil
:binning-strategy nil}}]
:breakout [{:field-id (id :checkins :date)
:fk-field-id nil
:datetime-unit :day-of-week
:binning-strategy nil}]}}
:field (merge field-ph-defaults
{:field-id (id :venues :price)
:fk-field-id (id :checkins :venue_id)})}]
:breakout [(merge field-ph-defaults
{:field-id (id :checkins :date)
:datetime-unit :day-of-week})]}}
;; resolved form
{:database (id)
:type :query
......
......@@ -79,22 +79,22 @@
(format-rows-by [(partial u/round-to-decimals 1) int]
(rows (data/run-query venues
(ql/aggregation (ql/count))
(ql/breakout (ql/binning-strategy $latitude :default 20))))))
(ql/breakout (ql/binning-strategy $latitude :num-bins 20))))))
(expect-with-non-timeseries-dbs
[[10.1 1] [30.5 99]]
(format-rows-by [(partial u/round-to-decimals 1) int]
(rows (data/run-query venues
(ql/aggregation (ql/count))
(ql/breakout (ql/binning-strategy $latitude :default 3))))))
(ql/breakout (ql/binning-strategy $latitude :num-bins 3))))))
(expect-with-non-timeseries-dbs
[[10.1 -165.4 1] [33.1 -119.7 61] [37.7 -124.2 29] [39.2 -78.5 8] [40.8 -78.5 1]]
(format-rows-by [(partial u/round-to-decimals 1) (partial u/round-to-decimals 1) int]
(rows (data/run-query venues
(ql/aggregation (ql/count))
(ql/breakout (ql/binning-strategy $latitude :default 20)
(ql/binning-strategy $longitude :default 20))))))
(ql/breakout (ql/binning-strategy $latitude :num-bins 20)
(ql/binning-strategy $longitude :num-bins 20))))))
;; Currently defaults to 8 bins when the number of bins isn't
;; specified
......@@ -113,6 +113,22 @@
(ql/aggregation (ql/count))
(ql/breakout (ql/binning-strategy $latitude :default)))))))
;; Testing bin-width: count venues broken out by `latitude`, binned with the `:bin-width` strategy and an
;; integer width of 1. Each result row is formatted as [first column rounded to 1 decimal, count as int].
(expect-with-non-timeseries-dbs
[[10.1 1] [33.1 25] [34.1 36] [37.1 29] [40.1 9]]
(format-rows-by [(partial u/round-to-decimals 1) int]
(rows (data/run-query venues
(ql/aggregation (ql/count))
(ql/breakout (ql/binning-strategy $latitude :bin-width 1))))))
;; Testing bin-width using a float: same query as the integer bin-width test, but with a non-integer
;; width of 2.5, to check that fractional widths are accepted as the strategy parameter.
(expect-with-non-timeseries-dbs
[[10.1 1] [32.6 61] [37.6 29] [40.1 9]]
(format-rows-by [(partial u/round-to-decimals 1) int]
(rows (data/run-query venues
(ql/aggregation (ql/count))
(ql/breakout (ql/binning-strategy $latitude :bin-width 2.5))))))
(expect-with-non-timeseries-dbs
[[33.0 4] [34.0 57]]
(tu/with-temporary-setting-values [breakout-bins-num 15]
......
......@@ -13,7 +13,8 @@
;; Test the expansion of the expressions clause
(expect
{:expressions {:my-cool-new-field (qpi/map->Expression {:operator :*
:args [{:field-id 10, :fk-field-id nil, :datetime-unit nil, :binning-strategy nil}
:args [{:field-id 10, :fk-field-id nil, :datetime-unit nil,
:binning-strategy nil, :binning-param nil}
20.0]})}} ; 20 should be converted to a FLOAT
(ql/expressions {} {:my-cool-new-field (ql/* (ql/field-id 10) 20)}))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment