Skip to content
Snippets Groups Projects
Unverified Commit adaff043 authored by Simon Belak's avatar Simon Belak Committed by GitHub
Browse files

Insights: correctly handle infinities in results (#10502)

Filter out models with infinities  
parent 754b2dee
Branches
Tags
No related merge requests found
......@@ -95,6 +95,12 @@
;; Size of the validation set. Presumably the cap on how many rows `best-fit`
;; samples for validation (its docstring mentions collecting a small validation
;; set) -- the usage is not visible in this excerpt, confirm against the caller.
(def ^:private ^:const ^Long validation-set-size 20)
(defn- real-number?
  "Return true when `x` is a number that is neither NaN nor positive/negative
  infinity; return false for everything else (including nil and non-numbers)."
  [x]
  (if (number? x)
    ;; A number is \"real\" unless it is one of the special floating point values.
    (not (or (Double/isNaN x)
             (Double/isInfinite x)))
    false))
(defn- best-fit
"Fit curves from `trendline-function-families` and pick the one with the smallest RMSE.
To keep the operation single pass we collect a small validation set as we go using reservoir
......@@ -102,18 +108,15 @@
[fx fy]
(redux/post-complete
(redux/fuse
{:fits (->> (for [{:keys [x-link-fn y-link-fn formula model]} trendline-function-families]
(redux/post-complete
(stats/simple-linear-regression (comp (stats/somef x-link-fn) fx)
(comp (stats/somef y-link-fn) fy))
(fn [[offset slope]]
(when-not (or (nil? offset)
(nil? slope)
(Double/isNaN offset)
(Double/isNaN slope))
{:model (model offset slope)
:formula (formula offset slope)}))))
(apply redux/juxt))
{:fits (->> (for [{:keys [x-link-fn y-link-fn formula model]} trendline-function-families]
(redux/post-complete
(stats/simple-linear-regression (comp (stats/somef x-link-fn) fx)
(comp (stats/somef y-link-fn) fy))
(fn [[offset slope]]
(when (every? real-number? [offset slope])
{:model (model offset slope)
:formula (formula offset slope)}))))
(apply redux/juxt))
:validation-set ((keep (fn [row]
(let [x (fx row)
y (fy row)]
......@@ -123,10 +126,12 @@
(fn [{:keys [validation-set fits]}]
(some->> fits
(remove nil?)
(map #(assoc % :mae (transduce identity
(mae (comp (:model %) first) second)
validation-set)))
(filter (comp real-number? :mae))
not-empty
(apply min-key #(transduce identity
(mae (comp (:model %) first) second)
validation-set))
(apply min-key :mae)
:formula))))
(defn- timeseries?
......@@ -176,7 +181,7 @@
(field/unix-timestamp? datetime))
#(some-> %
(nth x-position)
;; at this point in the pipeline, dates are still strings
;; at this point in the pipeline dates are still strings
f/->date
(.getTime)
ms->day)
......
......@@ -80,3 +80,60 @@
;; `valid-period?` is expected to return false when its third argument is nil.
(expect
false
(valid-period? #inst "2015-01" #inst "2015-02" nil))
;; Make sure we don't return nonsense results like infinity coefficients
;; Fixes https://github.com/metabase/metabase/issues/9070
;; Test fixture: one row per day, each row being [date-string number number].
;; It is fed to `insights` in the `expect` form that follows, with the columns
;; declared as :type/DateTime, :type/Number and :type/Number.
(def ^:private ts [["2018-11-01",2960,10875]
["2018-11-02",2574,11762]
["2018-11-03",2761,13101]
["2018-11-04",2405,12931]
["2018-11-05",1726,10890]
["2018-11-06",1669,10829]
["2018-11-07",3661,10098]
["2018-11-08",5760,12935]
["2018-11-09",5251,30183]
["2018-11-10",5757,36148]
["2018-11-11",5244,32264]
["2018-11-12",4190,25583]
["2018-11-13",2343,21411]
["2018-11-14",2109,21848]
["2018-11-15",1865,19892]
["2018-11-16",2130,14942]
["2018-11-17",5037,15690]
["2018-11-18",5029,14506]
["2018-11-19",2335,10714]
["2018-11-20",1745,9545]
["2018-11-21",1784,7516]
["2018-11-22",1717,6460]
["2018-11-23",1796,4901]
["2018-11-24",2039,5217]
["2018-11-25",1781,4477]
["2018-11-26",1330,3263]
["2018-11-27",1296,2994]
["2018-11-28",1278,3238]
["2018-11-29",1377,3120]
["2018-11-30",1553,2984]
["2018-12-01",1805,3732]
["2018-12-02",1796,3311]
["2018-12-03",1444,2525]])
;; Runs the `insights` reducing function over `ts` and checks the result: two
;; insight maps (presumably one per :type/Number column -- confirm), each with a
;; :best-fit formula whose coefficients are all finite numbers.
(expect
[{:last-value 1444,
:previous-value 1796,
:last-change -0.19599109131403117,
:slope -73.10260695187168,
:offset 1307680.6786987525,
:best-fit
;; exponential form: a * exp(b * x); `a` is very large but still finite
[:* 2.3076724063296997E223 [:exp [:* -0.02837494263105348 :x]]],
:col nil}
{:last-value 2525,
:previous-value 3311,
:last-change -0.2373905164602839,
:slope -551.1062834224598,
:offset 9850467.098930478,
;; linear form: offset + slope * x, using the same :offset and :slope as above
:best-fit [:+ 9850467.098930478 [:* -551.1062834224598 :x]],
:col nil}]
(transduce identity
(insights [{:base_type :type/DateTime} {:base_type :type/Number} {:base_type :type/Number}])
ts))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment