From 165f88b814098c7ec960c7777b2ee8e58b9792fe Mon Sep 17 00:00:00 2001 From: Simon Belak <simon.belak@gmail.com> Date: Fri, 10 Nov 2017 14:42:44 +0100 Subject: [PATCH] Make outliers work with empty input --- src/metabase/feature_extraction/math.clj | 9 +++++---- test/metabase/feature_extraction/math_test.clj | 11 +++++++---- test/metabase/feature_extraction/timeseries_test.clj | 4 +++- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/metabase/feature_extraction/math.clj b/src/metabase/feature_extraction/math.clj index a3e719a1537..92201172514 100644 --- a/src/metabase/feature_extraction/math.clj +++ b/src/metabase/feature_extraction/math.clj @@ -148,7 +148,8 @@ https://en.wikipedia.org/wiki/Interquartile_range" ([xs] (outliers identity xs)) ([keyfn xs] - (let [{:keys [q1 q3 iqr]} (->> xs (transduce (map keyfn) h/histogram) h/iqr) - lower-bound (- q1 (* 1.5 iqr)) - upper-bound (+ q3 (* 1.5 iqr))] - (remove (comp #(< lower-bound % upper-bound) keyfn) xs)))) + (when (not-empty xs) + (let [{:keys [q1 q3 iqr]} (->> xs (transduce (map keyfn) h/histogram) h/iqr) + lower-bound (- q1 (* 1.5 iqr)) + upper-bound (+ q3 (* 1.5 iqr))] + (remove (comp #(< lower-bound % upper-bound) keyfn) xs))))) diff --git a/test/metabase/feature_extraction/math_test.clj b/test/metabase/feature_extraction/math_test.clj index d3a53800ca0..2ff7fda7416 100644 --- a/test/metabase/feature_extraction/math_test.clj +++ b/test/metabase/feature_extraction/math_test.clj @@ -73,19 +73,22 @@ :lag 1} {:autocorrelation -1.0 :lag 1} - nil nil nil] + nil nil nil nil] [(autocorrelation (range 10)) (autocorrelation [1 -1 1 -1 1 -1]) (autocorrelation [1 2 3]) ; not significant (autocorrelation [1]) - (autocorrelation [])]) + (autocorrelation []) + (autocorrelation nil)]) (expect [nil - #{50 100 35}] + #{50 100 35} + nil] (let [xs (vec (repeatedly 100 rand))] [(not-empty (outliers xs)) (set (outliers (-> xs (assoc-in [10] 50) (assoc-in [30] 100) - (assoc-in [70] 35))))])) + (assoc-in [70] 35)))) + (outliers nil)])) diff --git a/test/metabase/feature_extraction/timeseries_test.clj b/test/metabase/feature_extraction/timeseries_test.clj index fe0733fc05f..9b4300d554b 100644 --- a/test/metabase/feature_extraction/timeseries_test.clj +++ b/test/metabase/feature_extraction/timeseries_test.clj @@ -49,7 +49,9 @@ (expect [[99] + [] []] [(breaks 12 (map vector (range) (concat (repeat 100 10) (repeat 100 20)))) - (breaks 12 (map vector (range) (take 100 (cycle (range 10)))))]) + (breaks 12 (map vector (range) (take 100 (cycle (range 10))))) + (breaks 4 nil)]) -- GitLab