Skip to content
Snippets Groups Projects
Commit a350a5eb authored by Sameer Al-Sakran, committed by GitHub
Browse files

Merge pull request #6168 from metabase/xray-comparison-fix-nil-states

Fix 2 corner cases in x-ray comparison 
parents c25c8d40 28b842ef
Branches
Tags
No related merge requests found
......@@ -102,15 +102,15 @@
(defmethod difference [nil Object]
[a b]
{:difference 1})
{:difference nil})
(defmethod difference [Object nil]
[a b]
{:difference 1})
{:difference nil})
(defmethod difference [nil nil]
[a b]
{:difference 0})
{:difference nil})
(defn chi-squared-distance
"Chi-squared distance between empirical probability distributions `p` and `q`.
......@@ -136,9 +136,10 @@
https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test"
([m p n q] (ks-test 0.95 m p n q))
([significance-level m p n q]
(let [D (apply max (map (comp math/abs -) (pdf->cdf p) (pdf->cdf q)))
c (math/sqrt (* -0.5 (Math/log (/ significance-level 2))))]
(> D (* c (math/sqrt (/ (+ m n) (* m n))))))))
(when-not (zero? (* m n))
(let [D (apply max (map (comp math/abs -) (pdf->cdf p) (pdf->cdf q)))
c (math/sqrt (* -0.5 (Math/log (/ significance-level 2))))]
(> D (* c (math/sqrt (/ (+ m n) (* m n)))))))))
(defn- unify-categories
"Given two PMFs add missing categories and align them so they both cover the
......@@ -197,8 +198,9 @@
"Pairwise differences of feature vectors `a` and `b`."
[a b]
(into {}
(map (fn [[k a] [_ b]]
[k (difference a b)])
(map (fn [[ka va] [kb vb]]
(assert (= ka kb) "Incomparable models.")
[ka (difference va vb)])
(flatten-map (fe/comparison-vector a))
(flatten-map (fe/comparison-vector b)))))
......@@ -208,12 +210,14 @@
"Distance metric between feature vectors `a` and `b`."
[a b]
(let [differences (pairwise-differences a b)]
{:distance (transduce (map (comp :difference val))
{:distance (transduce (keep (comp :difference val))
(redux/post-complete
magnitude
#(/ % (math/sqrt (count differences))))
differences)
:components differences
:top-contributors (head-tails-breaks (comp :difference second) differences)
:top-contributors (->> differences
(filter (comp :difference second))
(head-tails-breaks (comp :difference second)))
:thereshold interestingness-thershold
:significant? (some :significant? (vals differences))}))
......@@ -90,8 +90,8 @@
(expect
true
(:significant? (async-call :get (format "x-ray/compare/fields/%s/%s"
(id :venues :longitude)
(id :venues :latitude)))))
(id :venues :price)
(id :venues :category_id)))))
(expect
false
......
(ns metabase.feature-extraction.comparison-test
(:require [expectations :refer :all]
[metabase.feature-extraction.comparison :refer :all :as c]))
[metabase.feature-extraction
[comparison :refer :all :as c]
[histogram :as h]]))
(expect
(approximately 5.5 0.1)
......@@ -22,14 +24,14 @@
(expect
[0.25
0
1
1
nil
nil
0
1
1
0
0.5
0]
nil]
(mapv :difference [(difference 1 2.0)
(difference 2.0 2.0)
(difference 2.0 nil)
......@@ -61,6 +63,17 @@
[(#'c/flatten-map {:foo 4 :bar 5})
(#'c/flatten-map {:foo 4 :bar {:a 4 :b {:x 4 :y 7}}})])
;; Checks `:significant?` for three histogram comparisons: two histograms built
;; from different data, a histogram compared against itself, and a histogram
;; compared against one built from no data (expected to yield nil rather than
;; true/false).
(expect
[true
false
nil]
(let [h1 (transduce identity h/histogram (range 10))
h2 (transduce identity h/histogram (repeat 10 10))
h-empty (transduce identity h/histogram nil)]
(map :significant? [(difference h1 h2)
(difference h1 h1)
(difference h1 h-empty)])))
(expect
(approximately 0.3 0.1)
(:distance (features-distance {:foo 2.0 :bar [[1 2] [2 3] [3 4]] :baz false}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment