Skip to content
Snippets Groups Projects
Unverified Commit 28a737bc authored by Oleksandr Yakushev's avatar Oleksandr Yakushev Committed by GitHub
Browse files

perf: Add more timeseries insights optimizations (#47651)

* perf: [insights] Optimize last-2 allocations

* perf: [insights] Optimize best-fit allocations when creating validation sample

* perf: [insights] Optimize simple-linear-regression
parent 458669de
No related branches found
No related tags found
No related merge requests found
......@@ -19,17 +19,19 @@
(set! *warn-on-reflection* true)
(defn- last-2 []
(fn
([] [])
([acc]
(let [cnt (count acc)]
(cond (= cnt 0) [nil nil]
(= cnt 1) [nil (nth acc 0)]
:else acc)))
([acc x]
(if (< (count acc) 2)
(conj acc x)
[(nth acc 1) x]))))
(let [none (Object.)]
(fn
([] (object-array [none none]))
([^objects acc]
(let [a (aget acc 0)
b (aget acc 1)]
(cond (identical? b none) [nil nil]
(identical? a none) [nil b]
:else [a b])))
([^objects acc, x]
(aset acc 0 (aget acc 1))
(aset acc 1 x)
acc))))
(defn change
"Relative difference between `x1` an `x2`."
......@@ -47,8 +49,8 @@
"Transducer that samples a fixed number `n` of samples consistently.
https://en.wikipedia.org/wiki/Reservoir_sampling. Uses java.util.Random
with a seed of `n` to ensure a consistent sample if a dataset has not changed.
The returned instance is mutable, so don't reuse it."
[n]
The returned instance is mutable, so don't reuse it. `f` is invoked on the element before adding it to the sample."
[n f]
(let [n (int n)
rng (Random. n)
counter (int-array 1) ;; A box for a mutable primitive int.
......@@ -66,13 +68,14 @@
idx (.nextInt rng c+1)]
(aset counter 0 c+1)
(cond
(< c n) (aset reservoir c x)
(< idx n) (aset reservoir idx x)))))))
(< c n) (aset reservoir c (f x))
(< idx n) (aset reservoir idx (f x))))))))
(defn- simple-linear-regression
"Faster and more efficient implementation of `kixi.stats.estimate/simple-linear-regression`. Computes some of squares
on each step, and on the completing step returns `[offset slope]`."
[fx fy]
on each step, and on the completing step returns `[offset slope]`. Additionally accepts `x-link-fn` and `y-link-fn`
functions which should be double->double."
[fx, fy, ^clojure.lang.IFn$DD x-link-fn, ^clojure.lang.IFn$DD y-link-fn]
(fn
([] (double-array 6))
([^doubles arr e]
......@@ -80,8 +83,8 @@
y (fy e)]
(if (or (nil? x) (nil? y))
arr
(let [x (double x)
y (double y)
(let [x (.invokePrim x-link-fn (double x))
y (.invokePrim y-link-fn (double y))
c (aget arr 0)
mx (aget arr 1)
my (aget arr 2)
......@@ -118,34 +121,38 @@
(math/abs (- (fy x) (fy-hat x))))))
stats/mean))
(defn- double-identity ^double [^double x] x)
(defn- double-log ^double [^double x] (Math/log x))
(def ^:private trendline-function-families
;; http://mathworld.wolfram.com/LeastSquaresFitting.html
[{:x-link-fn identity
:y-link-fn identity
[{:x-link-fn double-identity
:y-link-fn double-identity
:model (fn [offset slope]
(fn [x]
(+ offset (* slope x))))
:formula (fn [offset slope]
[:+ offset [:* slope :x]])}
;; http://mathworld.wolfram.com/LeastSquaresFittingExponential.html
{:x-link-fn identity
:y-link-fn math/log
{:x-link-fn double-identity
:y-link-fn double-log
:model (fn [offset slope]
(fn [x]
(* (math/exp offset) (math/exp (* slope x)))))
:formula (fn [offset slope]
[:* (math/exp offset) [:exp [:* slope :x]]])}
;; http://mathworld.wolfram.com/LeastSquaresFittingLogarithmic.html
{:x-link-fn math/log
:y-link-fn identity
{:x-link-fn double-log
:y-link-fn double-identity
:model (fn [offset slope]
(fn [x]
(+ offset (* slope (math/log x)))))
(+ offset (* slope (double-log x)))))
:formula (fn [offset slope]
[:+ offset [:* slope [:log :x]]])}
;; http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html
{:x-link-fn math/log
:y-link-fn math/log
{:x-link-fn double-log
:y-link-fn double-log
:model (fn [offset slope]
(fn [x]
(* (math/exp offset) (math/pow x slope))))
......@@ -163,19 +170,15 @@
(fingerprinters/robust-fuse
{:fits (->> (for [{:keys [x-link-fn y-link-fn formula model]} trendline-function-families]
(redux/post-complete
(simple-linear-regression #(some-> (fx %) x-link-fn)
#(some-> (fy %) y-link-fn))
(simple-linear-regression fx fy x-link-fn y-link-fn)
(fn [[offset slope]]
(when (every? u/real-number? [offset slope])
{:model (model offset slope)
:formula (formula offset slope)}))))
redux/juxt*)
:validation-set ((keep (fn [row]
(let [x (fx row)
y (fy row)]
(when (and x y)
[x y]))))
(reservoir-sample validation-set-size))})
:validation-set ((filter (fn [row] (and (fx row) (fy row))))
(reservoir-sample validation-set-size
(fn [row] [(fx row) (fy row)])))})
(fn [{:keys [validation-set fits]}]
(some->> fits
(remove nil?)
......@@ -254,7 +257,7 @@
((filter (comp u/real-number? yfn))
(redux/juxt ((map yfn) (last-2))
((map xfn) (last-2))
(simple-linear-regression xfn yfn)
(simple-linear-regression xfn yfn double-identity double-identity)
(best-fit xfn yfn))))
(fn [[[y-previous y-current] [x-previous x-current] [offset slope] best-fit-equation]]
(let [unit (let [unit (some-> datetime :unit mbql.u/normalize-token)]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment