Skip to content
Snippets Groups Projects
Unverified Commit 63d25083 authored by Maz Ameli's avatar Maz Ameli Committed by GitHub
Browse files

Merge pull request #8709 from metabase/trendlines-mega-branch

Trendlines mega branch
parents 3f9c0d00 06031f31
No related branches found
No related tags found
No related merge requests found
......@@ -27,6 +27,7 @@ import {
} from "./apply_axis";
import { setupTooltips } from "./apply_tooltips";
import { getTrendDataPointsFromInsight } from "./trends";
import fillMissingValuesInDatas from "./fill_data";
......@@ -599,9 +600,20 @@ function addGoalChartAndGetOnGoalHover(
};
}
/**
 * Finds the index of the first series whose columns include one named `colName`.
 *
 * @param {Array} series - chart series; each entry is read as `{ data: { cols } }`.
 * @param {string} colName - column name to look for.
 * @returns {number} index of the first matching series, or 0 when no series matches.
 */
function findSeriesIndexForColumnName(series, colName) {
  const index = _.findIndex(series, ({ data: { cols } }) =>
    _.findWhere(cols, { name: colName }),
  );
  // findIndex reports "not found" as -1, which is truthy, so `index || 0` can
  // never supply the fallback. Compare explicitly instead.
  return index === -1 ? 0 : index;
}
// Divisor applied to the chart width (parent.width()) to decide how many points
// are sampled along each trend line.
// NOTE(review): presumably a spacing in pixels; confirm the unit of parent.width().
const TREND_LINE_POINT_SPACING = 25;
function addTrendlineChart(
{ series, settings, onHoverChange },
xDomain,
{ xDomain },
{ yAxisSplit },
parent,
charts,
) {
......@@ -610,39 +622,42 @@ function addTrendlineChart(
}
const rawSeries = series._raw || series;
const insights = rawSeries[0].data.insights;
if (insights && insights.slope != null && insights.offset != null) {
const fn = x => x * insights.slope + insights.offset;
const trendData = [
[xDomain[0], fn(xDomain[0])],
[xDomain[1], fn(xDomain[1])],
];
const trendDimension = crossfilter(trendData).dimension(d => d[0]);
// Take the last point rather than summing in case xDomain[0] === xDomain[1], e.g. when the chart
// has just a single row / datapoint
const trendGroup = trendDimension
.group()
.reduce((p, d) => d[1], (p, d) => p, () => 0);
const trendIndex = charts.length;
const color = lighten(settings.series(series[0]).color, 0.25);
const trendChart = dc
.lineChart(parent)
.dimension(trendDimension)
.group(trendGroup)
.on("renderlet", function(chart) {
// remove "sub" class so the trend is not used in voronoi computation
chart
.select(".sub._" + trendIndex)
.classed("sub", false)
.classed("trend", true);
})
.colors([color]);
charts.push(trendChart);
const insights = rawSeries[0].data.insights || [];
for (const insight of insights) {
if (insight.slope != null && insight.offset != null) {
const index = findSeriesIndexForColumnName(series, insight.col);
const seriesSettings = settings.series(series[index]);
const color = lighten(seriesSettings.color, 0.25);
const points = Math.round(parent.width() / TREND_LINE_POINT_SPACING);
const trendData = getTrendDataPointsFromInsight(insight, xDomain, points);
const trendDimension = crossfilter(trendData).dimension(d => d[0]);
// Take the last point rather than summing in case xDomain[0] === xDomain[1], e.g. when the chart
// has just a single row / datapoint
const trendGroup = trendDimension
.group()
.reduce((p, d) => d[1], (p, d) => p, () => 0);
const trendIndex = charts.length;
const trendChart = dc
.lineChart(parent)
.dimension(trendDimension)
.group(trendGroup)
.on("renderlet", function(chart) {
// remove "sub" class so the trend is not used in voronoi computation
chart
.select(".sub._" + trendIndex)
.classed("sub", false)
.classed("trend", true);
})
.colors([color])
.useRightYAxis(yAxisSplit.length > 1 && yAxisSplit[1].includes(index))
.interpolate("cardinal");
charts.push(trendChart);
}
}
}
......@@ -797,7 +812,7 @@ export default function lineAreaBar(
parent,
charts,
);
addTrendlineChart(props, xAxisProps.xDomain, parent, charts);
addTrendlineChart(props, xAxisProps, yAxisProps, parent, charts);
parent.compose(charts);
......
......@@ -324,7 +324,7 @@ export const GRAPH_GOAL_SETTINGS = {
default: false,
getHidden: (series, vizSettings) => {
const { insights } = series[0].data;
return !insights || insights.slope == null || insights.offset == null;
return !insights || insights.length === 0;
},
useRawSeries: true,
},
......
import _ from "underscore";
import moment from "moment";
// Allowed operators, keyed by name. Each entry takes operand source strings
// that are already compiled and renders them into one JavaScript expression string.
const EXPRESSION_OPERATORS = new Map([
  // Variadic infix arithmetic, parenthesised so nested expressions keep their grouping.
  ...["+", "-", "*", "/"].map(symbol => [
    symbol,
    (...operands) => `(${operands.join(` ${symbol} `)})`,
  ]),
  ["log", value => `Math.log(${value})`],
  ["pow", (base, exponent) => `Math.pow(${base}, ${exponent})`],
  ["exp", exponent => `Math.pow(Math.E, ${exponent})`],
]);
// whitelist of identifiers (variable names) allowed to appear as bare string nodes
// in an expression; "x" is the parameter name given to the generated function
const EXPRESSION_IDENTIFIERS = new Set(["x"]);
/**
 * Recursively renders an expression tree as a JavaScript source fragment.
 *
 * A node is one of:
 *  - a number, returned unchanged;
 *  - a string listed in EXPRESSION_IDENTIFIERS, returned unchanged;
 *  - an array `[operator, ...operands]` whose operator is a key of
 *    EXPRESSION_OPERATORS; the operands are compiled first and then handed to
 *    that operator's renderer.
 *
 * @param {number|string|Array} node - expression tree node.
 * @returns {number|string} the number itself, or source text for the node.
 * @throws {Error} for any node that is not one of the shapes above.
 */
function compileNode(node) {
  if (typeof node === "number") {
    return node;
  }
  if (typeof node === "string" && EXPRESSION_IDENTIFIERS.has(node)) {
    return node;
  }
  if (Array.isArray(node)) {
    const [operator, ...operands] = node;
    if (EXPRESSION_OPERATORS.has(operator)) {
      const render = EXPRESSION_OPERATORS.get(operator);
      return render(...operands.map(compileNode));
    }
  }
  // Everything else (unknown operator, unlisted identifier, other types) is rejected.
  throw new Error(`Invalid expression: ${node}`);
}
/**
 * Compiles an expression tree into a function of one argument, `x`.
 *
 * The tree is first rendered to source text by compileNode, which throws on
 * anything outside its whitelist.
 *
 * @param {number|string|Array} node - expression tree.
 * @returns {Function} function taking `x` and returning the expression's value.
 */
export function compileExpression(node) {
  const body = `return ${compileNode(node)};`;
  // NOTE(review): `new Function` evaluates generated source text. Its safety rests
  // entirely on compileNode's whitelist, and it will not run under a Content
  // Security Policy that forbids `unsafe-eval`.
  return new Function("x", body);
}
// Converts a duration or timestamp in milliseconds to (fractional) days.
const msToDays = ms => {
  const msPerDay = 24 * 60 * 60 * 1000;
  return ms / msPerDay;
};
/**
 * Samples a trend line across `xDomain` and returns its points.
 *
 * The curve is the compiled `insight["best-fit"]` expression when present,
 * otherwise the straight line `x * insight.slope + insight.offset`.
 *
 * When `xDomain[0]` is a moment, the domain is converted to millisecond
 * numbers for sampling; each x is passed to the curve in days (msToDays)
 * and returned wrapped in a moment. Otherwise x values are used as-is.
 *
 * @param {Object} insight - reads `best-fit`, `slope` and `offset`.
 * @param {Array} xDomain - `[start, end]` as numbers or moments.
 * @param {number} [count=10] - number of sample points requested.
 * @returns {Array<Array>} list of `[x, y]` pairs.
 */
export function getTrendDataPointsFromInsight(insight, xDomain, count = 10) {
  const isTimeseries = moment.isMoment(xDomain[0]);

  const trendFn = insight["best-fit"]
    ? compileExpression(insight["best-fit"])
    : x => x * insight.slope + insight.offset;

  if (isTimeseries) {
    // Unary plus turns each moment into its millisecond timestamp.
    const [startMs, endMs] = xDomain.map(bound => +bound);
    return getValuesInRange(startMs, endMs, count).map(ms => [
      moment(ms),
      trendFn(msToDays(ms)),
    ]);
  }

  const [start, end] = xDomain;
  return getValuesInRange(start, end, count).map(x => [x, trendFn(x)]);
}
/**
 * Returns `count` evenly spaced values from `start` to `end`, both inclusive.
 *
 * Points are computed from an integer index (`start + i * delta`) rather than
 * by stepping a fractional range. With a fractional step the number of values
 * comes from rounding a floating-point quotient and each value accumulates
 * addition error, so the result could gain an extra point next to `end`.
 *
 * @param {number} start - first value of the range.
 * @param {number} end - last value of the range; always the final element.
 * @param {number} count - number of values wanted.
 * @returns {number[]} the sampled values; just `[end]` when `count` is not
 *   greater than 1 or the range is empty (`start === end`).
 */
function getValuesInRange(start, end, count) {
  // Degenerate cases would make the step infinite, NaN or zero; a single
  // point is all there is to sample.
  if (!(count > 1) || start === end) {
    return [end];
  }
  const delta = (end - start) / (count - 1);
  return _.range(count - 1)
    .map(i => start + i * delta)
    .concat([end]);
}
......@@ -8,9 +8,7 @@ import colors from "metabase/lib/colors";
import Icon from "metabase/components/Icon";
import * as Query from "metabase/lib/query/query";
import * as Card from "metabase/meta/Card";
import { parseFieldBucketing, formatBucketing } from "metabase/lib/query_time";
import { formatBucketing } from "metabase/lib/query_time";
import { columnSettings } from "metabase/visualizations/lib/settings/column";
import { NoBreakoutError } from "metabase/visualizations/lib/errors";
......@@ -44,12 +42,12 @@ export default class Smart extends React.Component {
};
static isSensible({ insights }) {
return !!insights;
return insights && insights.length > 0;
}
// Smart scalars need to have a breakout
static checkRenderable(series, settings) {
if (!series[0].data.insights) {
static checkRenderable([{ data: { insights } }], settings) {
if (!insights || insights.length === 0) {
throw new NoBreakoutError(
t`Group by a time field to see how this has changed over time`,
);
......@@ -57,10 +55,6 @@ export default class Smart extends React.Component {
}
render() {
const insights =
this.props.rawSeries &&
this.props.rawSeries[0].data &&
this.props.rawSeries[0].data.insights;
const {
actionButtons,
onChangeCardAndRun,
......@@ -70,22 +64,30 @@ export default class Smart extends React.Component {
settings,
visualizationIsClickable,
series: [{ card, data: { rows, cols } }],
rawSeries,
} = this.props;
if (!insights) {
return null;
}
const metricIndex = 1;
const dimensionIndex = 0;
const lastRow = rows[rows.length - 1];
const value = lastRow && lastRow[metricIndex];
const column = cols[metricIndex];
const dimensionColumn = cols[dimensionIndex];
let granularity =
dimensionColumn && dimensionColumn.unit
? formatBucketing(dimensionColumn.unit).toLowerCase()
: null;
let granularity;
if (Card.isStructured(card)) {
const query = Card.getQuery(card);
const breakouts = query && Query.getBreakouts(query);
granularity = formatBucketing(
parseFieldBucketing(breakouts[0]),
).toLowerCase();
const insights =
rawSeries && rawSeries[0].data && rawSeries[0].data.insights;
const insight = _.findWhere(insights, { col: column.name });
if (!insight) {
return null;
}
const change = formatNumber(insights["last-change"] * 100);
const change = formatNumber(insight["last-change"] * 100);
const isNegative = (change && Math.sign(change) < 0) || false;
let color = isNegative ? colors["error"] : colors["success"];
......@@ -116,13 +118,6 @@ export default class Smart extends React.Component {
<span style={{ marginLeft: 5 }}>{jt`last ${granularity}`}</span>
);
const metricIndex = 1;
const dimensionIndex = 0;
const lastRow = rows[rows.length - 1];
const value = lastRow && lastRow[metricIndex];
const column = cols[metricIndex];
const clicked = {
value,
column,
......@@ -151,7 +146,7 @@ export default class Smart extends React.Component {
ref={scalar => (this._scalar = scalar)}
>
<ScalarValue
value={formatValue(insights["last-value"], settings.column(column))}
value={formatValue(insight["last-value"], settings.column(column))}
/>
</span>
{isDashboard && (
......@@ -178,7 +173,7 @@ export default class Smart extends React.Component {
>
{!isFullscreen &&
jt`${separator} was ${formatValue(
insights["previous-value"],
insight["previous-value"],
settings.column(column),
)} ${granularityDisplay}`}
</h4>
......
(ns metabase.sync.analyze.fingerprint.insights
"Deeper statistical analysis of results."
(:require [kixi.stats.core :as stats]
(:require [kixi.stats
[core :as stats]
[math :as math]]
[metabase.models.field :as field]
[metabase.sync.analyze.fingerprint.fingerprinters :as f]
[redux.core :as redux]))
......@@ -27,38 +29,142 @@
(neg? x1) (- (change x2 x1))
:else (/ (- x2 x1) x1)))))
(defn reservoir-sample
  "Returns a reducing function that keeps a uniform random sample of at most `n` of the
  items it is fed. The accumulator is `[reservoir seen]`: a transient vector plus the number of
  items seen so far; the completing arity returns the reservoir as a persistent vector.

  https://en.wikipedia.org/wiki/Reservoir_sampling"
  [n]
  (fn
    ([] [(transient []) 0])
    ([[reservoir seen] x]
     (let [seen' (inc seen)
           ;; drawn on every step, including while the reservoir is still filling
           slot  (rand-int seen')]
       (if (<= seen' n)
         ;; the first `n` items are always kept
         [(conj! reservoir x) seen']
         ;; later items overwrite a random slot with probability n/seen'
         [(if (< slot n)
            (assoc! reservoir slot x)
            reservoir)
          seen'])))
    ([[reservoir _]] (persistent! reservoir))))
(defn rmse
  "Returns a reducing function computing the root mean squared error between predicted and
  actual values. `(fy-hat input)` gives the predicted y for an input and `(fy input)` the actual
  y; inputs where either is nil are skipped. Completes to nil when no input contributed.

  https://en.wikipedia.org/wiki/Root-mean-square_deviation"
  [fy-hat fy]
  (fn
    ([] [0.0 0.0])
    ([[^double n ^double mean-se :as state] input]
     (let [predicted (fy-hat input)
           actual    (fy input)]
       (if (and (some? predicted) (some? actual))
         (let [sq-err (math/sq (- actual predicted))
               n'     (inc n)]
           ;; running mean: shift the mean towards the new squared error by 1/n'
           [n' (+ mean-se (/ (- sq-err mean-se) n'))])
         state)))
    ([[n mean-se]]
     (when (pos? n)
       (math/sqrt mean-se)))))
;; Candidate curve families for trendline fitting. Each entry has:
;;   :x-link-fn / :y-link-fn  transforms applied to x and y before the linear regression
;;   :model    given the fitted offset and slope, returns the curve as a function of x
;;   :formula  given the same offset and slope, returns the curve as data
;;             (operator keyword first, `:x` standing for the variable)
;; `:model` and `:formula` of an entry must describe the same curve.
(def ^:private trendline-function-families
;; http://mathworld.wolfram.com/LeastSquaresFitting.html
;; linear: y = offset + slope * x
[{:x-link-fn identity
:y-link-fn identity
:model (fn [offset slope]
(fn [x]
(+ offset (* slope x))))
:formula (fn [offset slope]
[:+ offset [:* slope :x]])}
;; http://mathworld.wolfram.com/LeastSquaresFittingExponential.html
;; exponential: y = exp(offset) * exp(slope * x), fitted against log(y)
{:x-link-fn identity
:y-link-fn math/log
:model (fn [offset slope]
(fn [x]
(* (math/exp offset) (math/exp (* slope x)))))
:formula (fn [offset slope]
[:* (math/exp offset) [:exp [:* slope :x]]])}
;; http://mathworld.wolfram.com/LeastSquaresFittingLogarithmic.html
;; logarithmic: y = offset + slope * log(x), fitted against log(x)
{:x-link-fn math/log
:y-link-fn identity
:model (fn [offset slope]
(fn [x]
(+ offset (* slope (math/log x)))))
:formula (fn [offset slope]
[:+ offset [:* slope [:log :x]]])}
;; http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html
;; power law: y = exp(offset) * x^slope, fitted against log(x) and log(y)
{:x-link-fn math/log
:y-link-fn math/log
:model (fn [offset slope]
(fn [x]
(* (math/exp offset) (math/pow x slope))))
:formula (fn [offset slope]
[:* (math/exp offset) [:pow :x slope]])}])
;; Number of [x y] pairs kept by reservoir sampling to score candidate fits in `best-fit`.
(def ^:private ^:const ^Long validation-set-size 20)
(defn- best-fit
  "Fit curves from `trendline-function-families` and pick the one with the smallest RMSE.
  To keep the operation single pass we collect a small validation set as we go using reservoir
  sampling, and use it to calculate RMSE.

  `fx` and `fy` extract x and y from an input row. Completes to the `:formula` of the winning
  family, or nil when no family produced usable coefficients."
  [fx fy]
  (redux/post-complete
   (redux/fuse
    {:fits           (->> (for [{:keys [x-link-fn y-link-fn formula model]} trendline-function-families]
                            (redux/post-complete
                             (stats/simple-linear-regression (comp x-link-fn fx) (comp y-link-fn fy))
                             (fn [[offset slope]]
                               ;; Discard fits with missing or NaN coefficients. The nil check comes
                               ;; first because `Double/isNaN` throws on nil.
                               ;; NOTE(review): the regression presumably completes to nil when x
                               ;; has zero variance (e.g. a single row) -- confirm against kixi.stats.
                               (when (and (some? offset)
                                          (some? slope)
                                          (not (Double/isNaN offset))
                                          (not (Double/isNaN slope)))
                                 {:model   (model offset slope)
                                  :formula (formula offset slope)}))))
                          (apply redux/juxt))
     :validation-set ((map (juxt fx fy)) (reservoir-sample validation-set-size))})
   (fn [{:keys [validation-set fits]}]
     (some->> fits
              (remove nil?)
              ;; `min-key` needs at least one candidate; with every fit discarded
              ;; `(apply min-key f [])` would throw, so short-circuit to nil instead.
              not-empty
              (apply min-key #(transduce identity
                                         (rmse (comp (:model %) first) second)
                                         validation-set))
              :formula))))
(defn- timeseries?
[{:keys [numbers datetimes others]}]
(and (= (count numbers) 1)
(and (pos? (count numbers))
(= (count datetimes) 1)
(empty? others)))
(def ^:private ms->day
  "Converts milliseconds to days. UNIX timestamps are scaled down this way to lessen the chance
  of overflows and numerical instabilities."
  (let [ms-per-day (* 24 60 60 1000)]
    (fn [ms]
      (/ ms ms-per-day))))
(defn- timeseries-insight
[{:keys [numbers datetimes]}]
(redux/post-complete
(let [datetime (first datetimes)
x-position (:position datetime)
y-position (-> numbers first :position)
xfn (if (or (-> datetime :base_type (isa? :type/DateTime))
(field/unix-timestamp? datetime))
#(some-> %
(nth x-position)
;; at this point in the pipeline, dates are still strings
f/->date
(.getTime))
;; unit=year workaround. While the field is in this case marked as :type/Text,
;; at this stage in the pipeline the value is still an int, so we can use it
;; directly.
#(nth % x-position))
yfn #(nth % y-position)]
(redux/juxt ((map yfn) (last-n 2))
(stats/simple-linear-regression xfn yfn)))
(fn [[[previous current] [offset slope]]]
{:last-value current
:previous-value previous
:last-change (change current previous)
:slope slope
:offset offset})))
(let [datetime (first datetimes)
x-position (:position datetime)
xfn (if (or (-> datetime :base_type (isa? :type/DateTime))
(field/unix-timestamp? datetime))
#(some-> %
(nth x-position)
;; at this point in the pipeline, dates are still strings
f/->date
(.getTime)
ms->day)
;; unit=year workaround. While the field is in this case marked as :type/Text,
;; at this stage in the pipeline the value is still an int, so we can use it
;; directly.
(comp ms->day #(nth % x-position)))]
(apply redux/juxt (for [number-col numbers]
(redux/post-complete
(let [y-position (:position number-col)
yfn #(nth % y-position)]
(redux/juxt ((map yfn) (last-n 2))
(stats/simple-linear-regression xfn yfn)
(best-fit xfn yfn)))
(fn [[[previous current] [offset slope] best-fit]]
{:last-value current
:previous-value previous
:last-change (change current previous)
:slope slope
:offset offset
:best-fit best-fit
:col (:name number-col)}))))))
(defn- datetime-truncated-to-year?
"This is hackish as hell, but we change datetimes with year granularity to strings upstream and
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment