Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
Metabase
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Engineering Digital Service
Metabase
Commits
eb6a9d45
Commit
eb6a9d45
authored
7 years ago
by
Simon Belak
Browse files
Options
Downloads
Patches
Plain Diff
Refactoring, renamed resolution to scale, added back :field
parent
c7c5acd0
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/metabase/api/fingerprint.clj
+5
-5
5 additions, 5 deletions
src/metabase/api/fingerprint.clj
src/metabase/fingerprinting.clj
+52
-58
52 additions, 58 deletions
src/metabase/fingerprinting.clj
test/metabase/fingerprinting_test.clj
+1
-2
1 addition, 2 deletions
test/metabase/fingerprinting_test.clj
with
58 additions
and
65 deletions
src/metabase/api/fingerprint.clj
+
5
−
5
View file @
eb6a9d45
...
...
@@ -20,7 +20,7 @@
"unbounded"
"yolo"
)))
(
def
^
:private
^
:const
Resolution
(
def
^
:private
^
:const
Scale
(
s/maybe
(
s/enum
"month"
"day"
"raw"
)))
...
...
@@ -72,15 +72,15 @@
(
api/defendpoint
GET
"/fields/:id1/:id2"
"Get a multi-field fingerprint for `Field`s with ID1 and ID2."
[
id1
id2
max_query_cost
max_computation_cost
resolution
]
[
id1
id2
max_query_cost
max_computation_cost
scale
]
{
max_query_cost
MaxQueryCost
max_computation_cost
MaxComputationCost
resolution
Resolution
}
scale
Scale
}
(
->>
[
id1
id2
]
(
map
(
partial
api/read-check
Field
))
(
apply
fingerprinting/multifield-fingerprint
{
:max-cost
(
max-cost
max_query_cost
max_computation_cost
)
:
resolution
(
or
(
keyword
resolution
)
:day
)})))
{
:max-cost
(
max-cost
max_query_cost
max_computation_cost
)
:
scale
(
or
(
keyword
scale
)
:day
)})))
(
api/defendpoint
GET
"/compare/fields/:id1/:id2"
"Get comparison fingerprints for `Field`s with ID1 and ID2."
...
...
This diff is collapsed.
Click to expand it.
src/metabase/fingerprinting.clj
+
52
−
58
View file @
eb6a9d45
...
...
@@ -95,7 +95,7 @@
(
defn
binned-entropy
"Calculate entropy of given histogram."
[
histogram
]
(
transduce
(
map
(
*
%
(
math/log
%
)))
(
transduce
(
map
#
(
*
%
(
math/log
%
)))
(
redux/post-complete
+
-
)
(
vals
(
pmf
histogram
))))
...
...
@@ -191,7 +191,8 @@
:all-distinct?
(
>=
unique%
(
-
1
cardinality-error
))
:entropy
(
binned-entropy
histogram
)
:type
Num
})
{
:count
0
}))))
{
:count
0
:type
Num
}))))
(
defmethod
fingerprinter
[
Num
Num
]
[
_
_
]
...
...
@@ -199,15 +200,13 @@
:covariance
(
stats/covariance
first
second
)
:linear-regression
(
stats/simple-linear-regression
first
second
)}))
(
def
^
:private
^
:const
timestamp-truncation-factor
(
/
1
1000
60
60
24
))
(
def
^
:private
^
{
:arglists
'
([
t
])}
to-double
"Coerce `DateTime` to `Double`."
(
comp
double
t.coerce/to-long
))
(
def
^
:private
^
{
:arglists
'
([
t
])}
truncate-timestamp
"Truncate UNIX timestamp from ms to days."
(
comp
long
(
partial
*
timestamp-truncation-factor
)))
(
def
^
:private
^
{
:arglists
'
([
t
])}
pad-timestamp
"Pad truncated timestamp back into a proper UNIX (ms) timestamp."
(
comp
long
(
partial
*
(
/
timestamp-truncation-factor
))))
(
def
^
:private
^
{
:arglists
'
([
t
])}
from-double
"Coerce `Double` into a `DateTime`."
(
comp
t.coerce/from-long
long
))
(
defn-
fill-timeseries
"Given a coll of `[DateTime, Any]` pairs with periodicty `step` fill missing
...
...
@@ -215,56 +214,52 @@
[
step
ts
]
(
let
[
ts-index
(
into
{}
ts
)]
(
into
[]
(
comp
(
map
(
comp
truncate-timestamp
t.coerce/to-long
)
)
(
comp
(
map
to-double
)
(
take-while
(
partial
>=
(
->
ts
last
first
)))
(
map
(
fn
[
t
]
[
t
(
ts-index
t
0
)])))
(
some->
ts
ffirst
pad-timestamp
t.coerce/from-long
from-double
(
t.periodic/periodic-seq
step
)))))
(
defn-
decompose-timeseries
"Decompose given timeseries with expected periodicty `
resolut
io
n
` into trend,
"Decompose given timeseries with expected periodicty `
per
io
d
` into trend,
seasonal component, and reminder.
`
resolut
io
n
` can be one of `:day`, or `:month`."
[
resolut
io
n
ts
]
(
let
[
period
(
case
resolut
io
n
`
per
io
d
` can be one of `:day`, or `:month`."
[
per
io
d
ts
]
(
let
[
period
(
case
per
io
d
:month
12
:day
52
)]
(
when
(
>=
(
count
ts
)
(
*
2
period
))
(
tide/decompose
period
ts
))))
(
select-keys
(
tide/decompose
period
ts
)
[
:trend
:seasonal
:reminder
])
)))
(
defmethod
fingerprinter
[
DateTime
Num
]
[{
:keys
[
max-cost
resolution
]}
_
]
(
let
[
resolution
(
or
resolution
:raw
)]
[{
:keys
[
max-cost
scale
]}
_
]
(
let
[
scale
(
or
scale
:raw
)]
(
redux/post-complete
(
redux/pre-step
(
redux/fuse
{
:linear-regression
(
stats/simple-linear-regression
first
second
)
:series
(
if
(
=
resolution
:raw
)
:series
(
if
(
=
scale
:raw
)
conj
(
redux/post-complete
conj
(
partial
fill-timeseries
(
case
resolution
(
case
scale
:month
(
t/months
1
)
:day
(
t/days
1
)))))})
(
fn
[[
x
y
]]
[(
->
x
t.format/parse
t
.coerce/to-long
truncate-timestamp
)
y
]))
[(
->
x
t.format/parse
t
o-double
)
y
]))
(
fn
[{
:keys
[
series
linear-regression
]}]
(
let
[
ys-r
(
->>
series
(
map
second
)
reverse
not-empty
)
{
:keys
[
trend
seasonal
reminder
]}
(
when
(
and
(
not=
resolution
:raw
)
(
unbounded-computation?
max-cost
))
(
decompose-timeseries
resolution
series
))]
(
merge
{
:series
(
for
[[
x
y
]
series
]
[(
t.coerce/from-long
(
pad-timestamp
x
))
y
])
:linear-regression
linear-regression
:trend
trend
:seasonal
seasonal
:reminder
reminder
}
(
case
resolution
(
let
[
ys-r
(
->>
series
(
map
second
)
reverse
not-empty
)]
(
merge
{
:series
(
for
[[
x
y
]
series
]
[(
from-double
x
)
y
])
:linear-regression
linear-regression
:seasonal-decomposition
(
when
(
and
(
not=
scale
:raw
)
(
unbounded-computation?
max-cost
))
(
decompose-timeseries
scale
series
))}
(
case
scale
:month
{
:YoY
(
growth
(
first
ys-r
)
(
nth
ys-r
11
))
:YoY-previous
(
growth
(
second
ys-r
)
(
nth
ys-r
12
))
:MoM
(
growth
(
first
ys-r
)
(
second
ys-r
))
...
...
@@ -311,12 +306,11 @@
t.format/parse
)
(
fn
[{
:keys
[
histogram
histogram-hour
histogram-day
histogram-month
histogram-quarter
]}]
(
let
[
nil-count
(
nil-count
histogram
)
->datetime
(
comp
t.coerce/from-long
long
)]
{
:min
(
->datetime
(
hist/minimum
histogram
))
:max
(
->datetime
(
hist/maximum
histogram
))
:histogram
(
m/map-keys
->datetime
(
pmf
histogram
))
:percentiles
(
m/map-vals
->datetime
(
let
[
nil-count
(
nil-count
histogram
)]
{
:min
(
from-double
(
hist/minimum
histogram
))
:max
(
from-double
(
hist/maximum
histogram
))
:histogram
(
m/map-keys
from-double
(
pmf
histogram
))
:percentiles
(
m/map-vals
from-double
(
apply
hist/percentiles
histogram
percentiles
))
:histogram-hour
(
pmf
histogram-hour
)
...
...
@@ -366,19 +360,20 @@
(
defn-
fingerprint-field
"Transduce given column with corresponding fingerprinter."
[
opts
field
data
]
(
transduce
identity
(
fingerprinter
opts
field
)
data
))
(
->
(
transduce
identity
(
fingerprinter
opts
field
)
data
)
(
assoc
:field
field
)))
(
defn-
fingerprint-query
"Transuce each column in given dataset with corresponding fingerprinter."
[
opts
{
:keys
[
rows
cols
]}]
(
transduce
identity
(
redux/fuse
(
into
{}
(
map-indexed
(
fn
[
i
field
]
[(
:name
field
)
(
redux/pre-step
(
fingerprinter
opts
field
)
#
(
nth
%
i
))
]))
cols
))
(
redux/fuse
(
into
{}
(
for
[[
i
field
]
(
m/indexed
cols
)]
[(
:name
field
)
(
redux/post-complete
(
redux/pre-step
(
fingerprinter
opts
field
)
#
(
nth
%
i
))
#
(
assoc
%
:field
field
))])
))
rows
))
(
def
^
:private
^
:const
^
Long
max-sample-size
10000
)
...
...
@@ -397,8 +392,8 @@
scan), or `:joins` (bring in data from other tables if
needed).
* `:
resolution`
controls pre-aggregation by time. Can be one of `:day`,
`:month`, or `:raw`"
* `:
scale`
controls pre-aggregation by time. Can be one of `:day`,
`:month`, or `:raw`"
#
(
class
%2
))
(
defn-
extract-query-opts
...
...
@@ -408,9 +403,8 @@
(
defmethod
fingerprint
(
class
Field
)
[
opts
field
]
(
let
[
data
(
metadata/field-values
field
(
extract-query-opts
opts
))]
(
->
(
fingerprint-field
opts
field
data
)
(
assoc
:field
field
))))
(
->>
(
metadata/field-values
field
(
extract-query-opts
opts
))
(
fingerprint-field
opts
field
)))
(
defmethod
fingerprint
(
class
Table
)
[
opts
table
]
...
...
@@ -441,9 +435,9 @@
(
defn
multifield-fingerprint
"Holistically fingerprint dataset with multiple columns.
Takes and additional option `:
resolution
` which controls how timeseries data
Takes and additional option `:
scale
` which controls how timeseries data
is aggregated. Possible values: `:month`, `:day`, `:raw`."
[{
:keys
[
resolution
]
:as
opts
}
a
b
]
[{
:keys
[
scale
]
:as
opts
}
a
b
]
(
assert
(
=
(
:table_id
a
)
(
:table_id
b
)))
{
:fingerprint
(
->>
(
metadata/query-values
(
metadata/db-id
a
)
...
...
@@ -451,10 +445,10 @@
(
extract-query-opts
opts
)
(
if
(
and
(
isa?
(
field-type
a
)
DateTime
)
(
isa?
(
field-type
b
)
Num
)
(
not=
resolution
:raw
))
(
not=
scale
:raw
))
{
:source-table
(
:table_id
a
)
:breakout
[[
:datetime-field
[
:field-id
(
:id
a
)]
resolution
]]
scale
]]
:aggregation
[
:sum
[
:field-id
(
:id
b
)]]}
{
:source-table
(
:table_id
a
)
:fields
[[
:field-id
(
:id
a
)]
...
...
This diff is collapsed.
Click to expand it.
test/metabase/fingerprinting_test.clj
+
1
−
2
View file @
eb6a9d45
...
...
@@ -83,8 +83,7 @@
(
defn-
make-timestamp
[
y
m
]
(
->
(
t/date-time
y
m
)
t.coerce/to-long
((
var
f/truncate-timestamp
))))
((
var
f/to-double
))))
(
expect
[[(
make-timestamp
2016
1
)
12
]
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment