diff --git a/frontend/src/metabase/icon_paths.js b/frontend/src/metabase/icon_paths.js index 9c658befe881179fca8f13b1917ae7c912cafcaa..5a2fd4f245bad7d67eb954ea41942416d0197d93 100644 --- a/frontend/src/metabase/icon_paths.js +++ b/frontend/src/metabase/icon_paths.js @@ -53,6 +53,9 @@ export var ICON_PATHS = { }, cursor_move: 'M14.8235294,14.8235294 L14.8235294,6.58823529 L17.1764706,6.58823529 L17.1764706,14.8235294 L25.4117647,14.8235294 L25.4117647,17.1764706 L17.1764706,17.1764706 L17.1764706,25.4117647 L14.8235294,25.4117647 L14.8235294,17.1764706 L6.58823529,17.1764706 L6.58823529,14.8235294 L14.8235294,14.8235294 L14.8235294,14.8235294 Z M16,0 L20.1176471,6.58823529 L11.8823529,6.58823529 L16,0 Z M11.8823529,25.4117647 L20.1176471,25.4117647 L16,32 L11.8823529,25.4117647 Z M32,16 L25.4117647,20.1176471 L25.4117647,11.8823529 L32,16 Z M6.58823529,11.8823529 L6.58823529,20.1176471 L0,16 L6.58823529,11.8823529 Z', cursor_resize: 'M17.4017952,6.81355995 L15.0488541,6.81355995 L15.0488541,25.6370894 L17.4017952,25.6370894 L17.4017952,6.81355995 Z M16.2253247,0.225324657 L20.3429717,6.81355995 L12.1076776,6.81355995 L16.2253247,0.225324657 Z M12.1076776,25.6370894 L20.3429717,25.6370894 L16.2253247,32.2253247 L12.1076776,25.6370894 Z', + costapproximate: 'M27 19a3 3 0 1 1 0-6 3 3 0 0 1 0 6zM16 8a3 3 0 1 1 0-6 3 3 0 0 1 0 6zm0 22a3 3 0 1 1 0-6 3 3 0 0 1 0 6zM5 19a3 3 0 1 1 0-6 3 3 0 0 1 0 6z', + costexact: 'M27 19a3 3 0 1 1 0-6 3 3 0 0 1 0 6zM16 8a3 3 0 1 1 0-6 3 3 0 0 1 0 6zm0 22a3 3 0 1 1 0-6 3 3 0 0 1 0 6zM5 19a3 3 0 1 1 0-6 3 3 0 0 1 0 6zm11 0a3 3 0 1 1 0-6 3 3 0 0 1 0 6z', + costextended: 'M27,19 C25.3431458,19 24,17.6568542 24,16 C24,14.3431458 25.3431458,13 27,13 C28.6568542,13 30,14.3431458 30,16 C30,17.6568542 28.6568542,19 27,19 Z M16,8 C14.3431458,8 13,6.65685425 13,5 C13,3.34314575 14.3431458,2 16,2 C17.6568542,2 19,3.34314575 19,5 C19,6.65685425 17.6568542,8 16,8 Z M16,30 C14.3431458,30 13,28.6568542 13,27 C13,25.3431458 14.3431458,24 16,24 
C17.6568542,24 19,25.3431458 19,27 C19,28.6568542 17.6568542,30 16,30 Z M5,19 C3.34314575,19 2,17.6568542 2,16 C2,14.3431458 3.34314575,13 5,13 C6.65685425,13 8,14.3431458 8,16 C8,17.6568542 6.65685425,19 5,19 Z M16,19 C14.3431458,19 13,17.6568542 13,16 C13,14.3431458 14.3431458,13 16,13 C17.6568542,13 19,14.3431458 19,16 C19,17.6568542 17.6568542,19 16,19 Z M10,12 C8.8954305,12 8,11.1045695 8,10 C8,8.8954305 8.8954305,8 10,8 C11.1045695,8 12,8.8954305 12,10 C12,11.1045695 11.1045695,12 10,12 Z M22,12 C20.8954305,12 20,11.1045695 20,10 C20,8.8954305 20.8954305,8 22,8 C23.1045695,8 24,8.8954305 24,10 C24,11.1045695 23.1045695,12 22,12 Z M22,24 C20.8954305,24 20,23.1045695 20,22 C20,20.8954305 20.8954305,20 22,20 C23.1045695,20 24,20.8954305 24,22 C24,23.1045695 23.1045695,24 22,24 Z M10,24 C8.8954305,24 8,23.1045695 8,22 C8,20.8954305 8.8954305,20 10,20 C11.1045695,20 12,20.8954305 12,22 C12,23.1045695 11.1045695,24 10,24 Z', database: 'M1.18285296e-08,10.5127919 C-1.47856568e-08,7.95412848 1.18285298e-08,4.57337284 1.18285298e-08,4.57337284 C1.18285298e-08,4.57337284 1.58371041,5.75351864e-10 15.6571342,0 C29.730558,-5.7535027e-10 31.8900148,4.13849684 31.8900148,4.57337284 L31.8900148,10.4843058 C31.8900148,10.4843058 30.4448001,15.1365942 16.4659751,15.1365944 C2.48715012,15.1365947 2.14244494e-08,11.4353349 1.18285296e-08,10.5127919 Z M0.305419478,21.1290071 C0.305419478,21.1290071 0.0405133833,21.2033291 0.0405133833,21.8492606 L0.0405133833,27.3032816 C0.0405133833,27.3032816 1.46515486,31.941655 15.9641228,31.941655 C30.4630908,31.941655 32,27.3446712 32,27.3446712 C32,27.3446712 32,21.7986104 32,21.7986105 C32,21.2073557 31.6620557,21.0987647 31.6620557,21.0987647 C31.6620557,21.0987647 29.7146434,25.22314 16.0318829,25.22314 C2.34912233,25.22314 0.305419478,21.1290071 0.305419478,21.1290071 Z M0.305419478,12.656577 C0.305419478,12.656577 0.0405133833,12.730899 0.0405133833,13.3768305 L0.0405133833,18.8308514 C0.0405133833,18.8308514 1.46515486,23.4692249 
15.9641228,23.4692249 C30.4630908,23.4692249 32,18.8722411 32,18.8722411 C32,18.8722411 32,13.3261803 32,13.3261803 C32,12.7349256 31.6620557,12.6263346 31.6620557,12.6263346 C31.6620557,12.6263346 29.7146434,16.7507099 16.0318829,16.7507099 C2.34912233,16.7507099 0.305419478,12.656577 0.305419478,12.656577 Z', dashboard: 'M32,29 L32,4 L32,0 L0,0 L0,8 L28,8 L28,28 L4,28 L4,8 L0,8 L0,29.5 L0,32 L32,32 L32,29 Z M7.27272727,18.9090909 L17.4545455,18.9090909 L17.4545455,23.2727273 L7.27272727,23.2727273 L7.27272727,18.9090909 Z M7.27272727,12.0909091 L24.7272727,12.0909091 L24.7272727,16.4545455 L7.27272727,16.4545455 L7.27272727,12.0909091 Z M20.3636364,18.9090909 L24.7272727,18.9090909 L24.7272727,23.2727273 L20.3636364,23.2727273 L20.3636364,18.9090909 Z', dashboards: 'M17,5.49100518 L17,10.5089948 C17,10.7801695 17.2276528,11 17.5096495,11 L26.4903505,11 C26.7718221,11 27,10.7721195 27,10.5089948 L27,5.49100518 C27,5.21983051 26.7723472,5 26.4903505,5 L17.5096495,5 C17.2281779,5 17,5.22788048 17,5.49100518 Z M18.5017326,14 C18.225722,14 18,13.77328 18,13.4982674 L18,26.5017326 C18,26.225722 18.22672,26 18.5017326,26 L5.49826741,26 C5.77427798,26 6,26.22672 6,26.5017326 L6,13.4982674 C6,13.774278 5.77327997,14 5.49826741,14 L18.5017326,14 Z M14.4903505,6 C14.2278953,6 14,5.78028538 14,5.49100518 L14,10.5089948 C14,10.2167107 14.2224208,10 14.4903505,10 L5.50964952,10 C5.77210473,10 6,10.2197146 6,10.5089948 L6,5.49100518 C6,5.78328929 5.77757924,6 5.50964952,6 L14.4903505,6 Z M26.5089948,22 C26.2251201,22 26,21.7774008 26,21.4910052 L26,26.5089948 C26,26.2251201 26.2225992,26 26.5089948,26 L21.4910052,26 C21.7748799,26 22,26.2225992 22,26.5089948 L22,21.4910052 C22,21.7748799 21.7774008,22 21.4910052,22 L26.5089948,22 Z M26.5089948,14 C26.2251201,14 26,13.7774008 26,13.4910052 L26,18.5089948 C26,18.2251201 26.2225992,18 26.5089948,18 L21.4910052,18 C21.7748799,18 22,18.2225992 22,18.5089948 L22,13.4910052 C22,13.7748799 21.7774008,14 21.4910052,14 L26.5089948,14 Z 
M26.4903505,6 C26.2278953,6 26,5.78028538 26,5.49100518 L26,10.5089948 C26,10.2167107 26.2224208,10 26.4903505,10 L17.5096495,10 C17.7721047,10 18,10.2197146 18,10.5089948 L18,5.49100518 C18,5.78328929 17.7775792,6 17.5096495,6 L26.4903505,6 Z M5,13.4982674 L5,26.5017326 C5,26.7769181 5.21990657,27 5.49826741,27 L18.5017326,27 C18.7769181,27 19,26.7800934 19,26.5017326 L19,13.4982674 C19,13.2230819 18.7800934,13 18.5017326,13 L5.49826741,13 C5.22308192,13 5,13.2199066 5,13.4982674 Z M5,5.49100518 L5,10.5089948 C5,10.7801695 5.22765279,11 5.50964952,11 L14.4903505,11 C14.7718221,11 15,10.7721195 15,10.5089948 L15,5.49100518 C15,5.21983051 14.7723472,5 14.4903505,5 L5.50964952,5 C5.22817786,5 5,5.22788048 5,5.49100518 Z M21,21.4910052 L21,26.5089948 C21,26.7801695 21.2278805,27 21.4910052,27 L26.5089948,27 C26.7801695,27 27,26.7721195 27,26.5089948 L27,21.4910052 C27,21.2198305 26.7721195,21 26.5089948,21 L21.4910052,21 C21.2198305,21 21,21.2278805 21,21.4910052 Z M21,13.4910052 L21,18.5089948 C21,18.7801695 21.2278805,19 21.4910052,19 L26.5089948,19 C26.7801695,19 27,18.7721195 27,18.5089948 L27,13.4910052 C27,13.2198305 26.7721195,13 26.5089948,13 L21.4910052,13 C21.2198305,13 21,13.2278805 21,13.4910052 Z', diff --git a/frontend/src/metabase/meta/types/Segment.js b/frontend/src/metabase/meta/types/Segment.js index 1074de8ab6c540e924e927d5ed01fb5effa2262d..37d1101ad7430491e1f41b97dd1c1b684d014a64 100644 --- a/frontend/src/metabase/meta/types/Segment.js +++ b/frontend/src/metabase/meta/types/Segment.js @@ -9,5 +9,6 @@ export type Segment = { name: string, id: SegmentId, table_id: TableId, - is_active: bool + is_active: bool, + description: string }; diff --git a/frontend/src/metabase/qb/components/actions/XRaySegment.jsx b/frontend/src/metabase/qb/components/actions/XRaySegment.jsx new file mode 100644 index 0000000000000000000000000000000000000000..c157dce81e9e6b20b6c36a1faf95ddd0ac628639 --- /dev/null +++ 
b/frontend/src/metabase/qb/components/actions/XRaySegment.jsx @@ -0,0 +1,26 @@ +/* @flow */ +/* + * NOTE(@kdoh) 8/5/2017 - Disabling this file until we add Card XRay support + * import type { + ClickAction, + ClickActionProps +} from "metabase/meta/types/Visualization"; + +export default ({ card, tableMetadata }: ClickActionProps): ClickAction[] => { + console.log(card); + if (card.id) { + return [ + { + name: "underlying-data", + title: "XRay this Card", + icon: "table", + url: () => { + return "/xray/card/" + card.id; + } + } + ]; + } else { + return []; + } +}; +*/ diff --git a/frontend/src/metabase/qb/components/actions/index.js b/frontend/src/metabase/qb/components/actions/index.js index bfbda41978d864fec0e45d3d4ac9cc32fc7baa8c..4ffc2879f3a722324b9ba3cef3a26d66036b0eaa 100644 --- a/frontend/src/metabase/qb/components/actions/index.js +++ b/frontend/src/metabase/qb/components/actions/index.js @@ -3,4 +3,8 @@ import UnderlyingDataAction from "./UnderlyingDataAction"; import UnderlyingRecordsAction from "./UnderlyingRecordsAction"; -export const DEFAULT_ACTIONS = [UnderlyingDataAction, UnderlyingRecordsAction]; +export const DEFAULT_ACTIONS = [ + UnderlyingDataAction, + UnderlyingRecordsAction + // XRaySegment +]; diff --git a/frontend/src/metabase/qb/components/modes/SegmentMode.jsx b/frontend/src/metabase/qb/components/modes/SegmentMode.jsx index 93eccd58ca0ffe8d754e568150816fd0ae7c65eb..4cb9150614fe14530841fd7d17c5e84cffe523e5 100644 --- a/frontend/src/metabase/qb/components/modes/SegmentMode.jsx +++ b/frontend/src/metabase/qb/components/modes/SegmentMode.jsx @@ -18,6 +18,7 @@ const SegmentMode: QueryMode = { name: "segment", actions: [ ...DEFAULT_ACTIONS, + // XRaySegment, CommonMetricsAction, CountByTimeAction, SummarizeBySegmentMetricAction diff --git a/frontend/src/metabase/reference/databases/FieldSidebar.jsx b/frontend/src/metabase/reference/databases/FieldSidebar.jsx index 
171ee257214ed2c526c9209ad9cf78fc9759ed7e..b0ca33652f32e583c233e2ba547d4c791099a011 100644 --- a/frontend/src/metabase/reference/databases/FieldSidebar.jsx +++ b/frontend/src/metabase/reference/databases/FieldSidebar.jsx @@ -32,6 +32,10 @@ const FieldSidebar =({ href={`/reference/databases/${database.id}/tables/${table.id}/fields/${field.id}`} icon="document" name="Details" /> + <SidebarItem key={`/xray/field/${field.id}/approximate`} + href={`/xray/field/${field.id}/approximate`} + icon="document" + name="X-Ray this Field" /> </ul> </div> diff --git a/frontend/src/metabase/reference/databases/TableSidebar.jsx b/frontend/src/metabase/reference/databases/TableSidebar.jsx index c221a3d3ed06c4b1b282764e05ffbd60b09c9650..2b294109cbe5d1a0c028c80f696dcf0157ff5d9b 100644 --- a/frontend/src/metabase/reference/databases/TableSidebar.jsx +++ b/frontend/src/metabase/reference/databases/TableSidebar.jsx @@ -39,6 +39,10 @@ const TableSidebar = ({ href={`/reference/databases/${database.id}/tables/${table.id}/questions`} icon="all" name="Questions about this table" /> + <SidebarItem key={`/xray/table/${table.id}/approximate`} + href={`/xray/table/${table.id}/approximate`} + icon="all" + name="X-Ray this table" /> </ol> </div> diff --git a/frontend/src/metabase/reference/metrics/MetricDetail.jsx b/frontend/src/metabase/reference/metrics/MetricDetail.jsx index e72f971cf43086e2e72a5682a91dac6756b7c4fd..981fef4ed3348e3b40a04834d6b86ab402faba13 100644 --- a/frontend/src/metabase/reference/metrics/MetricDetail.jsx +++ b/frontend/src/metabase/reference/metrics/MetricDetail.jsx @@ -7,7 +7,6 @@ import { push } from "react-router-redux"; import List from "metabase/components/List.jsx"; import LoadingAndErrorWrapper from "metabase/components/LoadingAndErrorWrapper.jsx"; - import EditHeader from "metabase/reference/components/EditHeader.jsx"; import EditableReferenceHeader from "metabase/reference/components/EditableReferenceHeader.jsx"; import Detail from 
"metabase/reference/components/Detail.jsx"; diff --git a/frontend/src/metabase/reference/reference.js b/frontend/src/metabase/reference/reference.js index 8145614b49d61711c5f0517ac2b109545be2e974..ff94c989aa9227c8211c573a8420b3427d16c7a2 100644 --- a/frontend/src/metabase/reference/reference.js +++ b/frontend/src/metabase/reference/reference.js @@ -10,11 +10,11 @@ import { import MetabaseAnalytics from 'metabase/lib/analytics'; -import { GettingStartedApi } from "metabase/services"; +import { GettingStartedApi, XRayApi } from 'metabase/services'; -import { - filterUntouchedFields, - isEmptyObject +import { + filterUntouchedFields, + isEmptyObject } from "./utils.js" export const FETCH_GUIDE = "metabase/reference/FETCH_GUIDE"; @@ -74,6 +74,117 @@ export const showDashboardModal = createAction(SHOW_DASHBOARD_MODAL); export const hideDashboardModal = createAction(HIDE_DASHBOARD_MODAL); +// Xray Fetch Actions +// These actions are used to fetch Xray fingerprints and comparisons. Most take a cost which +// is used by the backend to figure out how precise to be when generating the xray stats. 
+
+// Fetch one field's fingerprint; cost.method is spread into the request. Errors are logged and the payload is then undefined.
+const FETCH_FIELD_FINGERPRINT = 'metabase/reference/FETCH_FIELD_FINGERPRINT';
+export const fetchFieldFingerPrint = createThunkAction(FETCH_FIELD_FINGERPRINT, function(fieldId, cost) {
+    return async () => {
+        try {
+            return await XRayApi.field_fingerprint({ fieldId, ...cost.method });
+        } catch (error) {
+            console.error(error);
+        }
+    };
+});
+
+// Same as above for a table.
+const FETCH_TABLE_FINGERPRINT = 'metabase/reference/FETCH_TABLE_FINGERPRINT';
+export const fetchTableFingerPrint = createThunkAction(FETCH_TABLE_FINGERPRINT, function(tableId, cost) {
+    return async () => {
+        try {
+            return await XRayApi.table_fingerprint({ tableId, ...cost.method });
+        } catch (error) {
+            console.error(error);
+        }
+    };
+});
+
+// Same as above for a segment.
+const FETCH_SEGMENT_FINGERPRINT = 'metabase/reference/FETCH_SEGMENT_FINGERPRINT';
+export const fetchSegmentFingerPrint = createThunkAction(FETCH_SEGMENT_FINGERPRINT, function(segmentId, cost) {
+    return async () => {
+        try {
+            return await XRayApi.segment_fingerprint({ segmentId, ...cost.method });
+        } catch (error) {
+            console.error(error);
+        }
+    };
+});
+
+// Card fingerprints take no cost argument.
+const FETCH_CARD_FINGERPRINT = 'metabase/reference/FETCH_CARD_FINGERPRINT';
+export const fetchCardFingerPrint = createThunkAction(FETCH_CARD_FINGERPRINT, function(cardId) {
+    return async () => {
+        try {
+            return await XRayApi.card_fingerprint({ cardId });
+        } catch (error) {
+            console.error(error);
+        }
+    };
+});
+
+// Compare two fields; the field_compare route placeholders are literally :fieldId1/:fieldId2.
+const FETCH_FIELD_COMPARISON = 'metabase/reference/FETCH_FIELD_COMPARISON';
+export const fetchFieldComparison = createThunkAction(FETCH_FIELD_COMPARISON, function(fieldId1, fieldId2) {
+    return async () => {
+        try {
+            return await XRayApi.field_compare({ fieldId1, fieldId2 });
+        } catch (error) {
+            console.error(error);
+        }
+    };
+});
+
+// The table_compare route placeholders are :tableId/:otherTableId, so the two ids are mapped onto those names.
+const FETCH_TABLE_COMPARISON = 'metabase/reference/FETCH_TABLE_COMPARISON';
+export const fetchTableComparison = createThunkAction(FETCH_TABLE_COMPARISON, function(tableId1, tableId2) {
+    return async () => {
+        try {
+            return await XRayApi.table_compare({ tableId: tableId1, otherTableId: tableId2 });
+        } catch (error) {
+            console.error(error);
+        }
+    };
+});
+
+// The segment_compare route placeholders are :segmentId/:otherSegmentId.
+const FETCH_SEGMENT_COMPARISON = 'metabase/reference/FETCH_SEGMENT_COMPARISON';
+export const fetchSegmentComparison = createThunkAction(FETCH_SEGMENT_COMPARISON, function(segmentId1, segmentId2) {
+    return async () => {
+        try {
+            return await XRayApi.segment_compare({ segmentId: segmentId1, otherSegmentId: segmentId2 });
+        } catch (error) {
+            console.error(error);
+        }
+    };
+});
+
+// NOTE(review): XRayApi as added by this patch defines no metric_compare, so this call throws and is only logged.
+const FETCH_METRIC_COMPARISON = 'metabase/reference/FETCH_METRIC_COMPARISON';
+export const fetchMetricComparison = createThunkAction(FETCH_METRIC_COMPARISON, function(metricId1, metricId2) {
+    return async () => {
+        try {
+            return await XRayApi.metric_compare({ metricId1, metricId2 });
+        } catch (error) {
+            console.error(error);
+        }
+    };
+});
+
+// The card_compare route placeholders are :cardId/:otherCardId.
+const FETCH_CARD_COMPARISON = 'metabase/reference/FETCH_CARD_COMPARISON';
+export const fetchCardComparison = createThunkAction(FETCH_CARD_COMPARISON, function(cardId1, cardId2) {
+    return async () => {
+        try {
+            return await XRayApi.card_compare({ cardId: cardId1, otherCardId: cardId2 });
+        } catch (error) {
+            console.error(error);
+        }
+    };
+});
 
 // Helper functions.
This is meant to be a transitional state to get things out of tryFetchData() and friends @@ -96,8 +207,8 @@ const fetchDataWrapper = (props, fn) => { export const wrappedFetchGuide = async (props) => { fetchDataWrapper( - props, - async () => { + props, + async () => { await Promise.all( [props.fetchGuide(), props.fetchDashboards(), @@ -114,8 +225,8 @@ export const wrappedFetchDatabaseMetadata = (props, databaseID) => { export const wrappedFetchDatabaseMetadataAndQuestion = async (props, databaseID) => { fetchDataWrapper( - props, - async (dbID) => { + props, + async (dbID) => { await Promise.all( [props.fetchDatabaseMetadata(dbID), props.fetchQuestions()] @@ -125,11 +236,11 @@ export const wrappedFetchDatabaseMetadataAndQuestion = async (props, databaseID) export const wrappedFetchMetricDetail = async (props, metricID) => { fetchDataWrapper( - props, - async (mID) => { + props, + async (mID) => { await Promise.all( [props.fetchMetricTable(mID), - props.fetchMetrics(), + props.fetchMetrics(), props.fetchGuide()] )} )(metricID) @@ -137,11 +248,11 @@ export const wrappedFetchMetricDetail = async (props, metricID) => { export const wrappedFetchMetricQuestions = async (props, metricID) => { fetchDataWrapper( - props, - async (mID) => { + props, + async (mID) => { await Promise.all( [props.fetchMetricTable(mID), - props.fetchMetrics(), + props.fetchMetrics(), props.fetchQuestions()] )} )(metricID) @@ -149,8 +260,8 @@ export const wrappedFetchMetricQuestions = async (props, metricID) => { export const wrappedFetchMetricRevisions = async (props, metricID) => { fetchDataWrapper( - props, - async (mID) => { + props, + async (mID) => { await Promise.all( [props.fetchMetricRevisions(mID), props.fetchMetrics()] @@ -194,8 +305,8 @@ export const wrappedFetchSegmentDetail = (props, segmentID) => { export const wrappedFetchSegmentQuestions = async (props, segmentID) => { fetchDataWrapper( - props, - async (sID) => { + props, + async (sID) => { await props.fetchSegments(sID); await 
Promise.all( [props.fetchSegmentTable(sID), @@ -206,8 +317,8 @@ export const wrappedFetchSegmentQuestions = async (props, segmentID) => { export const wrappedFetchSegmentRevisions = async (props, segmentID) => { fetchDataWrapper( - props, - async (sID) => { + props, + async (sID) => { await props.fetchSegments(sID); await Promise.all( [props.fetchSegmentRevisions(sID), @@ -218,8 +329,8 @@ export const wrappedFetchSegmentRevisions = async (props, segmentID) => { export const wrappedFetchSegmentFields = async (props, segmentID) => { fetchDataWrapper( - props, - async (sID) => { + props, + async (sID) => { await props.fetchSegments(sID); await Promise.all( [props.fetchSegmentFields(sID), @@ -229,7 +340,7 @@ export const wrappedFetchSegmentFields = async (props, segmentID) => { } // This is called when a component gets a new set of props. -// I *think* this is un-necessary in all cases as we're using multiple +// I *think* this is un-necessary in all cases as we're using multiple // components where the old code re-used the same component export const clearState = props => { props.endEditing(); @@ -247,9 +358,9 @@ const resetForm = (props) => { } // Update actions -// these use the "fetchDataWrapper" for now. It should probably be renamed. -// Using props to fire off actions, which imo should be refactored to -// dispatch directly, since there is no actual dependence with the props +// these use the "fetchDataWrapper" for now. It should probably be renamed. 
+// Using props to fire off actions, which imo should be refactored to +// dispatch directly, since there is no actual dependence with the props // of that component const updateDataWrapper = (props, fn) => { @@ -542,6 +653,7 @@ export const tryUpdateGuide = async (formFields, props) => { endEditing(); }; + const initialState = { error: null, isLoading: false, @@ -553,6 +665,18 @@ export default handleActions({ [FETCH_GUIDE]: { next: (state, { payload }) => assoc(state, 'guide', payload) }, + [FETCH_FIELD_FINGERPRINT]: { + next: (state, { payload }) => assoc(state, 'fieldFingerprint', payload) + }, + [FETCH_TABLE_FINGERPRINT]: { + next: (state, { payload }) => assoc(state, 'tableFingerprint', payload) + }, + [FETCH_SEGMENT_FINGERPRINT]: { + next: (state, { payload }) => assoc(state, 'segmentFingerprint', payload) + }, + [FETCH_FIELD_COMPARISON]: { + next: (state, { payload }) => assoc(state, 'fieldComparison', payload) + }, [SET_ERROR]: { throw: (state, { payload }) => assoc(state, 'error', payload) }, diff --git a/frontend/src/metabase/reference/segments/SegmentSidebar.jsx b/frontend/src/metabase/reference/segments/SegmentSidebar.jsx index fc955e1a6a0045fd57ffcd8af3a97a5037556d9b..8e05aecebcd288e41bebbf267ee6ab964e2adac2 100644 --- a/frontend/src/metabase/reference/segments/SegmentSidebar.jsx +++ b/frontend/src/metabase/reference/segments/SegmentSidebar.jsx @@ -38,6 +38,10 @@ const SegmentSidebar = ({ href={`/reference/segments/${segment.id}/questions`} icon="all" name={`Questions about this segment`} /> + <SidebarItem key={`/xray/segment/${segment.id}/approximate`} + href={`/xray/segment/${segment.id}/approximate`} + icon="all" + name={`X-Ray this segment`} /> { user && user.is_superuser && <SidebarItem key={`/reference/segments/${segment.id}/revisions`} diff --git a/frontend/src/metabase/reference/selectors.js b/frontend/src/metabase/reference/selectors.js index 7539d3c7ae23e80b5921010c47d26491e331b600..3a508879eae0b209fc865efcce130045de5f5ded 100644 --- 
a/frontend/src/metabase/reference/selectors.js
+++ b/frontend/src/metabase/reference/selectors.js
@@ -169,3 +169,25 @@ export const getGuide = (state, props) => state.reference.guide;
 export const getDashboards = (state, props) => getDashboardListing(state) && resourceListToMap(getDashboardListing(state));
 
 export const getIsDashboardModalOpen = (state, props) => state.reference.isDashboardModalOpen;
+
+// X-ray selectors. The reducer in reference.js stores each fetch payload under state.reference;
+// these unwrap it and evaluate to a falsy value while the payload is absent.
+export const getFieldFingerprint = (state) =>
+    state.reference.fieldFingerprint && state.reference.fieldFingerprint.fingerprint
+
+export const getTableFingerprint = (state) =>
+    state.reference.tableFingerprint && state.reference.tableFingerprint.fingerprint
+
+export const getSegmentFingerprint = (state) =>
+    state.reference.segmentFingerprint && state.reference.segmentFingerprint.fingerprint
+
+// Values of payload.constituents as an array; falsy (instead of throwing) when the payload or that key is missing.
+const getConstituents = (payload) =>
+    payload && payload.constituents &&
+        Object.keys(payload.constituents).map(key => payload.constituents[key])
+
+export const getTableConstituents = (state) =>
+    getConstituents(state.reference.tableFingerprint)
+
+export const getSegmentConstituents = (state) =>
+    getConstituents(state.reference.segmentFingerprint)
diff --git a/frontend/src/metabase/routes.jsx b/frontend/src/metabase/routes.jsx
index d24593c3cb2ee89bf1b62b676766cc105d16ab5e..4e9807668f08a1a89348a0b4c18925de2d8f8d36 100644
--- a/frontend/src/metabase/routes.jsx
+++ b/frontend/src/metabase/routes.jsx
@@ -57,7 +57,7 @@ import Unauthorized from "metabase/components/Unauthorized.jsx";
 // Reference Guide
 import GettingStartedGuideContainer from "metabase/reference/guide/GettingStartedGuideContainer.jsx";
 
-// Reference Metrics 
+// Reference Metrics
 import MetricListContainer from "metabase/reference/metrics/MetricListContainer.jsx";
 import MetricDetailContainer from "metabase/reference/metrics/MetricDetailContainer.jsx";
 import MetricQuestionsContainer from
"metabase/reference/metrics/MetricQuestionsContainer.jsx";
@@ -79,6 +79,18 @@ import FieldListContainer from "metabase/reference/databases/FieldListContainer.
 import FieldDetailContainer from "metabase/reference/databases/FieldDetailContainer.jsx";
 
 
+/* XRay */
+import FieldXRay from "metabase/xray/containers/FieldXray.jsx";
+import TableXRay from "metabase/xray/containers/TableXRay.jsx";
+import SegmentXRay from "metabase/xray/containers/SegmentXRay.jsx";
+import CardXRay from "metabase/xray/containers/CardXRay.jsx";
+
+/* Comparisons */
+import FieldComparison from "metabase/xray/containers/FieldComparison.jsx";
+import TableComparison from "metabase/xray/containers/TableComparison.jsx";
+import SegmentComparison from "metabase/xray/containers/SegmentComparison.jsx";
+import CardComparison from "metabase/xray/containers/CardComparison.jsx";
+
 import getAdminPermissionsRoutes from "metabase/admin/permissions/routes.jsx";
 
 import PeopleListingApp from "metabase/admin/people/containers/PeopleListingApp.jsx";
@@ -229,6 +241,17 @@ export const getRoutes = (store) =>
                 <Route path="databases/:databaseId/tables/:tableId/fields/:fieldId" component={FieldDetailContainer} />
                 <Route path="databases/:databaseId/tables/:tableId/questions" component={TableQuestionsContainer} />
             </Route>
+            {/* XRAY */}
+            <Route path="/xray" title="XRay">
+                <Route path="segment/:segmentId/:cost" component={SegmentXRay} />
+                <Route path="table/:tableId/:cost" component={TableXRay} />
+                <Route path="field/:fieldId/:cost" component={FieldXRay} />
+                <Route path="card/:cardId" component={CardXRay} />
+                <Route path="compare/fields/:fieldId1/:fieldId2" component={FieldComparison} />
+                <Route path="compare/tables/:tableId1/:tableId2" component={TableComparison} />
+                <Route path="compare/segments/:segmentId1/:segmentId2" component={SegmentComparison} />
+                <Route path="compare/cards/:cardId1/:cardId2" component={CardComparison} />
+            </Route>
 
             {/* PULSE */}
             <Route path="/pulse" title="Pulses">
diff --git
a/frontend/src/metabase/services.js b/frontend/src/metabase/services.js
index 736da5a3eda9200eef53923c229874f335c77ac9..468299f5f87072d8addf80a521e19f8ff1b55246 100644
--- a/frontend/src/metabase/services.js
+++ b/frontend/src/metabase/services.js
@@ -132,7 +132,22 @@ export const MetabaseApi = {
     field_dimension_update: POST("/api/field/:fieldId/dimension"),
     field_dimension_delete: DELETE("/api/field/:fieldId/dimension"),
     dataset: POST("/api/dataset"),
-    dataset_duration: POST("/api/dataset/duration"),
+    dataset_duration: POST("/api/dataset/duration")
+};
+
+export const XRayApi = {
+    // X-ray fingerprints. Paths start with "/" like every other endpoint in this file, so they are not page-relative.
+    field_fingerprint: GET("/api/fingerprint/field/:fieldId"),
+    table_fingerprint: GET("/api/fingerprint/table/:tableId"),
+    segment_fingerprint: GET("/api/fingerprint/segment/:segmentId"),
+    card_fingerprint: GET("/api/fingerprint/card/:cardId"),
+
+    // Comparisons. Callers must pass params named exactly like the :placeholders below.
+    // TODO - the api is currently set where compare is nested under fingerprint
+    field_compare: GET("/api/fingerprint/compare/fields/:fieldId1/:fieldId2"),
+    table_compare: GET("/api/fingerprint/compare/table/:tableId/:otherTableId"),
+    segment_compare: GET("/api/fingerprint/compare/segment/:segmentId/:otherSegmentId"),
+    card_compare: GET("/api/fingerprint/compare/card/:cardId/:otherCardId")
 };
 
 export const PulseApi = {
diff --git a/frontend/src/metabase/xray/Histogram.jsx b/frontend/src/metabase/xray/Histogram.jsx
new file mode 100644
index 0000000000000000000000000000000000000000..ad57ecc5ffee465442a43722428b03137459f5ae
--- /dev/null
+++ b/frontend/src/metabase/xray/Histogram.jsx
@@ -0,0 +1,20 @@
+import React from 'react'
+import Visualization from 'metabase/visualizations/components/Visualization'
+
+// Renders `histogram` as the data of a single untitled "bar" series.
+const Histogram = ({ histogram }) =>
+    <Visualization
+        className="full-height"
+        series={[
+            {
+                card: {
+                    display: "bar",
+                    visualization_settings: {}
+                },
+                data: histogram
+            }
+        ]}
+        showTitle={false}
+    />
+
+export default Histogram
diff --git a/frontend/src/metabase/xray/SimpleStat.jsx
b/frontend/src/metabase/xray/SimpleStat.jsx
new file mode 100644
index 0000000000000000000000000000000000000000..46c9b81ebbc2da7e6bcacece8cfbe2ede8033450
--- /dev/null
+++ b/frontend/src/metabase/xray/SimpleStat.jsx
@@ -0,0 +1,22 @@
+import React from 'react'
+import Tooltip from 'metabase/components/Tooltip'
+import Icon from 'metabase/components/Icon'
+
+// Shows one stat's label and value; the description tooltip is only rendered when showDescription is set.
+const SimpleStat = ({ stat, showDescription }) =>
+    <div>
+        <div className="flex align-center">
+            <h3 className="mr1 text-grey-4">{stat.label}</h3>
+            { showDescription && (
+                <Tooltip tooltip={stat.description}>
+                    <Icon name='infooutlined' />
+                </Tooltip>
+            )}
+        </div>
+        { /* String() so that values like true / false show up, and a null or undefined value cannot throw */ }
+        <h1 className="my1">
+            {String(stat.value)}
+        </h1>
+    </div>
+
+export default SimpleStat
diff --git a/frontend/src/metabase/xray/components/Constituent.jsx b/frontend/src/metabase/xray/components/Constituent.jsx
new file mode 100644
index 0000000000000000000000000000000000000000..9204f85788e2c453ccb26a15ac6db0379b4107ae
--- /dev/null
+++ b/frontend/src/metabase/xray/components/Constituent.jsx
@@ -0,0 +1,41 @@
+import React from 'react'
+import { Link } from 'react-router'
+
+import Histogram from 'metabase/xray/Histogram'
+import SimpleStat from 'metabase/xray/SimpleStat'
+
+// Card for one constituent field. The whole card links to that field's X-ray (absolute path, like the other /xray links).
+const Constituent = ({constituent}) =>
+    <Link
+        to={`/xray/field/${constituent.field.id}/approximate`}
+        className="no-decoration"
+    >
+        <div className="Grid my3 bg-white bordered rounded shadowed shadow-hover no-decoration">
+            <div className="Grid-cell Cell--1of3 border-right">
+                <div className="p4">
+                    <h2 className="text-bold text-brand">{constituent.field.display_name}</h2>
+                    <p className="text-measure text-paragraph">{constituent.field.description}</p>
+
+                    <div className="flex align-center">
+                        { constituent.min && (
+                            <SimpleStat
+                                stat={constituent.min}
+                            />
+                        )}
+                        { constituent.max && (
+                            <SimpleStat
+                                stat={constituent.max}
+                            />
+                        )}
+                    </div>
+                </div>
+            </div>
+            <div className="Grid-cell p3">
+                <div style={{ height: 220 }}>
+                    { constituent.histogram && (<Histogram histogram={constituent.histogram.value} />) }
+                </div>
+            </div>
+        </div>
+    </Link>
+
+export default Constituent
diff --git a/frontend/src/metabase/xray/components/CostSelect.jsx b/frontend/src/metabase/xray/components/CostSelect.jsx
new file mode 100644
index 0000000000000000000000000000000000000000..675c84e11f603931bd1f47e5b551398ccf049537
--- /dev/null
+++ b/frontend/src/metabase/xray/components/CostSelect.jsx
@@ -0,0 +1,43 @@
+import React from 'react'
+import cx from 'classnames'
+import { Link } from 'react-router'
+
+import Icon from 'metabase/components/Icon'
+import Tooltip from 'metabase/components/Tooltip'
+
+import COSTS from 'metabase/xray/costs'
+
+// One link per key of COSTS, each pointing at /xray/<xrayType>/<id>/<cost>; the entry matching currentCost is highlighted.
+const CostSelect = ({ currentCost, xrayType, id }) =>
+    <ol className="bordered rounded shadowed bg-white flex align-center overflow-hidden">
+        { Object.keys(COSTS).map(cost => {
+            const c = COSTS[cost]
+            // the key goes on <Link> because it is the outermost element returned from map()
+            return (
+                <Link
+                    key={cost}
+                    to={`/xray/${xrayType}/${id}/${cost}`}
+                    className="no-decoration"
+                >
+                    <li
+                        className={cx(
+                            "flex align-center justify-center cursor-pointer bg-brand-hover text-white-hover transition-background transition-text text-grey-2",
+                            { 'bg-brand text-white': currentCost === cost }
+                        )}
+                    >
+                        <Tooltip
+                            tooltip={c.description}
+                        >
+                            <Icon
+                                size={32}
+                                name={c.icon}
+                                className="p1 border-right"
+                            />
+                        </Tooltip>
+                    </li>
+                </Link>
+            )
+        })}
+    </ol>
+
+export default CostSelect
diff --git a/frontend/src/metabase/xray/components/StatGroup.jsx b/frontend/src/metabase/xray/components/StatGroup.jsx
new file mode 100644
index 0000000000000000000000000000000000000000..8531df06b304b077ca2ccfe9b147e3b4c40f9f07
--- /dev/null
+++ b/frontend/src/metabase/xray/components/StatGroup.jsx
@@ -0,0 +1,31 @@
+import React from 'react'
+import { Heading } from 'metabase/xray/components/XRayLayout'
+import SimpleStat from 'metabase/xray/SimpleStat'
+
+// True when the fingerprint has a truthy entry for at least one of the given stat keys.
+const atLeastOneStat = (fingerprint, stats) =>
+    stats.some(s => fingerprint[s])
+
+// Headed grid of SimpleStats for the given keys; evaluates to false when none of the keys is present.
+const StatGroup = ({ heading, fingerprint, stats, showDescriptions }) =>
+    atLeastOneStat(fingerprint, stats) && (
+        <div className="my4">
+            <Heading heading={heading} />
+            <div className="bordered rounded shadowed bg-white">
+                <ol className="Grid Grid--1of4">
+                    { stats.map(stat =>
+                        !!fingerprint[stat] && (
+                            <li className="Grid-cell lg-p3 lg-px4 border-right border-bottom" key={stat}>
+                                <SimpleStat
+                                    stat={fingerprint[stat]}
+                                    showDescription={showDescriptions}
+                                />
+                            </li>
+                        )
+                    )}
+                </ol>
+            </div>
+        </div>
+    )
+
+export default StatGroup
diff --git a/frontend/src/metabase/xray/components/XRayLayout.jsx b/frontend/src/metabase/xray/components/XRayLayout.jsx
new file mode 100644
index 0000000000000000000000000000000000000000..6c25f9718156cd848d48ef13c19e56b325846a7d
--- /dev/null
+++ b/frontend/src/metabase/xray/components/XRayLayout.jsx
@@ -0,0 +1,13 @@
+import React from 'react'
+
+
+// A small wrapper to get consistent page structure
+export const XRayPageWrapper = ({ children }) =>
+    <div className="wrapper bg-slate-extra-light pb4 full-height" style={{ paddingLeft: '6em', paddingRight: '6em' }}>
+        { children }
+    </div>
+
+
+// A unified heading for XRay pages
+export const Heading = ({ heading }) =>
+    <h2 className="py3">{heading}</h2>
diff --git a/frontend/src/metabase/xray/containers/CardComparison.jsx b/frontend/src/metabase/xray/containers/CardComparison.jsx
new file mode 100644
index 0000000000000000000000000000000000000000..9437fd16cac71e70638d2ae612fbffb287f1565c
--- /dev/null
+++ b/frontend/src/metabase/xray/containers/CardComparison.jsx
@@ -0,0 +1,31 @@
+import React, { Component } from 'react'
+import { connect } from 'react-redux'
+
+import { fetchCardComparison } from 'metabase/reference/reference'
+
+import LoadingAndErrorWrapper from 'metabase/components/LoadingAndErrorWrapper'
+
+const mapStateToProps = state => ({
+    cardComparison: state.reference.cardComparison
+})
+
+const mapDispatchToProps = {
+
fetchCardComparison +} + +class CardComparison extends Component { + componentWillMount () { + const { cardId1, cardId2 } = this.props.params + console.log('ids', cardId1, cardId2) + this.props.fetchCardComparison(cardId1, cardId2) + } + render () { + return ( + <LoadingAndErrorWrapper loading={!this.props.cardComparison}> + { JSON.stringify(this.props.cardComparison, null, 2) } + </LoadingAndErrorWrapper> + ) + } +} + +export default connect(mapStateToProps, mapDispatchToProps)(CardComparison) diff --git a/frontend/src/metabase/xray/containers/CardXRay.jsx b/frontend/src/metabase/xray/containers/CardXRay.jsx new file mode 100644 index 0000000000000000000000000000000000000000..10d28fdfe49a519cd1058640878844b415c69f1a --- /dev/null +++ b/frontend/src/metabase/xray/containers/CardXRay.jsx @@ -0,0 +1,190 @@ +import React, { Component } from 'react' + +import { connect } from 'react-redux' + +import { fetchCardFingerPrint } from 'metabase/reference/reference' + +import LoadingAndErrorWrapper from 'metabase/components/LoadingAndErrorWrapper' +import SimpleStat from 'metabase/xray/SimpleStat' + +type Props = { + fetchCardFingerPrint: () => void, + fingerprint: {} +} + +const FingerPrintList = ({ fingerprint }) => + <div> + <ol className="full"> + { Object.keys(fingerprint).map(fieldName => { + const f = fingerprint[fieldName] + return ( + <li key={fieldName}> + <h4>{fieldName}</h4> + <div> + <ol className="Grid"> + <li className="Grid-cell"> + <SimpleStat name="Min" data={f.min} /> + </li> + <li className="Grid-cell"> + <SimpleStat name="Skewness" data={f.skewness} /> + </li> + <li className="Grid-cell"> + <SimpleStat name="Mean" data={f.mean} /> + </li> + </ol> + </div> + </li> + ) + })} + </ol> + </div> + +const FingerprintGrid = ({ fingerprint, fields, distribution }) => + <div className="full"> + <ol> + <li className="border-bottom border-dark"> + <ol className="Grid Grid--gutters"> + <li className="Grid-cell"> + <h3>Field</h3> + </li> + { fields.map(field => + <li 
className="Grid-cell"> + <h3>{field}</h3> + </li> + )} + { distribution && ( + <li className="Grid-cell"> + <h3>Distribution</h3> + </li> + )} + </ol> + </li> + { Object.keys(fingerprint).map(key => { + const field = fingerprint[key] + return ( + <li className="border-bottom"> + <ol className="Grid Grid--gutters"> + <li className="Grid-cell"> + <a className="link text-bold">{key}</a> + </li> + { fields.map(f => + <li className="Grid-cell"> + { field[f] } + </li> + )} + { /* + <li className="Grid-cell"> + { field['has-nils?'] } + </li> + <li className="Grid-cell"> + { field['all-distinct?'] } + </li> + <li className="Grid-cell"> + { field.mean } + </li> + <li className="Grid-cell"> + { field.min } + </li> + <li className="Grid-cell"> + { field.max } + </li> + <li className="Grid-cell"> + { field.median } + </li> + */} + { distribution && ( + <li className="Grid-cell"> + </li> + )} + </ol> + </li> + ) + })} + </ol> + </div> + +class CardXRay extends Component { + props: Props + + state = { + grid: true + } + + componentDidMount () { + this.props.fetchCardFingerPrint(this.props.params.cardId) + } + + + render () { + const { fingerprint } = this.props + return ( + <div className="wrapper" style={{ marginLeft: '6em', marginRight: '6em'}}> + <div className="my4 py4"> + <h1>Xray</h1> + </div> + <LoadingAndErrorWrapper loading={!fingerprint}> + { () => + <div className="full"> + { this.state.grid ?( + <div className="mt3"> + <div className="my4"> + <h2 className="py3 my3">Overview</h2> + <FingerprintGrid + fingerprint={fingerprint} + fields={['count', 'min', 'max', 'mean', 'median']} + distribution={false} + /> + </div> + <div className="my4"> + <h2 className="py3 my3">I am a cool math wizard</h2> + <FingerprintGrid + fingerprint={fingerprint} + fields={['skewness', 'has-nils?', 'all-distinct?', 'range-vs-spread', 'sum-of-squares', 'range-vs-sd']} + distribution={true} + /> + </div> + { fingerprint['CREATED_AT'] && ( + <div className="my4"> + <h2 className="py3 my3">Time 
breakdown</h2> + <div className="my3"> + <h4>Hour</h4> + </div> + <div className="my3"> + <h4>Day</h4> + </div> + <div className="my3"> + <h4>Month</h4> + </div> + <div className="my3"> + <h4>Quarter</h4> + </div> + </div> + )} + </div> + ) + : ( + <FingerPrintList fingerprint={fingerprint} /> + )} + <pre> + + <code> + { JSON.stringify(fingerprint, null, 2) } + </code> + </pre> + </div> + } + </LoadingAndErrorWrapper> + </div> + ) + } +} + +const mapStateToProps = state => ({ + fingerprint: state.reference.tableFingerprint, +}) + +const mapDispatchToProps = { + fetchCardFingerPrint: fetchCardFingerPrint +} + +export default connect(mapStateToProps, mapDispatchToProps)(CardXRay) diff --git a/frontend/src/metabase/xray/containers/FieldComparison.jsx b/frontend/src/metabase/xray/containers/FieldComparison.jsx new file mode 100644 index 0000000000000000000000000000000000000000..fb1204c8125eb1ec859bcedab9bfae66d62f3c99 --- /dev/null +++ b/frontend/src/metabase/xray/containers/FieldComparison.jsx @@ -0,0 +1,31 @@ +import React, { Component } from 'react' +import { connect } from 'react-redux' + +import { fetchFieldComparison } from 'metabase/reference/reference' + +import LoadingAndErrorWrapper from 'metabase/components/LoadingAndErrorWrapper' + +const mapStateToProps = state => ({ + fieldComparison: state.reference.fieldComparison +}) + +const mapDispatchToProps = { + fetchFieldComparison +} + +class FieldComparison extends Component { + componentWillMount () { + const { fieldId1, fieldId2 } = this.props.params + console.log('ids', fieldId1, fieldId2) + this.props.fetchFieldComparison(fieldId1, fieldId2) + } + render () { + return ( + <LoadingAndErrorWrapper loading={!this.props.fieldComparison}> + { JSON.stringify(this.props.fieldComparison, null, 2) } + </LoadingAndErrorWrapper> + ) + } +} + +export default connect(mapStateToProps, mapDispatchToProps)(FieldComparison) diff --git a/frontend/src/metabase/xray/containers/FieldXray.jsx 
b/frontend/src/metabase/xray/containers/FieldXray.jsx new file mode 100644 index 0000000000000000000000000000000000000000..1e050a6b8a77a38af89b0ad997c6b8362a3bb1a2 --- /dev/null +++ b/frontend/src/metabase/xray/containers/FieldXray.jsx @@ -0,0 +1,189 @@ +/* @flow */ +import React, { Component } from 'react' + +import { connect } from 'react-redux' +import title from 'metabase/hoc/Title' +import { Link } from 'react-router' + +import { isDate } from 'metabase/lib/schema_metadata' +import { fetchFieldFingerPrint } from 'metabase/reference/reference' +import { getFieldFingerprint } from 'metabase/reference/selectors' + +import COSTS from 'metabase/xray/costs' + +import { + PERIODICITY, + ROBOTS, + STATS_OVERVIEW, + VALUES_OVERVIEW +} from 'metabase/xray/stats' + +import Icon from 'metabase/components/Icon' +import LoadingAndErrorWrapper from 'metabase/components/LoadingAndErrorWrapper' +import CostSelect from 'metabase/xray/components/CostSelect' +import StatGroup from 'metabase/xray/components/StatGroup' +import Histogram from 'metabase/xray/Histogram' +import { Heading, XRayPageWrapper } from 'metabase/xray/components/XRayLayout' + +import type { Field } from 'metabase/meta/types/Field' +import type { Table } from 'metabase/meta/types/Table' + +type Props = { + fetchFieldFingerPrint: () => void, + fingerprint: { + table: Table, + field: Field, + histogram: { + value: {} + } + }, + params: { + cost: string, + fieldId: number + }, +} + +const Periodicity = ({fingerprint}) => + <div> + <Heading heading="Time breakdown" />, + <div className="bg-white bordered rounded shadowed"> + <div className="Grid Grid--gutters Grid--1of4"> + { PERIODICITY.map(period => + fingerprint[`histogram-${period}`] && ( + <div className="Grid-cell"> + <div className="p4 border-right border-bottom"> + <div style={{ height: 120}}> + <h4> + {fingerprint[`histogram-${period}`].label} + </h4> + <Histogram + histogram={fingerprint[`histogram-${period}`].value} + /> + </div> + </div> + </div> + ) + 
)} + </div> + </div> + </div> + +const mapStateToProps = state => ({ + fingerprint: getFieldFingerprint(state) +}) + +const mapDispatchToProps = { + fetchFieldFingerPrint +} + +@connect(mapStateToProps, mapDispatchToProps) +@title(({ fingerprint }) => fingerprint && fingerprint.field.display_name || "Field") +class FieldXRay extends Component { + props: Props + + state = { + error: null + } + + componentDidMount () { + this.fetchFieldFingerprint() + } + + async fetchFieldFingerprint() { + const { params } = this.props + const cost = COSTS[params.cost] + try { + await this.props.fetchFieldFingerPrint(params.fieldId, cost) + } catch (error) { + this.setState({ error }) + } + + } + + componentDidUpdate (prevProps: Props) { + if(prevProps.params.cost !== this.props.params.cost) { + this.fetchFieldFingerprint() + } + } + + render () { + const { fingerprint, params } = this.props + const { error } = this.state + return ( + <LoadingAndErrorWrapper + loading={!fingerprint} + error={error} + noBackground + > + { () => + <XRayPageWrapper> + <div className="full"> + <div className="my3 flex align-center"> + <div> + <Link + className="my2 px2 text-bold text-brand-hover inline-block bordered bg-white p1 h4 no-decoration rounded shadowed" + to={`/xray/table/${fingerprint.table.id}/approximate`} + > + {fingerprint.table.display_name} + </Link> + <h1 className="mt2 flex align-center"> + {fingerprint.field.display_name} + <Icon name="chevronright" className="mx1 text-grey-3" size={16} /> + <span className="text-grey-3">XRay</span> + </h1> + <p className="mt1 text-paragraph text-measure"> + {fingerprint.field.description} + </p> + </div> + <div className="ml-auto flex align-center"> + <h3 className="mr2 text-grey-3">Fidelity</h3> + <CostSelect + xrayType='field' + id={fingerprint.field.id} + currentCost={params.cost} + /> + </div> + </div> + <div className="mt4"> + <Heading heading="Distribution" /> + <div className="bg-white bordered shadowed"> + <div className="lg-p4"> + <div 
style={{ height: 300 }}> + <Histogram histogram={fingerprint.histogram.value} /> + </div> + </div> + </div> + </div> + + { isDate(fingerprint.field) && <Periodicity fingerprint={fingerprint} /> } + + <StatGroup + heading="Values overview" + fingerprint={fingerprint} + stats={VALUES_OVERVIEW} + /> + + <StatGroup + heading="Statistical overview" + fingerprint={fingerprint} + showDescriptions + stats={STATS_OVERVIEW} + /> + + <StatGroup + heading="Robots" + fingerprint={fingerprint} + showDescriptions + stats={ROBOTS} + /> + </div> + </XRayPageWrapper> + } + </LoadingAndErrorWrapper> + ) + } +} + +export default FieldXRay + + diff --git a/frontend/src/metabase/xray/containers/SegmentComparison.jsx b/frontend/src/metabase/xray/containers/SegmentComparison.jsx new file mode 100644 index 0000000000000000000000000000000000000000..53c08771f3d055e566eecd9956d677f7deb2b633 --- /dev/null +++ b/frontend/src/metabase/xray/containers/SegmentComparison.jsx @@ -0,0 +1,31 @@ +import React, { Component } from 'react' +import { connect } from 'react-redux' + +import { fetchSegmentComparison } from 'metabase/reference/reference' + +import LoadingAndErrorWrapper from 'metabase/components/LoadingAndErrorWrapper' + +const mapStateToProps = state => ({ + segmentComparison: state.reference.segmentComparison +}) + +const mapDispatchToProps = { + fetchSegmentComparison +} + +class SegmentComparison extends Component { + componentWillMount () { + const { segmentId1, segmentId2 } = this.props.params + console.log('ids', segmentId1, segmentId2) + this.props.fetchSegmentComparison(segmentId1, segmentId2) + } + render () { + return ( + <LoadingAndErrorWrapper loading={!this.props.segmentComparison}> + { JSON.stringify(this.props.segmentComparison, null, 2) } + </LoadingAndErrorWrapper> + ) + } +} + +export default connect(mapStateToProps, mapDispatchToProps)(SegmentComparison) diff --git a/frontend/src/metabase/xray/containers/SegmentXRay.jsx 
b/frontend/src/metabase/xray/containers/SegmentXRay.jsx new file mode 100644 index 0000000000000000000000000000000000000000..3994eec540fe727854a99b4f6bc37794d572ac55 --- /dev/null +++ b/frontend/src/metabase/xray/containers/SegmentXRay.jsx @@ -0,0 +1,134 @@ +/* @flow */ +import React, { Component } from 'react' +import { connect } from 'react-redux' +import title from 'metabase/hoc/Title' + +import { Link } from 'react-router' + +import LoadingAndErrorWrapper from 'metabase/components/LoadingAndErrorWrapper' +import { XRayPageWrapper } from 'metabase/xray/components/XRayLayout' +import { fetchSegmentFingerPrint } from 'metabase/reference/reference' + +import Icon from 'metabase/components/Icon' +import COSTS from 'metabase/xray/costs' +import CostSelect from 'metabase/xray/components/CostSelect' + +import { + getSegmentConstituents, + getSegmentFingerprint +} from 'metabase/reference/selectors' + +import Constituent from 'metabase/xray/components/Constituent' + +import type { Table } from 'metabase/meta/types/Table' +import type { Segment } from 'metabase/meta/types/Segment' + +type Props = { + fetchSegmentFingerPrint: () => void, + constituents: [], + fingerprint: { + table: Table, + segment: Segment, + }, + params: { + segmentId: number, + cost: string, + } +} + +const mapStateToProps = state => ({ + fingerprint: getSegmentFingerprint(state), + constituents: getSegmentConstituents(state) +}) + +const mapDispatchToProps = { + fetchSegmentFingerPrint +} + +@connect(mapStateToProps, mapDispatchToProps) +@title(({ fingerprint }) => fingerprint && fingerprint.segment.name || "Segment" ) +class SegmentXRay extends Component { + props: Props + + state = { + error: null + } + + componentDidMount () { + this.fetchSegmentFingerPrint() + } + + async fetchSegmentFingerPrint () { + const { params } = this.props + const cost = COSTS[params.cost] + try { + await this.props.fetchSegmentFingerPrint(params.segmentId, cost) + } catch (error) { + this.setState({ error }) + } + } + + 
componentDidUpdate (prevProps: Props) { + if(prevProps.params.cost !== this.props.params.cost) { + this.fetchSegmentFingerPrint() + } + } + + render () { + const { constituents, fingerprint, params } = this.props + const { error } = this.state + return ( + <XRayPageWrapper> + <LoadingAndErrorWrapper + loading={!constituents} + error={error} + noBackground + > + { () => + <div className="full"> + <div className="my4 flex align-center py2"> + <div> + <Link + className="my2 px2 text-bold text-brand-hover inline-block bordered bg-white p1 h4 no-decoration shadowed rounded" + to={`/xray/table/${fingerprint.table.id}/approximate`} + > + {fingerprint.table.display_name} + </Link> + <h1 className="mt2 flex align-center"> + {fingerprint.segment.name} + <Icon name="chevronright" className="mx1 text-grey-3" size={16} /> + <span className="text-grey-3">XRay</span> + </h1> + <p className="mt1 text-paragraph text-measure"> + {fingerprint.segment.description} + </p> + </div> + <div className="ml-auto flex align-center"> + <h3 className="mr2 text-grey-3">Fidelity</h3> + <CostSelect + currentCost={params.cost} + xrayType='segment' + id={fingerprint.segment.id} + /> + </div> + </div> + <ol> + { constituents.map(c => { + return ( + <li> + <Constituent + constituent={c} + /> + </li> + ) + })} + </ol> + </div> + } + </LoadingAndErrorWrapper> + </XRayPageWrapper> + ) + } +} + +export default SegmentXRay diff --git a/frontend/src/metabase/xray/containers/TableComparison.jsx b/frontend/src/metabase/xray/containers/TableComparison.jsx new file mode 100644 index 0000000000000000000000000000000000000000..0bfc85f737665e419310bfc253b58f63eb8913af --- /dev/null +++ b/frontend/src/metabase/xray/containers/TableComparison.jsx @@ -0,0 +1,31 @@ +import React, { Component } from 'react' +import { connect } from 'react-redux' + +import { fetchTableComparison } from 'metabase/reference/reference' + +import LoadingAndErrorWrapper from 'metabase/components/LoadingAndErrorWrapper' + +const 
mapStateToProps = state => ({ + tableComparison: state.reference.tableComparison +}) + +const mapDispatchToProps = { + fetchTableComparison +} + +class TableComparison extends Component { + componentWillMount () { + const { tableId1, tableId2 } = this.props.params + console.log('ids', tableId1, tableId2) + this.props.fetchTableComparison(tableId1, tableId2) + } + render () { + return ( + <LoadingAndErrorWrapper loading={!this.props.tableComparison}> + { JSON.stringify(this.props.tableComparison, null, 2) } + </LoadingAndErrorWrapper> + ) + } +} + +export default connect(mapStateToProps, mapDispatchToProps)(TableComparison) diff --git a/frontend/src/metabase/xray/containers/TableXRay.jsx b/frontend/src/metabase/xray/containers/TableXRay.jsx new file mode 100644 index 0000000000000000000000000000000000000000..41e93a58e38f2bea2cf55e037c6349b574f7f076 --- /dev/null +++ b/frontend/src/metabase/xray/containers/TableXRay.jsx @@ -0,0 +1,123 @@ +/* @flow */ +import React, { Component } from 'react' + +import { connect } from 'react-redux' +import title from 'metabase/hoc/Title' + +import { fetchTableFingerPrint } from 'metabase/reference/reference' +import { XRayPageWrapper } from 'metabase/xray/components/XRayLayout' + +import COSTS from 'metabase/xray/costs' + +import CostSelect from 'metabase/xray/components/CostSelect' +import Constituent from 'metabase/xray/components/Constituent' + +import { + getTableConstituents, + getTableFingerprint +} from 'metabase/reference/selectors' + +import Icon from 'metabase/components/Icon' +import LoadingAndErrorWrapper from 'metabase/components/LoadingAndErrorWrapper' + +import type { Table } from 'metabase/meta/types/Table' + +type Props = { + constituents: [], + fetchTableFingerPrint: () => void, + fingerprint: { + table: Table + }, + params: { + tableId: number, + cost: string + } +} + +const mapStateToProps = state => ({ + fingerprint: getTableFingerprint(state), + constituents: getTableConstituents(state) +}) + +const 
mapDispatchToProps = { + fetchTableFingerPrint +} + +@connect(mapStateToProps, mapDispatchToProps) +@title(({ fingerprint }) => fingerprint && fingerprint.table.display_name || "Table") +class TableXRay extends Component { + props: Props + + state = { + error: null + } + + componentDidMount () { + this.fetchTableFingerPrint() + } + + async fetchTableFingerPrint () { + const { params } = this.props + const cost = COSTS[params.cost] + try { + await this.props.fetchTableFingerPrint(params.tableId, cost) + } catch (error) { + this.setState({ error }) + } + } + + componentDidUpdate (prevProps: Props) { + if(prevProps.params.cost !== this.props.params.cost) { + this.fetchTableFingerPrint() + } + } + + render () { + const { constituents, fingerprint, params } = this.props + const { error } = this.state + + return ( + <XRayPageWrapper> + <LoadingAndErrorWrapper + loading={!constituents} + error={error} + noBackground + > + { () => + <div className="full"> + <div className="my4 flex align-center py2"> + <div> + <h1 className="mt2 flex align-center"> + {fingerprint.table.display_name} + <Icon name="chevronright" className="mx1 text-grey-3" size={16} /> + <span className="text-grey-3">XRay</span> + </h1> + <p className="m0 text-paragraph text-measure">{fingerprint.table.description}</p> + </div> + <div className="ml-auto flex align-center"> + <h3 className="mr2">Fidelity:</h3> + <CostSelect + xrayType='table' + currentCost={params.cost} + id={fingerprint.table.id} + /> + </div> + </div> + <ol> + { constituents.map((constituent, index) => + <li key={index}> + <Constituent + constituent={constituent} + /> + </li> + )} + </ol> + </div> + } + </LoadingAndErrorWrapper> + </XRayPageWrapper> + ) + } +} + +export default TableXRay diff --git a/frontend/src/metabase/xray/costs.js b/frontend/src/metabase/xray/costs.js new file mode 100644 index 0000000000000000000000000000000000000000..1bd710214e2d057a2fa6bc44634da3d626009042 --- /dev/null +++ b/frontend/src/metabase/xray/costs.js @@ 
-0,0 +1,49 @@ +/* Combinations of MaxQueryCost and MaxComputationCost values combined into + * human understandable groupings. + * for more info on the actual values see src/metabase/fingerprints/costs.clj + */ + +const approximate = { + display_name: "Approximate", + description: ` + Get a sense for this data by looking at a sample. + This is faster but less precise. + `, + method: { + max_query_cost: 'sample', + max_computation_cost: 'linear' + }, + icon: 'costapproximate' +} + +const exact = { + display_name: "Exact", + description: ` + Go deeper into this data by performing a full scan. + This is more precise but slower. + `, + method: { + max_query_cost: 'full-scan', + max_computation_cost: 'unbounded' + }, + icon: 'costexact' +} + +const extended = { + display_name: "Extended", + description: ` + Adds additional info about this entity by including related objects. + This is the slowest but highest fidelity method. + `, + method: { + max_query_cost: 'full-scan', + max_computation_cost: 'unbounded' + }, + icon: 'costextended' +} + +export default { + approximate, + exact, + extended +} diff --git a/frontend/src/metabase/xray/stats.js b/frontend/src/metabase/xray/stats.js new file mode 100644 index 0000000000000000000000000000000000000000..898eb67d5b38b29045936489ec7087736f430b3c --- /dev/null +++ b/frontend/src/metabase/xray/stats.js @@ -0,0 +1,34 @@ +// keys for common values interesting for most folks +export const VALUES_OVERVIEW = [ + 'min', + 'earliest', // date field min is expressed as earliest + 'max', + 'latest', // date field max is expressed as latest + 'count', + 'sum', + 'cardinality', + 'sd', + 'nil%', + 'mean', + 'median', + 'mean-median-spread' +] + +// keys for common values interesting for stat folks +export const STATS_OVERVIEW = [ + 'kurtosis', + 'skewness', + 'entropy', + 'var', + 'sum-of-square', +] + +export const ROBOTS = [ + 'cardinality-vs-count', + 'positive-definite?', + 'has-nils?', + 'all-distinct?', +] + +// periods we care about 
for showing periodicity +export const PERIODICITY = ['day', 'week', 'month', 'hour', 'quarter'] diff --git a/project.clj b/project.clj index 504c2d9d5979520b628a92dab9694b26c470faf4..ec42da52b8483fb2958c9d7d5aeeeaea6cb5a391 100644 --- a/project.clj +++ b/project.clj @@ -26,6 +26,7 @@ org.clojure/clojurescript]] ; fixed length queue implementation, used in log buffering [amalloy/ring-gzip-middleware "0.1.3"] ; Ring middleware to GZIP responses if client can handle it [aleph "0.4.3"] ; Async HTTP library; WebSockets + [bigml/histogram "4.1.3"] ; Streaming one-pass Histogram data structure [buddy/buddy-core "1.2.0"] ; various cryptograhpic functions [buddy/buddy-sign "1.5.0"] ; JSON Web Tokens; High-Level message signing library [cheshire "5.7.0"] ; fast JSON encoding (used by Ring JSON middleware) @@ -58,6 +59,7 @@ [environ "1.1.0"] ; easy environment management [hiccup "1.0.5"] ; HTML templating [honeysql "0.8.2"] ; Transform Clojure data structures to SQL + [kixi/stats "0.3.8"] ; Various statistic measures implemented as transducers [log4j/log4j "1.2.17" ; logging framework :exclusions [javax.mail/mail javax.jms/jms @@ -69,6 +71,7 @@ [net.sf.cssbox/cssbox "4.12" ; HTML / CSS rendering :exclusions [org.slf4j/slf4j-api]] [net.sourceforge.jtds/jtds "1.3.1"] ; Open Source SQL Server driver + [com.clearspring.analytics/stream "2.9.5"] ; Various sketching algorithms [org.clojars.pntblnk/clj-ldap "0.0.12"] ; LDAP client [org.liquibase/liquibase-core "3.5.3"] ; migration management (Java lib) [org.slf4j/slf4j-log4j12 "1.7.25"] ; abstraction for logging frameworks -- allows end user to plug in desired logging framework at deployment time @@ -77,10 +80,12 @@ [postgresql "9.3-1102.jdbc41"] ; Postgres driver [io.crate/crate-jdbc "2.1.6"] ; Crate JDBC driver [prismatic/schema "1.1.5"] ; Data schema declaration and validation library + [redux "0.1.4"] ; Utility functions for building and composing transducers [ring/ring-core "1.6.0"] [ring/ring-jetty-adapter "1.6.0"] ; Ring 
adapter using Jetty webserver (used to run a Ring server for unit tests) [ring/ring-json "0.4.0"] ; Ring middleware for reading/writing JSON automatically [stencil "0.5.0"] ; Mustache templates for Clojure + [tide "0.1.0-SNAPSHOT"] ; Various algorithms for working with timeseries [toucan "1.0.3" ; Model layer, hydration, and DB utilities :exclusions [honeysql]]] :repositories [["bintray" "https://dl.bintray.com/crate/crate"]] ; Repo for Crate JDBC driver diff --git a/src/metabase/api/fingerprint.clj b/src/metabase/api/fingerprint.clj new file mode 100644 index 0000000000000000000000000000000000000000..70c28be3599523af761886770021605bc875cd5c --- /dev/null +++ b/src/metabase/api/fingerprint.clj @@ -0,0 +1,134 @@ +(ns metabase.api.fingerprint + (:require [compojure.core :refer [GET]] + [metabase.api.common :as api] + [metabase.fingerprinting.core :as f] + [metabase.models + [card :refer [Card]] + [field :refer [Field]] + [metric :refer [Metric]] + [segment :refer [Segment]] + [table :refer [Table]]] + [schema.core :as s])) + +;; See metabase.fingerprinting.core/fingerprint for description of these settings. +(def ^:private ^:const MaxQueryCost + (s/maybe (s/enum "cache" + "sample" + "full-scan" + "joins"))) + +(def ^:private ^:const MaxComputationCost + (s/maybe (s/enum "linear" + "unbounded" + "yolo"))) + +(def ^:private ^:const Scale + (s/maybe (s/enum "month" + "week" + "day"))) + +(defn- max-cost + [query computation] + {:query (keyword query) + :computation (keyword computation)}) + +(api/defendpoint GET "/field/:id" + "Get fingerprint for a `Field` with ID." + [id max_query_cost max_computation_cost] + {max_query_cost MaxQueryCost + max_computation_cost MaxComputationCost} + (->> id + (api/read-check Field) + (f/fingerprint {:max-cost (max-cost max_query_cost + max_computation_cost)}) + f/x-ray)) + +(api/defendpoint GET "/table/:id" + "Get fingerprint for a `Table` with ID." 
+ [id max_query_cost max_computation_cost] + {max_query_cost MaxQueryCost + max_computation_cost MaxComputationCost} + (->> id + (api/read-check Table) + (f/fingerprint {:max-cost (max-cost max_query_cost + max_computation_cost)}) + f/x-ray)) + +(api/defendpoint GET "/segment/:id" + "Get fingerprint for a `Segment` with ID." + [id max_query_cost max_computation_cost] + {max_query_cost MaxQueryCost + max_computation_cost MaxComputationCost} + (->> id + (api/read-check Segment) + (f/fingerprint {:max-cost (max-cost max_query_cost + max_computation_cost)}) + f/x-ray)) + +(api/defendpoint GET "/card/:id" + "Get fingerprint for a `Card` with ID." + [id max_query_cost max_computation_cost] + {max_query_cost MaxQueryCost + max_computation_cost MaxComputationCost} + (->> id + (api/read-check Card) + (f/fingerprint {:max-cost (max-cost max_query_cost + max_computation_cost)}) + f/x-ray)) + +(api/defendpoint GET "/compare/fields/:id1/:id2" + "Get comparison fingerprints for `Field`s with ID1 and ID2." + [id1 id2 max_query_cost max_computation_cost] + {max_query_cost MaxQueryCost + max_computation_cost MaxComputationCost} + (->> [id1 id2] + (map (partial api/read-check Field)) + (apply f/compare-fingerprints + {:max-cost (max-cost max_query_cost max_computation_cost)}) + f/x-ray)) + +(api/defendpoint GET "/compare/tables/:id1/:id2" + "Get comparison fingerprints for `Table`s with ID1 and ID2." + [id1 id2 max_query_cost max_computation_cost] + {max_query_cost MaxQueryCost + max_computation_cost MaxComputationCost} + (->> [id1 id2] + (map (partial api/read-check Table)) + (apply f/compare-fingerprints + {:max-cost (max-cost max_query_cost max_computation_cost)}) + f/x-ray)) + +(api/defendpoint GET "/compare/cards/:id1/:id2" + "Get comparison fingerprints for `Card`s with ID1 and ID2." 
+ [id1 id2 max_query_cost max_computation_cost] + {max_query_cost MaxQueryCost + max_computation_cost MaxComputationCost} + (->> [id1 id2] + (map (partial api/read-check Card)) + (apply f/compare-fingerprints + {:max-cost (max-cost max_query_cost max_computation_cost)}) + f/x-ray)) + +(api/defendpoint GET "/compare/segments/:id1/:id2" + "Get comparison fingerprints for `Segment`s with ID1 and ID2." + [id1 id2 max_query_cost max_computation_cost] + {max_query_cost MaxQueryCost + max_computation_cost MaxComputationCost} + (->> [id1 id2] + (map (partial api/read-check Segment)) + (apply f/compare-fingerprints + {:max-cost (max-cost max_query_cost max_computation_cost)}) + f/x-ray)) + +(api/defendpoint GET "/compare/segment/:sid/table/:tid" + "Compare `Segment` with `Table`." + [sid tid max_query_cost max_computation_cost] + {max_query_cost MaxQueryCost + max_computation_cost MaxComputationCost} + (f/x-ray + (f/compare-fingerprints + {:max-cost (max-cost max_query_cost max_computation_cost)} + (api/read-check Segment sid) + (api/read-check Table tid)))) + +(api/define-routes) diff --git a/src/metabase/api/routes.clj b/src/metabase/api/routes.clj index 90b1b20751da34b986d2c1ab3526e4239a6eb504..216991dff13b601f91a6c2cc7846088595a990b7 100644 --- a/src/metabase/api/routes.clj +++ b/src/metabase/api/routes.clj @@ -12,6 +12,7 @@ [email :as email] [embed :as embed] [field :as field] + [fingerprint :as fingerprint] [geojson :as geojson] [getting-started :as getting-started] [label :as label] @@ -61,6 +62,8 @@ (context "/email" [] (+auth email/routes)) (context "/embed" [] (+message-only-exceptions embed/routes)) (context "/field" [] (+auth field/routes)) + ;; TODO - fingerprint and comparison should be split out? 
+ (context "/fingerprint" [] (+auth fingerprint/routes)) (context "/getting_started" [] (+auth getting-started/routes)) (context "/geojson" [] (+auth geojson/routes)) (context "/label" [] (+auth label/routes)) diff --git a/src/metabase/db/metadata_queries.clj b/src/metabase/db/metadata_queries.clj index e4ca1943f0e25b814d331fb39adac28bd54cfe7a..a4773b26a542e7903df58acb75432724a1cde9f2 100644 --- a/src/metabase/db/metadata_queries.clj +++ b/src/metabase/db/metadata_queries.clj @@ -22,6 +22,7 @@ (defn- field-query [{table-id :table_id} query] {:pre [(integer? table-id)]} (qp-query (db/select-one-field :db_id Table, :id table-id) + ;; this seeming useless `merge` statement IS in fact doing something important. `ql/query` is a threading macro for building queries. Do not remove (ql/query (merge query) (ql/source-table table-id)))) @@ -64,3 +65,30 @@ [field] (-> (field-query field (ql/aggregation {} (ql/count (ql/field-id (u/get-id field))))) first first int)) + +(defn db-id + "Fetch the database ID of a given entity." + [x] + (db/select-one-field :db_id 'Table :id (:table_id x))) + +(defn field-values + "Return all the values of FIELD for QUERY." + [field query] + (->> (qp/process-query + {:type :query + :database (db-id field) + :query (merge {:fields [[:field-id (:id field)]] + :source-table (:table_id field)} + query)}) + :data + :rows + (map first))) + +(defn query-values + "Return all values for QUERY." + [db-id query] + (-> (qp/process-query + {:type :query + :database db-id + :query query}) + :data)) diff --git a/src/metabase/fingerprinting/comparison.clj b/src/metabase/fingerprinting/comparison.clj new file mode 100644 index 0000000000000000000000000000000000000000..9515e76524ecd6420b1e102faabbcf66fbf372de --- /dev/null +++ b/src/metabase/fingerprinting/comparison.clj @@ -0,0 +1,117 @@ +(ns metabase.fingerprinting.comparison + "Fingerprint similarity comparison." 
(:require [clojure.set :as set] + [kixi.stats.math :as math] + [metabase.fingerprinting + [fingerprinters :as fingerprinters] + [histogram :as h]] + [redux.core :as redux]) + (:import com.bigml.histogram.Histogram)) + +(def magnitude + "Transducer that calculates magnitude (Euclidean norm) of given vector. + https://en.wikipedia.org/wiki/Euclidean_distance" + (redux/post-complete (redux/pre-step + math/sq) math/sqrt)) + +(defn cosine-distance + "Cosine distance between vectors `a` and `b`. + https://en.wikipedia.org/wiki/Cosine_similarity" + [a b] + (transduce identity + (redux/post-complete + (redux/fuse {:magnitude-a (redux/pre-step magnitude first) + :magnitude-b (redux/pre-step magnitude second) + :product (redux/pre-step + (partial apply *))}) + (fn [{:keys [magnitude-a magnitude-b product]}] + (- 1 (/ product magnitude-a magnitude-b)))) + (map vector a b))) + +(defmulti + ^{:doc "Difference between two features. + Confined to [0, 1] with 0 being same, and 1 orthogonal." + :arglists '([a v])} + difference #(mapv type %&)) + +(defmethod difference [Number Number] + [a b] + (cond + (every? zero? [a b]) 0 + (zero? (max a b)) 1 + :else (/ (- (max a b) (min a b)) + (max a b)))) + +(defmethod difference [Boolean Boolean] + [a b] + (if (= a b) 0 1)) + +(defmethod difference [clojure.lang.Sequential clojure.lang.Sequential] + [a b] + (* 0.5 (cosine-distance a b))) + +(defn chi-squared-distance + "Chi-squared distance between empirical probability distributions `p` and `q`. + https://stats.stackexchange.com/questions/184101/comparing-two-histograms-using-chi-square-distance" + [p q] + (reduce + (map (fn [pi qi] + (if (zero? (+ pi qi)) + 0 + (/ (math/sq (- pi qi)) + (+ pi qi)))) + p q))) + +(defn- unify-categories + "Given two PMFs add missing categories and align them so they both cover the + same set of categories." 
+ [pmf-a pmf-b] + (let [categories-a (into #{} (map first) pmf-a) + categories-b (into #{} (map first) pmf-b)] + [(->> (set/difference categories-a categories-b) + (map #(vector % 0)) + (concat pmf-a) + (sort-by first)) + (->> (set/difference categories-b categories-a) + (map #(vector % 0)) + (concat pmf-b) + (sort-by first))])) + +(defmethod difference [Histogram Histogram] + [a b] + (let [[pdf-a pdf-b] (if (h/categorical? a) + (unify-categories (h/pdf a) (h/pdf b)) + (map h/pdf [a b]))] + ;; We are only interested in the shape, hence scale-free comparison + (chi-squared-distance (map second pdf-a) (map second pdf-b)))) + +(defn- flatten-map + ([m] (flatten-map nil m)) + ([prefix m] + (into {} + (mapcat (fn [[k v]] + (let [k (keyword (some-> prefix str (subs 1)) (name k))] + (if (map? v) + (flatten-map k v) + [[k v]])))) + m))) + +(defn pairwise-differences + "Pairwise differences of (feature) vectors `a` and `b`." + [a b] + (into {} + (map (fn [[k a] [_ b]] + [k (difference a b)]) + (flatten-map (fingerprinters/comparison-vector a)) + (flatten-map (fingerprinters/comparison-vector b))))) + +(def ^:private ^:const ^Double interestingness-thershold 0.2) + +(defn fingerprint-distance + "Distance metric between fingerprints `a` and `b`." + [a b] + (let [differences (pairwise-differences a b)] + {:distance (transduce (map val) + (redux/post-complete + magnitude + #(/ % (math/sqrt (count differences)))) + differences) + :components (sort-by val > differences) + :thereshold interestingness-thershold})) diff --git a/src/metabase/fingerprinting/core.clj b/src/metabase/fingerprinting/core.clj new file mode 100644 index 0000000000000000000000000000000000000000..c13ad250739a3ce7f2bd29e78550a305afd29419 --- /dev/null +++ b/src/metabase/fingerprinting/core.clj @@ -0,0 +1,135 @@ +(ns metabase.fingerprinting.core + "Fingerprinting (feature extraction) for various models." 
+ (:require [clojure.walk :refer [postwalk]] + [metabase.db.metadata-queries :as metadata] + [metabase.fingerprinting + [comparison :as comparison] + [costs :as costs] + [fingerprinters :as f] + [feature-descriptions :refer [add-descriptions]]] + [medley.core :as m] + [metabase.models + [card :refer [Card]] + [field :refer [Field]] + [metric :refer [Metric]] + [segment :refer [Segment]] + [table :refer [Table]]] + [metabase.util :as u] + [redux.core :as redux])) + +(defn- fingerprint-field + "Transduce given column with corresponding fingerprinter." + [opts field data] + (transduce identity (f/fingerprinter opts field) data)) + +(defn- fingerprint-query + "Transuce each column in given dataset with corresponding fingerprinter." + [opts {:keys [rows cols]}] + (transduce identity + (->> cols + (remove :remapped_to) + (map-indexed (fn [i field] + (redux/pre-step (f/fingerprinter opts field) + #(nth % i)))) + (apply redux/juxt)) + rows)) + +(defmulti + ^{:doc "Given a model, fetch corresponding dataset and compute its fingerprint. + + Takes a map of options as first argument. Recognized options: + * `:max-cost` a map with keys `:computation` and `:query` which + limits maximal resource expenditure when computing + the fingerprint. + See `metabase.fingerprinting.costs` for details." + :arglists '([opts field])} + fingerprint #(type %2)) + +(def ^:private ^:const ^Long max-sample-size 10000) + +(defn- extract-query-opts + [{:keys [max-cost]}] + (cond-> {} + (costs/sample-only? 
max-cost) (assoc :limit max-sample-size))) + +(defmethod fingerprint (type Field) + [opts field] + {:fingerprint (->> (metadata/field-values field (extract-query-opts opts)) + (fingerprint-field opts field) + (merge {:table (Table (:table_id field))}))}) + +(defmethod fingerprint (type Table) + [opts table] + {:constituents (fingerprint-query opts (metadata/query-values + (:db_id table) + (merge (extract-query-opts opts) + {:source-table (:id table)}))) + :fingerprint {:table table}}) + +(defmethod fingerprint (type Card) + [opts card] + (let [resolution (let [[head _ resolution] (-> card + :dataset_query + :query + :breakout + first)] + (when (= head :datetime-field) + resolution)) + query (-> card :dataset_query :query) + {:keys [rows cols]} (->> query + (merge (extract-query-opts opts)) + (metadata/query-values (:database_id card))) + {:keys [breakout aggregation]} (group-by :source cols) + fields [(first breakout) + (or (first aggregation) (second breakout))]] + {:constituents [(fingerprint-field opts (first fields) (map first rows)) + (fingerprint-field opts (second fields) (map second rows))] + :fingerprint (merge + (fingerprint-field (assoc opts :resolution resolution) + fields rows) + {:card card + :table (Table (:table_id card))})})) + +(defmethod fingerprint (type Segment) + [opts segment] + {:constituents (fingerprint-query opts (metadata/query-values + (metadata/db-id segment) + (merge (extract-query-opts opts) + (:definition segment)))) + :fingerprint {:table (Table (:table_id segment)) + :segment segment}}) + +(defmethod fingerprint (type Metric) + [_ metric] + {:metric metric}) + +(defn compare-fingerprints + "Compare fingerprints of two models." + [opts a b] + (let [[a b] (map (partial fingerprint opts) [a b])] + {:constituents [a b] + :comparison (into {} + (map (fn [[k a] [_ b]] + [k (if (sequential? 
a) + (map comparison/fingerprint-distance a b) + (comparison/fingerprint-distance a b))]) + a b))})) + +(defn- trim-decimals + [decimal-places fingerprint] + (postwalk + (fn [x] + (if (float? x) + (u/round-to-decimals (+ (- (min (long (f/order-of-magnitude x)) 0)) + decimal-places) + x) + x)) + fingerprint)) + +(defn x-ray + "Turn the fingerprint structure into an x-ray." + [fingerprint] + (let [x-ray (comp add-descriptions (partial trim-decimals 2) f/x-ray)] + (-> fingerprint + (update :fingerprint x-ray) + (update :constituents (partial map x-ray))))) diff --git a/src/metabase/fingerprinting/costs.clj b/src/metabase/fingerprinting/costs.clj new file mode 100644 index 0000000000000000000000000000000000000000..28a2c15d150c7882ba1fb9a980f80d679127dcb9 --- /dev/null +++ b/src/metabase/fingerprinting/costs.clj @@ -0,0 +1,38 @@ +(ns metabase.fingerprinting.costs + "Predicates for limiting resource expanditure during fingerprinting." + (:require [schema.core :as s])) + +(def MaxCost + "Schema for max-cost parameter." + {:computation (s/enum :linear :unbounded :yolo) + :query (s/enum :cache :sample :full-scan :joins)}) + +(def ^{:arglists '([max-cost])} linear-computation? + "Limit computation to O(n) or better." + (comp #{:linear} :computation)) + +(def ^{:arglists '([max-cost])} unbounded-computation? + "Alow unbounded but always convergent computation. + Default if no cost limit is specified." + (comp (partial contains? #{:unbounded :yolo nil}) :computation)) + +(def ^{:arglists '([max-cost])} yolo-computation? + "Alow any computation including full blown machine learning." + (comp #{:yolo} :computation)) + +(def ^{:arglists '([max-cost])} cache-only? + "Use cached data only." + (comp #{:cache} :query)) + +(def ^{:arglists '([max-cost])} sample-only? + "Only sample data." + (comp #{:sample} :query)) + +(def ^{:arglists '([max-cost])} full-scan? + "Alow full table scans. + Default if no cost limit is specified." + (comp (partial contains? 
#{:full-scan :joins nil}) :query)) + +(def ^{:arglists '([max-cost])} alow-joins? + "Alow bringing in data from other tables if needed." + (comp #{:joins} :query)) diff --git a/src/metabase/fingerprinting/feature_descriptions.clj b/src/metabase/fingerprinting/feature_descriptions.clj new file mode 100644 index 0000000000000000000000000000000000000000..fd6552fb44e72b92ccb83f5c72cd57d8b6e35cd4 --- /dev/null +++ b/src/metabase/fingerprinting/feature_descriptions.clj @@ -0,0 +1,76 @@ +(ns metabase.fingerprinting.feature-descriptions + "Desciptions of all the fingerprint features exposed as x-rays." + (:require [medley.core :as m])) + +(def ^:private descriptions + {:histogram {:label "Distribution" + :description "Distribution of values." + :link "https://en.wikipedia.org/wiki/Probability_mass_function"} + :percentiles {:label "Percentiles" + :link "https://en.wikipedia.org/wiki/Percentile"} + :sum {:label "Sum" + :description "Sum of all values."} + :sum-of-squares {:label "Sum of squares" + :description "Sum of squares of all values."} + :%>mean {:label "Share of values greater than mean."} + :cv {:label "Coefficient of variation" + :description "Ratio between mean and standard deviation. Used as a dispersion measure." + :link "https://en.wikipedia.org/wiki/Coefficient_of_variation"} + :range-vs-sd {:label "Ratio between standard deviation and range of values."} + :mean-median-spread {:label "Relative mean-median spread" + :description "The lower the ratio, the more symmetric the distribution."} + :range {:label "Range" + :description "Range between the smallest and the largest value."} + :cardinality {:label "Cardinality" + :description "Number of different values."} + :min {:label "Minimal value"} + :max {:label "Maximal value"} + :mean {:label "Mean" + :description "Mean (expected) value." + :link "https://en.wikipedia.org/wiki/Mean"} + :median {:label "Median" + :description "Value seperating the data set in two equal halfs -- the \"middle\" value." 
+ :link "https://en.wikipedia.org/wiki/Median"} + :var {:label "Variance" + :description "Measure of how far the values are spread from the mean." + :link "https://en.wikipedia.org/wiki/Variance"} + :sd {:label "Standard deviation" + :description "Measure of how far the values are spread from the mean." + :link "https://en.wikipedia.org/wiki/Standard_deviation"} + :count {:label "Count" + :description "Number of rows in the dataset." + } + :kurtosis {:label "Kurtosis" + :description "Descriptor of the shape of the distribution. Measures tail extremity (outliers)" + :link "https://en.wikipedia.org/wiki/Kurtosis"} + :skewness {:label "Skewness" + :description "Measure of asymmetry of the distribution." + :link "https://en.wikipedia.org/wiki/Skewness"} + :entropy {:label "Entropy" + :description "Measure of unpredictability of the state (ie. of its average information content)." + :link "https://en.wikipedia.org/wiki/Entropy_(information_theory)"} + :linear-regression {:label "Linear regression" + :description "Slope and intercept of a linear function fit to data." + :link "https://en.wikipedia.org/wiki/Linear_regression"} + :correlation {:label "Correlation" + :description "The quality of a least squares fitting -- the extent to which two variables have a linear relationship with each other." + :link "http://mathworld.wolfram.com/CorrelationCoefficient.html"} + :covariance {:label "Covariance" + :description "A measure of the joint variability." + :link "https://en.wikipedia.org/wiki/Covariance"} + :seasonal-decomposition {:label "Seasonal decomposition" + :description "Decomposes time series into seasonal, trend, and residual components." 
+ :link "http://www.stat.washington.edu/courses/stat527/s13/readings/Cleveland_JASA_1979.pdf"} + :earliest {:label "The earliest value"} + :latest {:label "The latest value"} + :histogram-hour {:label "Distribution of hours in a day"} + :histogram-day {:label "Distribution of days of week"} + :histogram-month {:label "Distribution of months"} + :histogram-quarter {:label "Distribution of quarters"}}) + +(def ^{:arglists '([fingerprint])} add-descriptions + "Add descriptions of features to naked values where applicable." + (partial m/map-kv (fn [k v] + (if-let [description (descriptions k)] + [k (assoc description :value v)] + [k v])))) diff --git a/src/metabase/fingerprinting/fingerprinters.clj b/src/metabase/fingerprinting/fingerprinters.clj new file mode 100644 index 0000000000000000000000000000000000000000..c3990bcd4b6174e347a9c74c983b32997695d6ff --- /dev/null +++ b/src/metabase/fingerprinting/fingerprinters.clj @@ -0,0 +1,554 @@ +(ns metabase.fingerprinting.fingerprinters + "Fingerprinting (feature extraction) for various models." + (:require [bigml.histogram.core :as h.impl] + [clojure.math.numeric-tower :refer [ceil expt floor round]] ;;;;;; temp! + [clj-time + [coerce :as t.coerce] + [core :as t] + [format :as t.format] + [periodic :as t.periodic]] + [kixi.stats + [core :as stats] + [math :as math]] + [medley.core :as m] + [metabase.fingerprinting + [histogram :as h] + [costs :as costs]] + [metabase.util :as u] ;;;; temp! + [redux.core :as redux] + [tide.core :as tide]) + (:import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus)) + +(def ^:private ^:const percentiles (range 0 1 0.1)) + +(defn rollup + "Transducer that groups by `groupfn` and reduces each group with `f`. + Note the contructor airity of `f` needs to be free of side effects." + [f groupfn] + (let [init (f)] + (fn + ([] (transient {})) + ([acc] + (into {} + (map (fn [[k v]] + [k (f v)])) + (persistent! acc))) + ([acc x] + (let [k (groupfn x)] + (assoc! 
acc k (f (get acc k init) x))))))) + +(defn safe-divide + "Like `clojure.core//`, but returns nil if denominator is 0." + [x & denominators] + (when (or (and (not-empty denominators) (not-any? zero? denominators)) + (and (not (zero? x)) (empty? denominators))) + (apply / x denominators))) + +(defn growth + "Relative difference between `x1` an `x2`." + [x2 x1] + (when (every? some? [x2 x1]) + (safe-divide (* (if (neg? x1) -1 1) (- x2 x1)) x1))) + +(def ^:private ^:const ^Double cardinality-error 0.01) + +(defn cardinality + "Transducer that sketches cardinality using HyperLogLog++. + https://research.google.com/pubs/pub40671.html" + ([] (HyperLogLogPlus. 14 25)) + ([^HyperLogLogPlus acc] (.cardinality acc)) + ([^HyperLogLogPlus acc x] + (.offer acc x) + acc)) + +(def ^:private Num [:type/Number :type/*]) +(def ^:private DateTime [:type/DateTime :type/*]) +(def ^:private Category [:type/* :type/Category]) +; (def ^:private Any [:type/* :type/*]) +(def ^:private Text [:type/Text :type/*]) + +;;;;;;;;;;;;;;;;;; temporary cp until we merge the binning branch ;;;;;;;;;; + + +(defn- calculate-bin-width [min-value max-value num-bins] + (u/round-to-decimals 5 (/ (- max-value min-value) + num-bins))) + +(defn- calculate-num-bins [min-value max-value bin-width] + (long (ceil (/ (- max-value min-value) + bin-width)))) + +(defn- ceil-to + [precision x] + (let [scale (/ precision)] + (/ (ceil (* x scale)) scale))) + +(defn- floor-to + [precision x] + (let [scale (/ precision)] + (/ (floor (* x scale)) scale))) + +;;;;;;;; cast to long +(defn order-of-magnitude + "Return oder of magnitude." + [x] + (if (zero? 
x) + 0 + (long (floor (/ (math/log (math/abs x)) (math/log 10)))))) + +(def ^:private ^:const pleasing-numbers [1 1.25 2 2.5 3 5 7.5 10]) + +(defn- nicer-bin-width + [min-value max-value num-bins] + (let [min-bin-width (calculate-bin-width min-value max-value num-bins) + scale (expt 10 (order-of-magnitude min-bin-width))] + (->> pleasing-numbers + (map (partial * scale)) + (drop-while (partial > min-bin-width)) + first))) + +(defn- nicer-bounds + [min-value max-value bin-width] + [(floor-to bin-width min-value) (ceil-to bin-width max-value)]) + +(def ^:private ^:const max-steps 10) + +(defn- fixed-point + [f] + (fn [x] + (->> (iterate f x) + (partition 2 1) + (take max-steps) + (drop-while (partial apply not=)) + ffirst))) + +(def ^:private ^{:arglists '([binned-field])} nicer-breakout + (fixed-point + (fn + [{:keys [min-value max-value bin-width num-bins strategy] :as binned-field}] + (let [bin-width (if (= strategy :num-bins) + (nicer-bin-width min-value max-value num-bins) + bin-width) + [min-value max-value] (nicer-bounds min-value max-value bin-width)] + (-> binned-field + (assoc :min-value min-value + :max-value max-value + :num-bins (if (= strategy :num-bins) + num-bins + (calculate-num-bins min-value max-value bin-width)) + :bin-width bin-width)))))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(defn- equidistant-bins + [histogram] + (if (h/categorical? histogram) + (-> histogram h.impl/bins first :target :counts) + (let [{:keys [min max]} (h.impl/bounds histogram)] + (cond + (nil? 
min) [] + (= min max) [[min 1.0]] + :else (let [{:keys [min-value num-bins bin-width]} + (nicer-breakout + {:min-value min + :max-value max + :num-bins (->> histogram + h/optimal-bin-width + (calculate-num-bins min max)) + :strategy :num-bins})] + (->> min-value + (iterate (partial + bin-width)) + (take (inc num-bins)) + (map (fn [x] + [x (h.impl/sum histogram x)])) + (partition 2 1) + (map (fn [[[x s1] [_ s2]]] + [x (- s2 s1)])))))))) + +(defn- histogram->dataset + ([field histogram] (histogram->dataset identity field histogram)) + ([keyfn field histogram] + {:rows (let [norm (safe-divide (h.impl/total-count histogram))] + (for [[k v] (equidistant-bins histogram)] + [(keyfn k) (* v norm)])) + :columns [(:name field) "SHARE"] + :cols [(dissoc field :remapped_from) + {:name "SHARE" + :display_name "Share" + :description "Share of corresponding bin in the overall population." + :base_type :type/Float}]})) + +(defn- field-type + [field] + (if (sequential? field) + (mapv field-type field) + [(:base_type field) (or (:special_type field) :type/*)])) + +(defmulti + ^{:doc "Transducer that summarizes (_fingerprints_) given coll. What features + are extracted depends on the type of corresponding `Field`(s), amount + of data points available (some algorithms have a minimum data points + requirement) and `max-cost.computation` setting. + Note we are heavily using data sketches so some summary values may be + approximate." + :arglists '([opts field])} + fingerprinter #(field-type %2)) + +(defmulti + ^{:doc "Make fingerprint human readable." + :arglists '([fingerprint])} + x-ray :type) + +(defmethod x-ray :default + [fingerprint] + fingerprint) + +(defmulti + ^{:doc "Fingerprint feature vector for comparison/difference purposes." 
+ :arglists '([fingerprint])} + comparison-vector :type) + +(defmethod comparison-vector :default + [fingerprint] + (dissoc fingerprint :type :field :has-nils?)) + +(defmethod fingerprinter Num + [{:keys [max-cost]} field] + (redux/post-complete + (redux/fuse {:histogram h/histogram + :cardinality cardinality + :kurtosis stats/kurtosis + :skewness stats/skewness + :sum (redux/with-xform + (remove nil?)) + :sum-of-squares (redux/with-xform + (comp (remove nil?) + (map math/sq)))}) + (fn [{:keys [histogram cardinality kurtosis skewness sum sum-of-squares]}] + (if (pos? (h/total-count histogram)) + (let [nil-count (h/nil-count histogram) + total-count (h/total-count histogram) + uniqueness (/ cardinality (max total-count 1)) + var (or (h.impl/variance histogram) 0) + sd (math/sqrt var) + min (h.impl/minimum histogram) + max (h.impl/maximum histogram) + mean (h.impl/mean histogram) + median (h.impl/median histogram) + range (- max min)] + (merge + {:histogram histogram + :percentiles (apply h.impl/percentiles histogram percentiles) + :positive-definite? (>= min 0) + :%>mean (- 1 ((h.impl/cdf histogram) mean)) + :uniqueness uniqueness + :var>sd? (> var sd) + :nil% (/ nil-count (clojure.core/max total-count 1)) + :has-nils? (pos? nil-count) + :0<=x<=1? (<= 0 min max 1) + :-1<=x<=1? (<= -1 min max 1) + :cv (safe-divide sd mean) + :range-vs-sd (safe-divide sd range) + :mean-median-spread (safe-divide (- mean median) range) + :min-vs-max (safe-divide min max) + :range range + :cardinality cardinality + :min min + :max max + :mean mean + :median median + :var var + :sd sd + :count total-count + :kurtosis kurtosis + :skewness skewness + :all-distinct? (>= uniqueness (- 1 cardinality-error)) + :entropy (h/entropy histogram) + :type Num + :field field} + (when (costs/full-scan? 
max-cost) + {:sum sum + :sum-of-squares sum-of-squares}))) + {:count 0 + :type Num + :field field})))) + +(defmethod comparison-vector Num + [fingerprint] + (select-keys fingerprint + [:histogram :mean :median :min :max :sd :count :kurtosis + :skewness :entropy :nil% :uniqueness :range :min-vs-max])) + +(defmethod x-ray Num + [{:keys [field count] :as fingerprint}] + (if (pos? count) + (-> fingerprint + (update :histogram (partial histogram->dataset field)) + (dissoc :has-nils? :var>sd? :0<=x<=1? :-1<=x<=1? :all-distinct? + :positive-definite? :var>sd? :uniqueness :min-vs-max)) + fingerprint)) + +(defmethod fingerprinter [Num Num] + [_ field] + (redux/post-complete + (redux/fuse {:linear-regression (stats/simple-linear-regression first second) + :correlation (stats/correlation first second) + :covariance (stats/covariance first second)}) + #(assoc % :type [Num Num] + :field field))) + +(def ^:private ^{:arglists '([t])} to-double + "Coerce `DateTime` to `Double`." + (comp double t.coerce/to-long)) + +(def ^:private ^{:arglists '([t])} from-double + "Coerce `Double` into a `DateTime`." + (comp t.coerce/from-long long)) + +(defn- fill-timeseries + "Given a coll of `[DateTime, Any]` pairs with periodicty `step` fill missing + periods with 0." + [step ts] + (let [ts-index (into {} ts)] + (into [] + (comp (map to-double) + (take-while (partial >= (-> ts last first))) + (map (fn [t] + [t (ts-index t 0)]))) + (some-> ts + ffirst + from-double + (t.periodic/periodic-seq step))))) + +(defn- decompose-timeseries + "Decompose given timeseries with expected periodicty `period` into trend, + seasonal component, and reminder. + `period` can be one of `:day`, `week`, or `:month`." 
+ [period ts] + (let [period (case period + :month 12 + :week 52 + :day 365)] + (when (>= (count ts) (* 2 period)) + (select-keys (tide/decompose period ts) [:trend :seasonal :reminder])))) + +(defmethod fingerprinter [DateTime Num] ; fingerprints [timestamp, number] pairs: linear regression plus, when a resolution is given, a gap-filled series and its seasonal decomposition + [{:keys [max-cost resolution query]} field] + (redux/post-complete + (redux/pre-step + (redux/fuse {:linear-regression (stats/simple-linear-regression first second) + :series (if (nil? resolution) + conj + (redux/post-complete + conj + (partial fill-timeseries + (case resolution + :month (t/months 1) + :week (t/weeks 1) + :day (t/days 1)))))}) + (fn [[x y]] + [(-> x t.format/parse to-double) y])) + (fn [{:keys [series linear-regression]}] + (let [ys-r (->> series (map second) reverse not-empty)] + (merge {:resolution resolution + :type [DateTime Num] + :field field + :series series + :linear-regression linear-regression + :seasonal-decomposition + (when (and resolution + (costs/unbounded-computation? max-cost)) + (decompose-timeseries resolution series))} + (when (costs/alow-joins? max-cost) ; test the cost budget; `series` is a vector, so looking up :query on it is always nil + {:YoY 0 + :MoM 0 + :WoW 0 + :DoD 0})))))) + +(defmethod comparison-vector [DateTime Num] + [fingerprint] + (dissoc fingerprint :type :resolution :field)) + +(defmethod x-ray [DateTime Num] + [fingerprint] + (dissoc fingerprint :series)) + +;; This one needs way more thinking +;; +;; (defmethod fingerprinter [Category Any] +;; [opts [x y]] +;; (rollup (redux/pre-step (fingerprinter opts y) second) first)) + +(defmethod fingerprinter Text + [_ field] + (redux/post-complete + (redux/fuse {:histogram (redux/pre-step + h/histogram + (stats/somef (comp count u/jdbc-clob->str)))}) + (fn [{:keys [histogram]}] + (let [nil-count (h/nil-count histogram) + total-count (h/total-count histogram)] + {:min (h.impl/minimum histogram) + :max (h.impl/maximum histogram) + :histogram histogram + :count total-count + :nil% (/ nil-count (max total-count 1)) + :has-nils? (pos? 
nil-count) + :type Text + :field field})))) + +(defmethod x-ray Text + [{:keys [field] :as fingerprint}] + (update fingerprint :histogram (partial histogram->dataset field))) + +(defn- quarter + [dt] + (-> dt t/month (/ 3) Math/ceil long)) + +(defmethod fingerprinter DateTime + [_ field] + (redux/post-complete + (redux/pre-step + (redux/fuse {:histogram (redux/pre-step h/histogram t.coerce/to-long) + :histogram-hour (redux/pre-step h/histogram-categorical + (stats/somef t/hour)) + :histogram-day (redux/pre-step h/histogram-categorical + (stats/somef t/day-of-week)) + :histogram-month (redux/pre-step h/histogram-categorical + (stats/somef t/month)) + :histogram-quarter (redux/pre-step h/histogram-categorical + (stats/somef quarter))}) + t.format/parse) + (fn [{:keys [histogram histogram-hour histogram-day histogram-month + histogram-quarter]}] + (let [nil-count (h/nil-count histogram) + total-count (h/total-count histogram)] + {:earliest (h.impl/minimum histogram) + :latest (h.impl/maximum histogram) + :histogram histogram + :percentiles (apply h.impl/percentiles histogram percentiles) + :histogram-hour histogram-hour + :histogram-day histogram-day + :histogram-month histogram-month + :histogram-quarter histogram-quarter + :count total-count + :nil% (/ nil-count (max total-count 1)) + :has-nils? (pos? 
+ nil-count) + :entropy (h/entropy histogram) + :type DateTime + :field field})))) + +(defmethod comparison-vector DateTime + [fingerprint] + (dissoc fingerprint :type :percentiles :field :has-nils?)) + +(defn- round-to-month "Round `dt` to the nearest month boundary: days 1-15 go to the start of the same month, later days to the start of the following month." + [dt] + (if (<= (t/day dt) 15) + (t/floor dt t/month) + (t/plus (t/floor dt t/month) (t/months 1)))) ; period arithmetic rolls December into January of the next year instead of asking for month 13 + +(defn- month-frequencies "Map from month number (1-12) to the number of times it is visited when stepping month by month from `earliest` to `latest`, both first rounded with `round-to-month`." + [earliest latest] + (let [earliest (round-to-month earliest) + latest (round-to-month latest) + start-month (t/month earliest) + duration (t/in-months (t/interval earliest latest))] + (->> (range (dec start-month) (+ start-month duration)) + (map #(inc (mod % 12))) + frequencies))) + +(defn- quarter-frequencies "Map from quarter number (1-4) to the number of times it is visited when stepping quarter by quarter from `earliest` to `latest`, both first rounded with `round-to-month`." + [earliest latest] + (let [earliest (round-to-month earliest) + latest (round-to-month latest) + start-quarter (quarter earliest) + duration (round (/ (t/in-months (t/interval earliest latest)) 3))] + (->> (range (dec start-quarter) (+ start-quarter duration)) + (map #(inc (mod % 4))) + frequencies))) + +(defn- weigh-periodicity "Scale the value of every `[k v]` row of `card` by baseline/weight-of-k, where baseline is the smallest weight; a key absent from `weights` is left unscaled." + [weights card] + (let [baseline (apply min (vals weights))] + (update card :rows (partial map (fn [[k v]] + [k (* v (/ baseline (weights k baseline)))]))))) ; rounding the bounds can drop an edge period from `weights`; default to baseline rather than dividing by nil + +(defmethod x-ray DateTime + [{:keys [field earliest latest count] :as fingerprint}] + (if (pos? 
count) + (let [earliest (from-double earliest) + latest (from-double latest)] + (-> fingerprint + (assoc :earliest earliest) + (assoc :latest latest) + (update :histogram (partial histogram->dataset from-double field)) + (update :percentiles (partial m/map-vals from-double)) + (update :histogram-hour (partial histogram->dataset + {:name "HOUR" + :display_name "Hour of day" + :base_type :type/Integer + :special_type :type/Category})) + (update :histogram-day (partial histogram->dataset + {:name "DAY" + :display_name "Day of week" + :base_type :type/Integer + :special_type :type/Category})) + (update :histogram-month (comp + (partial weigh-periodicity + (month-frequencies earliest latest)) + (partial histogram->dataset + {:name "MONTH" + :display_name "Month of year" + :base_type :type/Integer + :special_type :type/Category}))) + (update :histogram-quarter (comp + (partial weigh-periodicity + (quarter-frequencies earliest latest)) + (partial histogram->dataset + {:name "QUARTER" + :display_name "Quarter of year" + :base_type :type/Integer + :special_type :type/Category}))))) + (select-keys fingerprint [:count :type :field]))) + +(defmethod fingerprinter Category + [_ field] + (redux/post-complete + (redux/fuse {:histogram h/histogram-categorical + :cardinality cardinality}) + (fn [{:keys [histogram cardinality]}] + (let [nil-count (h/nil-count histogram) + total-count (h/total-count histogram) + uniqueness (/ cardinality (max total-count 1))] + {:histogram histogram + :uniqueness uniqueness + :nil% (/ nil-count (max total-count 1)) + :has-nils? (pos? 
nil-count) + :cardinality cardinality + :count total-count + :entropy (h/entropy histogram) + :type Category + :field field})))) + +(defmethod comparison-vector Category + [fingerprint] + (dissoc fingerprint :type :cardinality :field :has-nils?)) + +(defmethod x-ray Category + [{:keys [field] :as fingerprint}] + (update fingerprint :histogram (partial histogram->dataset field))) + +(defmethod fingerprinter :default + [_ field] + (redux/post-complete + (redux/fuse {:total-count stats/count + :nil-count (redux/with-xform stats/count (filter nil?))}) + (fn [{:keys [total-count nil-count]}] + {:count total-count + :nil% (/ nil-count (max total-count 1)) + :has-nils? (pos? nil-count) + :type [nil (field-type field)] + :field field}))) + +(prefer-method fingerprinter Category Text) +(prefer-method fingerprinter Num Category) diff --git a/src/metabase/fingerprinting/histogram.clj b/src/metabase/fingerprinting/histogram.clj new file mode 100644 index 0000000000000000000000000000000000000000..5e4a491ede3dc13b80f2b70ab90184c54b3f96d6 --- /dev/null +++ b/src/metabase/fingerprinting/histogram.clj @@ -0,0 +1,82 @@ +(ns metabase.fingerprinting.histogram + "Wrappers and additional functionality for `bigml.histogram`." + (:require [bigml.histogram.core :as impl] + [kixi.stats.math :as math] + [redux.core :as redux]) + (:import com.bigml.histogram.Histogram)) + +(defn histogram + "Transducer that summarizes numerical data with a histogram." + ([] (impl/create)) + ([^Histogram histogram] histogram) + ([^Histogram histogram x] (impl/insert-simple! histogram x))) + +(defn histogram-categorical + "Transducer that summarizes categorical data with a histogram." + ([] (impl/create)) + ([^Histogram histogram] histogram) + ([^Histogram histogram x] (impl/insert-categorical! histogram (when x 1) x))) + +(def ^{:arglists '([^Histogram histogram])} categorical? + "Returns true if given histogram holds categorical values." 
+ (comp (complement #{:none :unset}) impl/target-type)) + +(def ^:private ^:const ^Long pdf-sample-points 100) + +(defn pdf + "Probability density function of given histogram. + Obtained by sampling density at `pdf-sample-points` points from the histogram + or at each target if histogram holds categorical data. + https://en.wikipedia.org/wiki/Probability_density_function" + [^Histogram histogram] + (if (categorical? histogram) + (let [norm (/ (impl/total-count histogram))] + (for [[target count] (-> histogram impl/bins first :target :counts)] + [target (* count norm)])) + (let [{:keys [min max]} (impl/bounds histogram)] + (cond + (nil? min) [] + (= min max) [[min 1.0]] + :else (let [step (/ (- max min) pdf-sample-points)] + (transduce (take pdf-sample-points) + (fn + ([] {:total-density 0 + :densities (transient [])}) + ([{:keys [total-density densities]}] + (for [[x density] (persistent! densities)] + [x (/ density total-density)])) + ([acc x] + (let [d (impl/density histogram x)] + (-> acc + (update :densities conj! [x d]) + (update :total-density + d))))) + (iterate (partial + step) min))))))) + +(def ^{:arglists '([^Histogram histogram])} nil-count + "Return number of nil values histogram holds." + (comp :count impl/missing-bin)) + +(defn total-count + "Return total number (including nils) of values histogram holds." + [^Histogram histogram] + (+ (impl/total-count histogram) + (nil-count histogram))) + +(defn entropy + "Calculate (Shannon) entropy of given histogram. + https://en.wikipedia.org/wiki/Entropy_(information_theory)" + [^Histogram histogram] + (transduce (comp (map second) + (remove zero?) + (map #(* % (math/log %)))) + (redux/post-complete + -) + (pdf histogram))) + +(defn optimal-bin-width + "Determine optimal bin width (and consequently number of bins) for a given + histogram using Freedman-Diaconis rule. 
+ https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule" + [^Histogram histogram] + (let [{first-q 0.25 third-q 0.75} (impl/percentiles histogram 0.25 0.75)] + (when first-q + (* 2 (- third-q first-q) (math/pow (impl/total-count histogram) (/ -3)))))) diff --git a/src/metabase/types.clj b/src/metabase/types.clj index 80c62f665e14a870888923d6fd1dae60e39b67d0..6c7c8d450319ff1b18acb2b774c85d39e92be8d6 100644 --- a/src/metabase/types.clj +++ b/src/metabase/types.clj @@ -5,7 +5,6 @@ (derive :type/Dictionary :type/Collection) (derive :type/Array :type/Collection) - ;;; Numeric Types (derive :type/Number :type/*) diff --git a/test/metabase/fingerprinting_test.clj b/test/metabase/fingerprinting_test.clj new file mode 100644 index 0000000000000000000000000000000000000000..98deffe4ebdc75889a41d3629e2caadbbcf94c57 --- /dev/null +++ b/test/metabase/fingerprinting_test.clj @@ -0,0 +1,154 @@ +(ns metabase.fingerprinting-test + (:require [clj-time.coerce :as t.coerce] + [clj-time.core :as t] + [expectations :refer :all] + [metabase.fingerprinting + [core :as f.core] + [costs :refer :all] + [fingerprinters :as f :refer :all] + [histogram :as h :refer :all]] + [redux.core :as redux])) + +(def ^:private numbers [0.1 0.4 0.2 nil 0.5 0.3 0.51 0.55 0.22]) +(def ^:private datetimes ["2015-06-01" nil "2015-06-11" "2015-01-01" + "2016-06-31" "2017-09-01" "2016-04-15" "2017-11-02"]) +(def ^:private categories [:foo :baz :bar :bar nil :foo]) + +(def ^:private hist (transduce identity histogram (take 100 (cycle numbers)))) +(def ^:private hist-c (transduce identity histogram-categorical + (take 100 (cycle categories)))) + +(expect + [2 + (/ 4) + nil + nil] + [(safe-divide 4 2) + (safe-divide 4) + (safe-divide 0) + (safe-divide 4 0)]) + +(expect + [(/ 23 100) + 0.5 + -1.0 + -5.0 + 1.2] + [(growth 123 100) + (growth -0.1 -0.2) + (growth -0.4 -0.2) + (growth -0.4 0.1) + (growth 0.1 -0.5)]) + +(expect + [100.0 + 11] + [(total-count hist) + (nil-count hist)]) + +(expect + [-0.0 + true] + 
(let [all-ones (entropy (transduce identity histogram (repeat 10 1)))] + [all-ones + (> (entropy hist) (entropy hist-c) all-ones)])) + +(expect + [{:foo 2 + :bar 10} + {}] + [(transduce identity (rollup (redux/pre-step + :y) :x) + [{:x :foo :y 1} + {:x :foo :y 1} + {:x :bar :y 5} + {:x :bar :y 3} + {:x :bar :y 2}]) + (transduce identity (rollup (redux/pre-step + :y) :x) [])]) + +(expect + [1 + 1 + 2 + 4] + [(#'f/quarter (t/date-time 2017 1)) + (#'f/quarter (t/date-time 2017 3)) + (#'f/quarter (t/date-time 2017 5)) + (#'f/quarter (t/date-time 2017 12))]) + +(expect + {:limit (var-get #'f.core/max-sample-size)} + (#'f.core/extract-query-opts {:max-cost {:query :sample}})) + +(defn- make-timestamp + [y m] + (-> (t/date-time y m) + ((var f/to-double)))) + +(expect + [[(make-timestamp 2016 1) 12] + [(make-timestamp 2016 2) 0] + [(make-timestamp 2016 3) 4] + [(make-timestamp 2016 4) 0] + [(make-timestamp 2016 5) 0] + [(make-timestamp 2016 6) 0] + [(make-timestamp 2016 7) 0] + [(make-timestamp 2016 8) 0] + [(make-timestamp 2016 9) 0] + [(make-timestamp 2016 10) 0] + [(make-timestamp 2016 11) 0] + [(make-timestamp 2016 12) 0] + [(make-timestamp 2017 1) 25]] + (#'f/fill-timeseries (t/months 1) [[(make-timestamp 2016 1) 12] + [(make-timestamp 2016 3) 4] + [(make-timestamp 2017 1) 25]])) + +;; Also low-key tests if fingerprinters can survive nils. 
+(expect + [(var-get #'f/Num) + (var-get #'f/DateTime) + (var-get #'f/Category) + (var-get #'f/Text) + [nil [:type/NeverBeforeSeen :type/*]]] + [(-> (#'f.core/fingerprint-field {} {:base_type :type/Number} numbers) :type) + (-> (#'f.core/fingerprint-field {} {:base_type :type/DateTime} datetimes) + :type) + (-> (#'f.core/fingerprint-field {} {:base_type :type/Text + :special_type :type/Category} + categories) + :type) + (->> categories + (map str) + (#'f.core/fingerprint-field {} {:base_type :type/Text}) + :type) + (-> (#'f.core/fingerprint-field {} {:base_type :type/NeverBeforeSeen} numbers) + :type)]) + +(expect + [true + true + true + true + false + false + true + true + true + true + true + true + false + false] + [(-> {:computation :linear} linear-computation? boolean) + (-> {:computation :unbounded} unbounded-computation? boolean) + (-> {:computation :yolo} unbounded-computation? boolean) + (-> {:computation :yolo} yolo-computation? boolean) + (-> {:computation :unbounded} linear-computation? boolean) + (-> {:computation :unbounded} yolo-computation? boolean) + (-> {:query :cache} cache-only? boolean) + (-> {:query :sample} sample-only? boolean) + (-> {:query :full-scan} full-scan? boolean) + (-> {:query :joins} full-scan? boolean) + (-> {:query :joins} alow-joins? boolean) + (-> nil full-scan? boolean) + (-> nil alow-joins? boolean) + (-> {:query :sample} full-scan? boolean)])