From bfc269c451e4f769e04bf64561255c9c7a84c80d Mon Sep 17 00:00:00 2001 From: Tom Robinson <tlrobinson@gmail.com> Date: Fri, 24 Jan 2020 16:22:48 -0800 Subject: [PATCH] Fix notebook query cleaning (#11792) * Fix notebook query cleaning, add tests/docs/flow types for steps.js. Resolves #11334 --- .../lib/queries/StructuredQuery.js | 60 ++++-- .../components/notebook/NotebookSteps.jsx | 7 +- .../components/notebook/lib/steps.js | 157 ++++++++++++-- .../StructuredQuery-clean.unit.spec.js | 43 +++- .../notebook/lib/steps.unit.spec.js | 204 ++++++++++++++++-- 5 files changed, 413 insertions(+), 58 deletions(-) diff --git a/frontend/src/metabase-lib/lib/queries/StructuredQuery.js b/frontend/src/metabase-lib/lib/queries/StructuredQuery.js index b96c786ec35..ccb355bc4e0 100644 --- a/frontend/src/metabase-lib/lib/queries/StructuredQuery.js +++ b/frontend/src/metabase-lib/lib/queries/StructuredQuery.js @@ -337,7 +337,7 @@ export default class StructuredQuery extends AtomicQuery { /** * Removes invalid clauses from the query (and source-query, recursively) */ - clean() { + clean(): StructuredQuery { if (!this.hasMetadata()) { console.warn("Warning: can't clean query without metadata!"); return this; @@ -351,7 +351,7 @@ export default class StructuredQuery extends AtomicQuery { query = query.setSourceQuery(sourceQuery.clean()); } - query = query + return query .cleanJoins() .cleanExpressions() .cleanFilters() @@ -359,50 +359,68 @@ export default class StructuredQuery extends AtomicQuery { .cleanBreakouts() .cleanSorts() .cleanLimit() - .cleanFields(); + .cleanFields() + .cleanEmpty(); + } - // remove empty queries - const newSourceQuery = query.sourceQuery(); - if (newSourceQuery && (query.isEmpty() || !query.hasAnyClauses())) { - return newSourceQuery; + /** + * Removes empty/useless layers of nesting (recursively) + */ + cleanNesting(): StructuredQuery { + // first clean the sourceQuery, if any, recursively + const sourceQuery = this.sourceQuery(); + if (sourceQuery) { + return this.setSourceQuery(sourceQuery.cleanNesting()).cleanEmpty(); + } else { + return this; } - - return query; } - cleanJoins() { + cleanJoins(): StructuredQuery { return this._cleanClauseList("joins"); } - cleanExpressions() { + cleanExpressions(): StructuredQuery { return this; // TODO } - cleanFilters() { + cleanFilters(): StructuredQuery { return this._cleanClauseList("filters"); } - cleanAggregations() { + cleanAggregations(): StructuredQuery { return this._cleanClauseList("aggregations"); } - cleanBreakouts() { + cleanBreakouts(): StructuredQuery { return this._cleanClauseList("breakouts"); } - cleanSorts() { + cleanSorts(): StructuredQuery { return this; // TODO } - cleanLimit() { + cleanLimit(): StructuredQuery { return this; // TODO } - cleanFields() { + cleanFields(): StructuredQuery { return this; // TODO } - isValid() { + /** + * If this query is empty and there's a source-query, strip off this query, returning the source-query + */ + cleanEmpty(): StructuredQuery { + const sourceQuery = this.sourceQuery(); + if (sourceQuery && !this.hasAnyClauses()) { + return sourceQuery; + } else { + return this; + } + } + + isValid(): boolean { if (!this.hasData()) { return false; } @@ -1341,7 +1359,7 @@ export default class StructuredQuery extends AtomicQuery { // NESTING - nest() { + nest(): StructuredQuery { return this._updateQuery(query => ({ "source-query": query })); } @@ -1463,7 +1481,9 @@ export default class StructuredQuery extends AtomicQuery { return this.rootQuery().sourceTableId(); } - setSourceQuery(sourceQuery: DatasetQuery | StructuredQuery): StructuredQuery { + setSourceQuery( + sourceQuery: StructuredQuery | StructuredQueryObject, + ): StructuredQuery { if (sourceQuery instanceof StructuredQuery) { if (this.sourceQuery() === sourceQuery) { return this; diff --git a/frontend/src/metabase/query_builder/components/notebook/NotebookSteps.jsx b/frontend/src/metabase/query_builder/components/notebook/NotebookSteps.jsx index 73b0690f80a..41612880cc7 100644 --- a/frontend/src/metabase/query_builder/components/notebook/NotebookSteps.jsx +++ b/frontend/src/metabase/query_builder/components/notebook/NotebookSteps.jsx @@ -51,12 +51,7 @@ export default class NotebookSteps extends React.Component { {steps.map((step, index) => { // pass a version of updateQuery that cleans subsequent steps etc const updateQuery = async datasetQuery => { - let query = step.query.setDatasetQuery(datasetQuery); - // clean all subsequent steps - for (let i = index + 1; i < steps.length; i++) { - query = steps[i].clean(query); - } - await query.update(); + await step.update(datasetQuery).update(); // mark the step as "closed" since we can assume it's been added or removed by the updateQuery this.closeStep(step.id); }; diff --git a/frontend/src/metabase/query_builder/components/notebook/lib/steps.js b/frontend/src/metabase/query_builder/components/notebook/lib/steps.js index 6e2bf33ebd3..9961c003b17 100644 --- a/frontend/src/metabase/query_builder/components/notebook/lib/steps.js +++ b/frontend/src/metabase/query_builder/components/notebook/lib/steps.js @@ -1,8 +1,100 @@ +/* @flow weak */ + import StructuredQuery from "metabase-lib/lib/queries/StructuredQuery"; +import type Question from "metabase-lib/lib/Question"; + +import type { StructuredDatasetQuery } from "metabase/meta/types/Card"; import _ from "underscore"; -const STEPS = [ +// This converts an MBQL query into a sequence of notebook "steps", with special logic to determine which steps are +// allowed to be added at every other step, generating a preview query at each step, how to delete a step, +// ensuring steps that become invalid after modifying an upstream step are removed, etc. + +type StepType = + | "data" + | "join" + | "expression" + | "filter" + | "summarize" + | "sort" + | "limit"; + +type StepDefinition = NormalStepDefinition | SubStepDefinition; + +type NormalStepDefinition = { + // a type name for the step + type: StepType, + // returns true if the step can be added to the provided query + valid: (query: StructuredQuery) => boolean, + // returns true if the provided query already has this step + active: (query: StructuredQuery) => boolean, + // logic to remove invalid clauses that were added by this step from the provided query + clean: (query: StructuredQuery) => StructuredQuery, + // logic to remove this step from the provided query + revert?: (query: StructuredQuery) => StructuredQuery, +}; + +type SubStepDefinition = { + // a type name for the step + type: StepType, + // returns true if the step can be added to the provided query + valid: (query: StructuredQuery, subStepIndex: number) => boolean, + // returns true if the provided query already has this step + active: (query: StructuredQuery, subStepIndex: number) => boolean, + // logic to remove invalid clauses that were added by this step from the provided query + clean: (query: StructuredQuery, subStepIndex: number) => StructuredQuery, + // logic to remove this step from the provided query + revert?: (query: StructuredQuery, subStepIndex: number) => StructuredQuery, + // how many sub-steps are there + subSteps?: (query: StructuredQuery) => number, +}; + +// identifier for this step, e.x. `0:data` (or `0:join:1` for sub-steps) +type StepId = string; + +type Step = { + id: StepId, + // the step type, corresponds with type in StepDefinition + type: StepType, + // if there are nested queries, this indicates the level of nested (0 being the root query) + stageIndex: number, + // if there are sub-steps, this indicates the index of the sub-step + itemIndex: number, + // is this step currently allowed? + valid: boolean, + // is this step currently applied? + active: boolean, + // is this step visible? (if it's active or just added) + visible: boolean, + // the query for this "stage" + query: StructuredQuery, + // a query to preview query at this step + previewQuery: ?StructuredQuery, + // remove this step + revert: ?(query: StructuredQuery) => StructuredQuery, + // remove invalid clauses set by this step + clean: (query: StructuredQuery) => StructuredQuery, + // update the query at this step and clean subsequent queries + update: (datasetQuery: StructuredDatasetQuery) => StructuredQuery, + // any valid actions that can be applied after this step + actions: StepAction[], + // pointer to the next step, if any + next: ?Step, + // pointer to the previous step, if any + previous: ?Step, +}; + +type StepAction = { + type: StepType, + action: Function, +}; + +type OpenSteps = { + [id: StepId]: boolean, +}; + +const STEPS: StepDefinition[] = [ { type: "data", valid: query => !query.sourceQuery(), @@ -89,7 +181,10 @@ const STEPS = [ /** * Returns an array of "steps" to be displayed in the notebook for one "stage" (nesting) of a query */ -export function getQuestionSteps(question, openSteps = {}) { +export function getQuestionSteps( + question: Question, + openSteps: OpenSteps = {}, +): Step[] { const allSteps = []; let query = question.query(); @@ -98,9 +193,7 @@ export function getQuestionSteps(question, openSteps = {}) { const allowsNesting = database && database.hasFeature("nested-queries"); // strip empty source queries - while (query.sourceQuery() && !query.hasAnyClauses()) { - query = query.sourceQuery(); - } + query = query.cleanNesting(); // add a level of nesting, if valid if (allowsNesting && query.hasBreakouts()) { @@ -121,32 +214,66 @@ export function getQuestionSteps(question, openSteps = {}) { } } + // set up pointers to the next and previous steps + for (const [index, step] of allSteps.entries()) { + step.previous = allSteps[index - 1]; + step.next = allSteps[index + 1]; + } + return allSteps; } /** * Returns an array of "steps" to be displayed in the notebook for one "stage" (nesting) of a query */ -export function getStageSteps(query, stageIndex, openSteps) { +export function getStageSteps( + stageQuery: StructuredQuery, + stageIndex: number, + openSteps: OpenSteps, +): { steps: Step[], actions: StepAction[] } { const getId = (step, itemIndex) => `${stageIndex}:${step.type}` + (itemIndex > 0 ? `:${itemIndex}` : ``); function getStep(STEP, itemIndex = null) { - return { - id: getId(STEP, itemIndex), + const id = getId(STEP, itemIndex); + const step: Step = { + id: id, type: STEP.type, stageIndex: stageIndex, itemIndex: itemIndex, - query: query, - valid: STEP.valid(query, itemIndex), - active: STEP.active(query, itemIndex), + query: stageQuery, + valid: STEP.valid(stageQuery, itemIndex), + active: STEP.active(stageQuery, itemIndex), visible: - STEP.valid(query, itemIndex) && - (STEP.active(query, itemIndex) || openSteps[getId(STEP, itemIndex)]), + STEP.valid(stageQuery, itemIndex) && + (STEP.active(stageQuery, itemIndex) || openSteps[id]), revert: STEP.revert ? query => STEP.revert(query, itemIndex) : null, clean: query => STEP.clean(query, itemIndex), + update: datasetQuery => { + let newQuery = stageQuery.setDatasetQuery(datasetQuery); + // clean each subsequent step individually. we have to do this rather than calling newQuery.clean() in case + // the current step is in a temporarily invalid state + let current = step; + while ((current = current.next)) { + // when switching to the next stage we need to setSourceQuery + if ( + current.previous && + current.previous.stageIndex < current.stageIndex + ) { + newQuery = current.query.setSourceQuery(newQuery.query()); + } + newQuery = current.clean(newQuery); + } + // strip empty source queries + return newQuery.cleanNesting(); + }, + // `actions`, `previewQuery`, `next` and `previous` will be set later actions: [], + previewQuery: null, + next: null, + previous: null, }; + return step; } // get the currently visible steps, flattening "items" @@ -154,7 +281,7 @@ export function getStageSteps(query, stageIndex, openSteps) { STEPS.map(STEP => { if (STEP.subSteps) { // add 1 for the initial or next action button - const itemIndexes = _.range(0, STEP.subSteps(query) + 1); + const itemIndexes = _.range(0, STEP.subSteps(stageQuery) + 1); return itemIndexes.map(itemIndex => getStep(STEP, itemIndex)); } else { return [getStep(STEP)]; @@ -162,7 +289,7 @@ export function getStageSteps(query, stageIndex, openSteps) { }), ); - let previewQuery = query; + let previewQuery = stageQuery; let actions = []; // iterate over steps in reverse so we can revert query for previewing and accumulate valid actions diff --git a/frontend/test/metabase-lib/lib/queries/StructuredQuery-clean.unit.spec.js b/frontend/test/metabase-lib/lib/queries/StructuredQuery-clean.unit.spec.js index b8aa68d2f35..f0998855a05 100644 --- a/frontend/test/metabase-lib/lib/queries/StructuredQuery-clean.unit.spec.js +++ b/frontend/test/metabase-lib/lib/queries/StructuredQuery-clean.unit.spec.js @@ -1,4 +1,8 @@ -import { ORDERS, PRODUCTS } from "__support__/sample_dataset_fixture"; +import { + SAMPLE_DATASET, + ORDERS, + PRODUCTS, +} from "__support__/sample_dataset_fixture"; const JOIN = { "source-table": PRODUCTS.id, @@ -232,4 +236,41 @@ describe("StructuredQuery", () => { }); }); }); + + describe("cleanNesting", () => { + it("should not modify empty queries with no source-query", () => { + expect( + SAMPLE_DATASET.question() + .query() + .cleanNesting() + .datasetQuery(), + ).toEqual({ + type: "query", + database: SAMPLE_DATASET.id, + query: { "source-table": null }, + }); + }); + it("should remove outer empty queries", () => { + expect( + ORDERS.query() + .updateLimit(10) + .nest() + .nest() + .nest() + .cleanNesting() + .query(), + ).toEqual({ "source-table": ORDERS.id, limit: 10 }); + }); + it("should remove intermediate empty queries", () => { + expect( + ORDERS.query() + .nest() + .nest() + .nest() + .updateLimit(10) + .cleanNesting() + .query(), + ).toEqual({ "source-query": { "source-table": ORDERS.id }, limit: 10 }); + }); + }); }); diff --git a/frontend/test/metabase/query_builder/components/notebook/lib/steps.unit.spec.js b/frontend/test/metabase/query_builder/components/notebook/lib/steps.unit.spec.js index b362c58f490..f426afb6cae 100644 --- a/frontend/test/metabase/query_builder/components/notebook/lib/steps.unit.spec.js +++ b/frontend/test/metabase/query_builder/components/notebook/lib/steps.unit.spec.js @@ -1,19 +1,191 @@ import { getQuestionSteps } from "metabase/query_builder/components/notebook/lib/steps"; -import { SAMPLE_DATASET, ORDERS } from "__support__/sample_dataset_fixture"; - -describe("getQuestionSteps", () => { - it("should return data step with no actions for new query", () => { - const steps = getQuestionSteps(SAMPLE_DATASET.question()); - expect(steps.length).toBe(1); - expect(steps.map(s => s.type)).toEqual(["data"]); - expect(steps.map(s => s.actions.map(a => a.type))).toEqual([[]]); - }); - it("should return data step with actions for query with data selected", () => { - const steps = getQuestionSteps(ORDERS.question()); - expect(steps.length).toBe(1); - expect(steps.map(s => s.type)).toEqual(["data"]); - expect(steps.map(s => s.actions.map(a => a.type))).toEqual([ - ["join", "expression", "filter", "summarize", "sort", "limit"], - ]); +import { + SAMPLE_DATASET, + ORDERS, + PRODUCTS, +} from "__support__/sample_dataset_fixture"; + +const rawDataQuery = { + "source-table": ORDERS.id, +}; + +const summarizedQuery = { + ...rawDataQuery, + aggregation: [["count"]], + breakout: [ + [ + "fk->", + ["field-id", ORDERS.PRODUCT_ID.id], + ["field-id", PRODUCTS.CATEGORY.id], + ], + ], +}; + +const filteredQuery = { + ...rawDataQuery, + filter: ["=", ["field-id", ORDERS.USER_ID.id], 1], +}; + +const filteredAndSummarizedQuery = { + ...summarizedQuery, + ...filteredQuery, +}; + +const postAggregationFilterQuery = { + "source-query": filteredAndSummarizedQuery, + filter: [">", ["field-literal", "count", "type/Integer"], 10], +}; + +const getQuestionStepsForMBQLQuery = query => + getQuestionSteps( + SAMPLE_DATASET.question() + .query() + .setQuery(query) + .question(), + ); + +describe("new query", () => { + const steps = getQuestionStepsForMBQLQuery({}); + describe("getQuestionSteps", () => { + it("should return data step with no actions", () => { + expect(steps.length).toBe(1); + expect(steps.map(s => s.type)).toEqual(["data"]); + expect(steps.map(s => s.actions.map(a => a.type))).toEqual([[]]); + }); + }); +}); + +describe("raw data query", () => { + const steps = getQuestionStepsForMBQLQuery(rawDataQuery); + describe("getQuestionSteps", () => { + it("should return data step with actions", () => { + expect(steps.length).toBe(1); + expect(steps.map(s => s.type)).toEqual(["data"]); + expect(steps.map(s => s.actions.map(a => a.type))).toEqual([ + ["join", "expression", "filter", "summarize", "sort", "limit"], + ]); + }); + }); +}); + +describe("filtered and summarized query", () => { + const steps = getQuestionStepsForMBQLQuery(filteredAndSummarizedQuery); + describe("getQuestionSteps", () => { + it("`getQuestionSteps()` should return data, filter, and summarize steps", () => { + expect(steps.map(s => s.type)).toEqual(["data", "filter", "summarize"]); + }); + }); + describe("query", () => { + it(`should be the full query for data step`, () => { + expect(steps[0].query.query()).toEqual(filteredAndSummarizedQuery); + }); + it(`should be the full query for filter step`, () => { + expect(steps[1].query.query()).toEqual(filteredAndSummarizedQuery); + }); + it(`should be the full query for summarize step`, () => { + expect(steps[2].query.query()).toEqual(filteredAndSummarizedQuery); + }); + }); + describe("previewQuery", () => { + it(`shouldn't include filter, summarize for data step`, () => { + expect(steps[0].previewQuery.query()).toEqual(rawDataQuery); + }); + it(`shouldn't include summarize for filter step`, () => { + expect(steps[1].previewQuery.query()).toEqual(filteredQuery); + }); + it(`shouldn't be the full query for summarize step`, () => { + filteredAndSummarizedQuery; + }); + }); + describe("update", () => { + it("should remove all steps when changing the table", () => { + const newQuery = steps[0].update( + steps[0].query.setTableId(PRODUCTS.id).datasetQuery(), + ); + expect(newQuery.query()).toEqual({ "source-table": PRODUCTS.id }); + }); + it("shouldn't remove summarize when removing filter", () => { + const newQuery = steps[1].update( + steps[1].revert(steps[1].query).datasetQuery(), + ); + expect(newQuery.query()).toEqual(summarizedQuery); + }); + it("shouldn't remove filter when removing summarize", () => { + const newQuery = steps[2].update( + steps[2].revert(steps[2].query).datasetQuery(), + ); + expect(newQuery.query()).toEqual(filteredQuery); + }); + }); +}); + +describe("filtered and summarized query with post-aggregation filter", () => { + const steps = getQuestionStepsForMBQLQuery(postAggregationFilterQuery); + describe("getQuestionSteps", () => { + it("`getQuestionSteps()` should return data, filter, summarize, and filter steps", () => { + expect(steps.map(s => s.type)).toEqual([ + "data", + "filter", + "summarize", + "filter", + ]); + }); + }); + describe("query", () => { + it(`should be the source-query for data step`, () => { + expect(steps[0].query.query()).toEqual(filteredAndSummarizedQuery); + }); + it(`should be the source-query for filter step`, () => { + expect(steps[1].query.query()).toEqual(filteredAndSummarizedQuery); + }); + it(`should be the source-query for summarize step`, () => { + expect(steps[2].query.query()).toEqual(filteredAndSummarizedQuery); + }); + it(`should be the original query for post-aggregation filter step`, () => { + expect(steps[3].query.query()).toEqual(postAggregationFilterQuery); + }); + }); + describe("previewQuery", () => { + it(`shouldn't include filter, summarize, or post-aggregrationfilter for data step`, () => { + expect(steps[0].previewQuery.query()).toEqual(rawDataQuery); + }); + it(`shouldn't include summarize or post-aggregrationfilter filter step`, () => { + expect(steps[1].previewQuery.query()).toEqual(filteredQuery); + }); + it(`shouldn't include post-aggregrationfilter for summarize step`, () => { + filteredAndSummarizedQuery; + }); + it(`should be the original query for post-aggregation filter step`, () => { + expect(steps[3].previewQuery.query()).toEqual(postAggregationFilterQuery); + }); + }); + describe("update", () => { + it("should remove all steps when changing the table", () => { + const newQuery = steps[0].update( + steps[0].query.setTableId(PRODUCTS.id).datasetQuery(), + ); + expect(newQuery.query()).toEqual({ "source-table": PRODUCTS.id }); + }); + it("shouldn't remove summarize or post-aggregrationfilter when removing filter", () => { + const newQuery = steps[1].update( + steps[1].revert(steps[1].query).datasetQuery(), + ); + expect(newQuery.query()).toEqual({ + ...postAggregationFilterQuery, + "source-query": summarizedQuery, + }); + }); + it("should remove post-aggregrationfilter when removing summarize", () => { + const newQuery = steps[2].update( + steps[2].revert(steps[2].query).datasetQuery(), + ); + expect(newQuery.query()).toEqual(filteredQuery); + }); + it("should remove empty layer of nesting but not remove filter or summarize when removing post-aggregation filter", () => { + const newQuery = steps[3].update( + steps[3].revert(steps[3].query).datasetQuery(), + ); + expect(newQuery.query()).toEqual(filteredAndSummarizedQuery); + }); }); }); -- GitLab