From bfc269c451e4f769e04bf64561255c9c7a84c80d Mon Sep 17 00:00:00 2001
From: Tom Robinson <tlrobinson@gmail.com>
Date: Fri, 24 Jan 2020 16:22:48 -0800
Subject: [PATCH] Fix notebook query cleaning (#11792)

* Fix notebook query cleaning, add tests/docs/flow types for steps.js. Resolves #11334
---
 .../lib/queries/StructuredQuery.js            |  60 ++++--
 .../components/notebook/NotebookSteps.jsx     |   7 +-
 .../components/notebook/lib/steps.js          | 157 ++++++++++++--
 .../StructuredQuery-clean.unit.spec.js        |  43 +++-
 .../notebook/lib/steps.unit.spec.js           | 204 ++++++++++++++++--
 5 files changed, 413 insertions(+), 58 deletions(-)

diff --git a/frontend/src/metabase-lib/lib/queries/StructuredQuery.js b/frontend/src/metabase-lib/lib/queries/StructuredQuery.js
index b96c786ec35..ccb355bc4e0 100644
--- a/frontend/src/metabase-lib/lib/queries/StructuredQuery.js
+++ b/frontend/src/metabase-lib/lib/queries/StructuredQuery.js
@@ -337,7 +337,7 @@ export default class StructuredQuery extends AtomicQuery {
   /**
    * Removes invalid clauses from the query (and source-query, recursively)
    */
-  clean() {
+  clean(): StructuredQuery {
     if (!this.hasMetadata()) {
       console.warn("Warning: can't clean query without metadata!");
       return this;
@@ -351,7 +351,7 @@ export default class StructuredQuery extends AtomicQuery {
       query = query.setSourceQuery(sourceQuery.clean());
     }
 
-    query = query
+    return query
       .cleanJoins()
       .cleanExpressions()
       .cleanFilters()
@@ -359,50 +359,68 @@ export default class StructuredQuery extends AtomicQuery {
       .cleanBreakouts()
       .cleanSorts()
       .cleanLimit()
-      .cleanFields();
+      .cleanFields()
+      .cleanEmpty();
+  }
 
-    // remove empty queries
-    const newSourceQuery = query.sourceQuery();
-    if (newSourceQuery && (query.isEmpty() || !query.hasAnyClauses())) {
-      return newSourceQuery;
+  /**
+   * Removes empty/useless layers of nesting (recursively)
+   */
+  cleanNesting(): StructuredQuery {
+    // first clean the sourceQuery, if any, recursively
+    const sourceQuery = this.sourceQuery();
+    if (sourceQuery) {
+      return this.setSourceQuery(sourceQuery.cleanNesting()).cleanEmpty();
+    } else {
+      return this;
     }
-
-    return query;
   }
 
-  cleanJoins() {
+  cleanJoins(): StructuredQuery {
     return this._cleanClauseList("joins");
   }
 
-  cleanExpressions() {
+  cleanExpressions(): StructuredQuery {
     return this; // TODO
   }
 
-  cleanFilters() {
+  cleanFilters(): StructuredQuery {
     return this._cleanClauseList("filters");
   }
 
-  cleanAggregations() {
+  cleanAggregations(): StructuredQuery {
     return this._cleanClauseList("aggregations");
   }
 
-  cleanBreakouts() {
+  cleanBreakouts(): StructuredQuery {
     return this._cleanClauseList("breakouts");
   }
 
-  cleanSorts() {
+  cleanSorts(): StructuredQuery {
     return this; // TODO
   }
 
-  cleanLimit() {
+  cleanLimit(): StructuredQuery {
     return this; // TODO
   }
 
-  cleanFields() {
+  cleanFields(): StructuredQuery {
     return this; // TODO
   }
 
-  isValid() {
+  /**
+   * If this query is empty and there's a source-query, strip off this query, returning the source-query
+   */
+  cleanEmpty(): StructuredQuery {
+    const sourceQuery = this.sourceQuery();
+    if (sourceQuery && !this.hasAnyClauses()) {
+      return sourceQuery;
+    } else {
+      return this;
+    }
+  }
+
+  isValid(): boolean {
     if (!this.hasData()) {
       return false;
     }
@@ -1341,7 +1359,7 @@ export default class StructuredQuery extends AtomicQuery {
 
   // NESTING
 
-  nest() {
+  nest(): StructuredQuery {
     return this._updateQuery(query => ({ "source-query": query }));
   }
 
@@ -1463,7 +1481,9 @@ export default class StructuredQuery extends AtomicQuery {
     return this.rootQuery().sourceTableId();
   }
 
-  setSourceQuery(sourceQuery: DatasetQuery | StructuredQuery): StructuredQuery {
+  setSourceQuery(
+    sourceQuery: StructuredQuery | StructuredQueryObject,
+  ): StructuredQuery {
     if (sourceQuery instanceof StructuredQuery) {
       if (this.sourceQuery() === sourceQuery) {
         return this;
diff --git a/frontend/src/metabase/query_builder/components/notebook/NotebookSteps.jsx b/frontend/src/metabase/query_builder/components/notebook/NotebookSteps.jsx
index 73b0690f80a..41612880cc7 100644
--- a/frontend/src/metabase/query_builder/components/notebook/NotebookSteps.jsx
+++ b/frontend/src/metabase/query_builder/components/notebook/NotebookSteps.jsx
@@ -51,12 +51,7 @@ export default class NotebookSteps extends React.Component {
         {steps.map((step, index) => {
           // pass a version of updateQuery that cleans subsequent steps etc
           const updateQuery = async datasetQuery => {
-            let query = step.query.setDatasetQuery(datasetQuery);
-            // clean all subsequent steps
-            for (let i = index + 1; i < steps.length; i++) {
-              query = steps[i].clean(query);
-            }
-            await query.update();
+            await step.update(datasetQuery).update();
             // mark the step as "closed" since we can assume it's been added or removed by the updateQuery
             this.closeStep(step.id);
           };
diff --git a/frontend/src/metabase/query_builder/components/notebook/lib/steps.js b/frontend/src/metabase/query_builder/components/notebook/lib/steps.js
index 6e2bf33ebd3..9961c003b17 100644
--- a/frontend/src/metabase/query_builder/components/notebook/lib/steps.js
+++ b/frontend/src/metabase/query_builder/components/notebook/lib/steps.js
@@ -1,8 +1,100 @@
+/* @flow weak */
+
 import StructuredQuery from "metabase-lib/lib/queries/StructuredQuery";
+import type Question from "metabase-lib/lib/Question";
+
+import type { StructuredDatasetQuery } from "metabase/meta/types/Card";
 
 import _ from "underscore";
 
-const STEPS = [
+// This converts an MBQL query into a sequence of notebook "steps", with special logic to determine which steps are
+// allowed to be added at every other step, generating a preview query at each step, how to delete a step,
+// ensuring steps that become invalid after modifying an upstream step are removed, etc.
+
+type StepType =
+  | "data"
+  | "join"
+  | "expression"
+  | "filter"
+  | "summarize"
+  | "sort"
+  | "limit";
+
+type StepDefinition = NormalStepDefinition | SubStepDefinition;
+
+type NormalStepDefinition = {
+  // a type name for the step
+  type: StepType,
+  // returns true if the step can be added to the provided query
+  valid: (query: StructuredQuery) => boolean,
+  // returns true if the provided query already has this step
+  active: (query: StructuredQuery) => boolean,
+  // logic to remove invalid clauses that were added by this step from the provided query
+  clean: (query: StructuredQuery) => StructuredQuery,
+  // logic to remove this step from the provided query
+  revert?: (query: StructuredQuery) => StructuredQuery,
+};
+
+type SubStepDefinition = {
+  // a type name for the step
+  type: StepType,
+  // returns true if the step can be added to the provided query
+  valid: (query: StructuredQuery, subStepIndex: number) => boolean,
+  // returns true if the provided query already has this step
+  active: (query: StructuredQuery, subStepIndex: number) => boolean,
+  // logic to remove invalid clauses that were added by this step from the provided query
+  clean: (query: StructuredQuery, subStepIndex: number) => StructuredQuery,
+  // logic to remove this step from the provided query
+  revert?: (query: StructuredQuery, subStepIndex: number) => StructuredQuery,
+  // how many sub-steps are there
+  subSteps?: (query: StructuredQuery) => number,
+};
+
+// identifier for this step, e.x. `0:data` (or `0:join:1` for sub-steps)
+type StepId = string;
+
+type Step = {
+  id: StepId,
+  // the step type, corresponds with type in StepDefinition
+  type: StepType,
+  // if there are nested queries, this indicates the level of nested (0 being the root query)
+  stageIndex: number,
+  // if there are sub-steps, this indicates the index of the sub-step
+  itemIndex: number,
+  // is this step currently allowed?
+  valid: boolean,
+  // is this step currently applied?
+  active: boolean,
+  // is this step visible? (if it's active or just added)
+  visible: boolean,
+  // the query for this "stage"
+  query: StructuredQuery,
+  // a query to preview query at this step
+  previewQuery: ?StructuredQuery,
+  // remove this step
+  revert: ?(query: StructuredQuery) => StructuredQuery,
+  // remove invalid clauses set by this step
+  clean: (query: StructuredQuery) => StructuredQuery,
+  // update the query at this step and clean subsequent queries
+  update: (datasetQuery: StructuredDatasetQuery) => StructuredQuery,
+  // any valid actions that can be applied after this step
+  actions: StepAction[],
+  // pointer to the next step, if any
+  next: ?Step,
+  // pointer to the previous step, if any
+  previous: ?Step,
+};
+
+type StepAction = {
+  type: StepType,
+  action: Function,
+};
+
+type OpenSteps = {
+  [id: StepId]: boolean,
+};
+
+const STEPS: StepDefinition[] = [
   {
     type: "data",
     valid: query => !query.sourceQuery(),
@@ -89,7 +181,10 @@ const STEPS = [
 /**
  * Returns an array of "steps" to be displayed in the notebook for one "stage" (nesting) of a query
  */
-export function getQuestionSteps(question, openSteps = {}) {
+export function getQuestionSteps(
+  question: Question,
+  openSteps: OpenSteps = {},
+): Step[] {
   const allSteps = [];
 
   let query = question.query();
@@ -98,9 +193,7 @@ export function getQuestionSteps(question, openSteps = {}) {
     const allowsNesting = database && database.hasFeature("nested-queries");
 
     // strip empty source queries
-    while (query.sourceQuery() && !query.hasAnyClauses()) {
-      query = query.sourceQuery();
-    }
+    query = query.cleanNesting();
 
     // add a level of nesting, if valid
     if (allowsNesting && query.hasBreakouts()) {
@@ -121,32 +214,66 @@ export function getQuestionSteps(question, openSteps = {}) {
     }
   }
 
+  // set up pointers to the next and previous steps
+  for (const [index, step] of allSteps.entries()) {
+    step.previous = allSteps[index - 1];
+    step.next = allSteps[index + 1];
+  }
+
   return allSteps;
 }
 
 /**
  * Returns an array of "steps" to be displayed in the notebook for one "stage" (nesting) of a query
  */
-export function getStageSteps(query, stageIndex, openSteps) {
+export function getStageSteps(
+  stageQuery: StructuredQuery,
+  stageIndex: number,
+  openSteps: OpenSteps,
+): { steps: Step[], actions: StepAction[] } {
   const getId = (step, itemIndex) =>
     `${stageIndex}:${step.type}` + (itemIndex > 0 ? `:${itemIndex}` : ``);
 
   function getStep(STEP, itemIndex = null) {
-    return {
-      id: getId(STEP, itemIndex),
+    const id = getId(STEP, itemIndex);
+    const step: Step = {
+      id: id,
       type: STEP.type,
       stageIndex: stageIndex,
       itemIndex: itemIndex,
-      query: query,
-      valid: STEP.valid(query, itemIndex),
-      active: STEP.active(query, itemIndex),
+      query: stageQuery,
+      valid: STEP.valid(stageQuery, itemIndex),
+      active: STEP.active(stageQuery, itemIndex),
       visible:
-        STEP.valid(query, itemIndex) &&
-        (STEP.active(query, itemIndex) || openSteps[getId(STEP, itemIndex)]),
+        STEP.valid(stageQuery, itemIndex) &&
+        (STEP.active(stageQuery, itemIndex) || openSteps[id]),
       revert: STEP.revert ? query => STEP.revert(query, itemIndex) : null,
       clean: query => STEP.clean(query, itemIndex),
+      update: datasetQuery => {
+        let newQuery = stageQuery.setDatasetQuery(datasetQuery);
+        // clean each subsequent step individually. we have to do this rather than calling newQuery.clean() in case
+        // the current step is in a temporarily invalid state
+        let current = step;
+        while ((current = current.next)) {
+          // when switching to the next stage we need to setSourceQuery
+          if (
+            current.previous &&
+            current.previous.stageIndex < current.stageIndex
+          ) {
+            newQuery = current.query.setSourceQuery(newQuery.query());
+          }
+          newQuery = current.clean(newQuery);
+        }
+        // strip empty source queries
+        return newQuery.cleanNesting();
+      },
+      // `actions`, `previewQuery`, `next` and `previous` will be set later
       actions: [],
+      previewQuery: null,
+      next: null,
+      previous: null,
     };
+    return step;
   }
 
   // get the currently visible steps, flattening "items"
@@ -154,7 +281,7 @@ export function getStageSteps(query, stageIndex, openSteps) {
     STEPS.map(STEP => {
       if (STEP.subSteps) {
         // add 1 for the initial or next action button
-        const itemIndexes = _.range(0, STEP.subSteps(query) + 1);
+        const itemIndexes = _.range(0, STEP.subSteps(stageQuery) + 1);
         return itemIndexes.map(itemIndex => getStep(STEP, itemIndex));
       } else {
         return [getStep(STEP)];
@@ -162,7 +289,7 @@ export function getStageSteps(query, stageIndex, openSteps) {
     }),
   );
 
-  let previewQuery = query;
+  let previewQuery = stageQuery;
 
   let actions = [];
   // iterate over steps in reverse so we can revert query for previewing and accumulate valid actions
diff --git a/frontend/test/metabase-lib/lib/queries/StructuredQuery-clean.unit.spec.js b/frontend/test/metabase-lib/lib/queries/StructuredQuery-clean.unit.spec.js
index b8aa68d2f35..f0998855a05 100644
--- a/frontend/test/metabase-lib/lib/queries/StructuredQuery-clean.unit.spec.js
+++ b/frontend/test/metabase-lib/lib/queries/StructuredQuery-clean.unit.spec.js
@@ -1,4 +1,8 @@
-import { ORDERS, PRODUCTS } from "__support__/sample_dataset_fixture";
+import {
+  SAMPLE_DATASET,
+  ORDERS,
+  PRODUCTS,
+} from "__support__/sample_dataset_fixture";
 
 const JOIN = {
   "source-table": PRODUCTS.id,
@@ -232,4 +236,41 @@ describe("StructuredQuery", () => {
       });
     });
   });
+
+  describe("cleanNesting", () => {
+    it("should not modify empty queries with no source-query", () => {
+      expect(
+        SAMPLE_DATASET.question()
+          .query()
+          .cleanNesting()
+          .datasetQuery(),
+      ).toEqual({
+        type: "query",
+        database: SAMPLE_DATASET.id,
+        query: { "source-table": null },
+      });
+    });
+    it("should remove outer empty queries", () => {
+      expect(
+        ORDERS.query()
+          .updateLimit(10)
+          .nest()
+          .nest()
+          .nest()
+          .cleanNesting()
+          .query(),
+      ).toEqual({ "source-table": ORDERS.id, limit: 10 });
+    });
+    it("should remove intermediate empty queries", () => {
+      expect(
+        ORDERS.query()
+          .nest()
+          .nest()
+          .nest()
+          .updateLimit(10)
+          .cleanNesting()
+          .query(),
+      ).toEqual({ "source-query": { "source-table": ORDERS.id }, limit: 10 });
+    });
+  });
 });
diff --git a/frontend/test/metabase/query_builder/components/notebook/lib/steps.unit.spec.js b/frontend/test/metabase/query_builder/components/notebook/lib/steps.unit.spec.js
index b362c58f490..f426afb6cae 100644
--- a/frontend/test/metabase/query_builder/components/notebook/lib/steps.unit.spec.js
+++ b/frontend/test/metabase/query_builder/components/notebook/lib/steps.unit.spec.js
@@ -1,19 +1,191 @@
 import { getQuestionSteps } from "metabase/query_builder/components/notebook/lib/steps";
-import { SAMPLE_DATASET, ORDERS } from "__support__/sample_dataset_fixture";
-
-describe("getQuestionSteps", () => {
-  it("should return data step with no actions for new query", () => {
-    const steps = getQuestionSteps(SAMPLE_DATASET.question());
-    expect(steps.length).toBe(1);
-    expect(steps.map(s => s.type)).toEqual(["data"]);
-    expect(steps.map(s => s.actions.map(a => a.type))).toEqual([[]]);
-  });
-  it("should return data step with actions for query with data selected", () => {
-    const steps = getQuestionSteps(ORDERS.question());
-    expect(steps.length).toBe(1);
-    expect(steps.map(s => s.type)).toEqual(["data"]);
-    expect(steps.map(s => s.actions.map(a => a.type))).toEqual([
-      ["join", "expression", "filter", "summarize", "sort", "limit"],
-    ]);
+import {
+  SAMPLE_DATASET,
+  ORDERS,
+  PRODUCTS,
+} from "__support__/sample_dataset_fixture";
+
+const rawDataQuery = {
+  "source-table": ORDERS.id,
+};
+
+const summarizedQuery = {
+  ...rawDataQuery,
+  aggregation: [["count"]],
+  breakout: [
+    [
+      "fk->",
+      ["field-id", ORDERS.PRODUCT_ID.id],
+      ["field-id", PRODUCTS.CATEGORY.id],
+    ],
+  ],
+};
+
+const filteredQuery = {
+  ...rawDataQuery,
+  filter: ["=", ["field-id", ORDERS.USER_ID.id], 1],
+};
+
+const filteredAndSummarizedQuery = {
+  ...summarizedQuery,
+  ...filteredQuery,
+};
+
+const postAggregationFilterQuery = {
+  "source-query": filteredAndSummarizedQuery,
+  filter: [">", ["field-literal", "count", "type/Integer"], 10],
+};
+
+const getQuestionStepsForMBQLQuery = query =>
+  getQuestionSteps(
+    SAMPLE_DATASET.question()
+      .query()
+      .setQuery(query)
+      .question(),
+  );
+
+describe("new query", () => {
+  const steps = getQuestionStepsForMBQLQuery({});
+  describe("getQuestionSteps", () => {
+    it("should return data step with no actions", () => {
+      expect(steps.length).toBe(1);
+      expect(steps.map(s => s.type)).toEqual(["data"]);
+      expect(steps.map(s => s.actions.map(a => a.type))).toEqual([[]]);
+    });
+  });
+});
+
+describe("raw data query", () => {
+  const steps = getQuestionStepsForMBQLQuery(rawDataQuery);
+  describe("getQuestionSteps", () => {
+    it("should return data step with actions", () => {
+      expect(steps.length).toBe(1);
+      expect(steps.map(s => s.type)).toEqual(["data"]);
+      expect(steps.map(s => s.actions.map(a => a.type))).toEqual([
+        ["join", "expression", "filter", "summarize", "sort", "limit"],
+      ]);
+    });
+  });
+});
+
+describe("filtered and summarized query", () => {
+  const steps = getQuestionStepsForMBQLQuery(filteredAndSummarizedQuery);
+  describe("getQuestionSteps", () => {
+    it("`getQuestionSteps()` should return data, filter, and summarize steps", () => {
+      expect(steps.map(s => s.type)).toEqual(["data", "filter", "summarize"]);
+    });
+  });
+  describe("query", () => {
+    it(`should be the full query for data step`, () => {
+      expect(steps[0].query.query()).toEqual(filteredAndSummarizedQuery);
+    });
+    it(`should be the full query for filter step`, () => {
+      expect(steps[1].query.query()).toEqual(filteredAndSummarizedQuery);
+    });
+    it(`should be the full query for summarize step`, () => {
+      expect(steps[2].query.query()).toEqual(filteredAndSummarizedQuery);
+    });
+  });
+  describe("previewQuery", () => {
+    it(`shouldn't include filter, summarize for data step`, () => {
+      expect(steps[0].previewQuery.query()).toEqual(rawDataQuery);
+    });
+    it(`shouldn't include summarize for filter step`, () => {
+      expect(steps[1].previewQuery.query()).toEqual(filteredQuery);
+    });
+    it(`shouldn't be the full query for summarize step`, () => {
+      filteredAndSummarizedQuery;
+    });
+  });
+  describe("update", () => {
+    it("should remove all steps when changing the table", () => {
+      const newQuery = steps[0].update(
+        steps[0].query.setTableId(PRODUCTS.id).datasetQuery(),
+      );
+      expect(newQuery.query()).toEqual({ "source-table": PRODUCTS.id });
+    });
+    it("shouldn't remove summarize when removing filter", () => {
+      const newQuery = steps[1].update(
+        steps[1].revert(steps[1].query).datasetQuery(),
+      );
+      expect(newQuery.query()).toEqual(summarizedQuery);
+    });
+    it("shouldn't remove filter when removing summarize", () => {
+      const newQuery = steps[2].update(
+        steps[2].revert(steps[2].query).datasetQuery(),
+      );
+      expect(newQuery.query()).toEqual(filteredQuery);
+    });
+  });
+});
+
+describe("filtered and summarized query with post-aggregation filter", () => {
+  const steps = getQuestionStepsForMBQLQuery(postAggregationFilterQuery);
+  describe("getQuestionSteps", () => {
+    it("`getQuestionSteps()` should return data, filter, summarize, and filter steps", () => {
+      expect(steps.map(s => s.type)).toEqual([
+        "data",
+        "filter",
+        "summarize",
+        "filter",
+      ]);
+    });
+  });
+  describe("query", () => {
+    it(`should be the source-query for data step`, () => {
+      expect(steps[0].query.query()).toEqual(filteredAndSummarizedQuery);
+    });
+    it(`should be the source-query for filter step`, () => {
+      expect(steps[1].query.query()).toEqual(filteredAndSummarizedQuery);
+    });
+    it(`should be the source-query for summarize step`, () => {
+      expect(steps[2].query.query()).toEqual(filteredAndSummarizedQuery);
+    });
+    it(`should be the original query for post-aggregation filter step`, () => {
+      expect(steps[3].query.query()).toEqual(postAggregationFilterQuery);
+    });
+  });
+  describe("previewQuery", () => {
+    it(`shouldn't include filter, summarize, or post-aggregrationfilter for data step`, () => {
+      expect(steps[0].previewQuery.query()).toEqual(rawDataQuery);
+    });
+    it(`shouldn't include summarize or post-aggregrationfilter filter step`, () => {
+      expect(steps[1].previewQuery.query()).toEqual(filteredQuery);
+    });
+    it(`shouldn't include post-aggregrationfilter for summarize step`, () => {
+      filteredAndSummarizedQuery;
+    });
+    it(`should be the original query for post-aggregation filter step`, () => {
+      expect(steps[3].previewQuery.query()).toEqual(postAggregationFilterQuery);
+    });
+  });
+  describe("update", () => {
+    it("should remove all steps when changing the table", () => {
+      const newQuery = steps[0].update(
+        steps[0].query.setTableId(PRODUCTS.id).datasetQuery(),
+      );
+      expect(newQuery.query()).toEqual({ "source-table": PRODUCTS.id });
+    });
+    it("shouldn't remove summarize or post-aggregrationfilter when removing filter", () => {
+      const newQuery = steps[1].update(
+        steps[1].revert(steps[1].query).datasetQuery(),
+      );
+      expect(newQuery.query()).toEqual({
+        ...postAggregationFilterQuery,
+        "source-query": summarizedQuery,
+      });
+    });
+    it("should remove post-aggregrationfilter when removing summarize", () => {
+      const newQuery = steps[2].update(
+        steps[2].revert(steps[2].query).datasetQuery(),
+      );
+      expect(newQuery.query()).toEqual(filteredQuery);
+    });
+    it("should remove empty layer of nesting but not remove filter or summarize when removing post-aggregation filter", () => {
+      const newQuery = steps[3].update(
+        steps[3].revert(steps[3].query).datasetQuery(),
+      );
+      expect(newQuery.query()).toEqual(filteredAndSummarizedQuery);
+    });
   });
 });
-- 
GitLab