From 064d815e99c4134030f55515208304a0c0a29f26 Mon Sep 17 00:00:00 2001
From: Ariya Hidayat <ariya@metabase.com>
Date: Tue, 16 Nov 2021 15:58:49 -0800
Subject: [PATCH] Fuzz the parser of custom expression (#18942)

---
 .github/workflows/fuzzing.yml                 |  25 ++
 .../lib/expressions/fuzz.parser.unit.spec.js  |  26 ++
 .../metabase/lib/expressions/generator.js     | 381 +++++++++++++-----
 .../test/metabase/lib/expressions/prng.js     |  24 ++
 4 files changed, 354 insertions(+), 102 deletions(-)
 create mode 100644 frontend/test/metabase/lib/expressions/fuzz.parser.unit.spec.js
 create mode 100644 frontend/test/metabase/lib/expressions/prng.js

diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml
index 5795b14042a..946367f236b 100644
--- a/.github/workflows/fuzzing.yml
+++ b/.github/workflows/fuzzing.yml
@@ -42,3 +42,28 @@ jobs:
       env:
         MB_FUZZ: 1
       name: Run fuzz testing on the tokenizer
+
+  fe-fuzz-parser:
+    runs-on: ubuntu-20.04
+    timeout-minutes: 7
+    steps:
+    - uses: actions/checkout@v2
+    - name: Prepare Node.js
+      uses: actions/setup-node@v1
+      with:
+        node-version: 14.x
+    - name: Get M2 cache
+      uses: actions/cache@v2
+      with:
+        path: ~/.m2
+        key: ${{ runner.os }}-cljs-${{ hashFiles('**/shadow-cljs.edn') }}
+    - name: Get yarn cache
+      uses: actions/cache@v2
+      with:
+        path: ~/.cache/yarn
+        key: ${{ runner.os }}-yarn-${{ hashFiles('**/yarn.lock') }}
+    - run: yarn install --frozen-lockfile --prefer-offline
+    - run: yarn test-unit frontend/test/metabase/lib/expressions/fuzz.parser.unit.spec.js
+      env:
+        MB_FUZZ: 1
+      name: Run fuzz testing on the parser
diff --git a/frontend/test/metabase/lib/expressions/fuzz.parser.unit.spec.js b/frontend/test/metabase/lib/expressions/fuzz.parser.unit.spec.js
new file mode 100644
index 00000000000..5855afcf5f1
--- /dev/null
+++ b/frontend/test/metabase/lib/expressions/fuzz.parser.unit.spec.js
@@ -0,0 +1,26 @@
+import { parse } from "metabase/lib/expressions/parser";
+
+import { generateExpression } from "./generator";
+
+const fuzz = process.env.MB_FUZZ ? describe : describe.skip;
+
+const handle = source => {
+  const { cst } = parse({ source, tokenVector: null, startRule: "expression" });
+  return cst;
+};
+
+describe("metabase/lib/expressions/parser", () => {
+  // quick sanity check before the real fuzzing
+  it("should parse custom expression", () => {
+    expect(() => handle("CASE([Deal],[Price]*7e-1,[Price])")).not.toThrow();
+  });
+});
+
+fuzz("FUZZING metabase/lib/expressions/parser", () => {
+  for (let seed = 1; seed < 1e4; ++seed) {
+    it("should parse generated expression from seed " + seed, () => {
+      const { expression } = generateExpression(seed);
+      expect(() => handle(expression)).not.toThrow();
+    });
+  }
+});
diff --git a/frontend/test/metabase/lib/expressions/generator.js b/frontend/test/metabase/lib/expressions/generator.js
index 2d6338c4228..e405d667401 100644
--- a/frontend/test/metabase/lib/expressions/generator.js
+++ b/frontend/test/metabase/lib/expressions/generator.js
@@ -1,46 +1,20 @@
-// Simple Fast Counter - as recommended by PRACTRAND
-const sfc32 = (a, b, c, d) => {
-  return () => {
-    a >>>= 0;
-    b >>>= 0;
-    c >>>= 0;
-    d >>>= 0;
-    let t = (a + b) | 0;
-    a = b ^ (b >>> 9);
-    b = (c + (c << 3)) | 0;
-    c = (c << 21) | (c >>> 11);
-    d = (d + 1) | 0;
-    t = (t + d) | 0;
-    c = (c + t) | 0;
-    return (t >>> 0) / 4294967296;
-  };
-};
-
-export function generateExpression(seed) {
-  const u32seed = seed ^ 0xc0fefe;
-  const mathRandom = sfc32(0x9e3779b9, 0x243f6a88, 0xb7e15162, u32seed);
-  [...Array(15)].forEach(mathRandom);
-
-  const randomInt = max => Math.floor(max * mathRandom());
+import { createRandom } from "./prng";
+
+export function generateExpression(seed, depth = 13) {
+  const random = createRandom(seed);
+
+  const randomInt = max => Math.floor(max * random());
   const randomItem = items => items[randomInt(items.length)];
   const oneOf = functions => () => randomItem(functions).apply(null, []);
   const listOf = (n, functions) => () =>
     [...Array(n)].map(_ => oneOf(functions)());
 
-  const NODE = {
-    Literal: 1,
-    Field: 2,
-    Unary: 3,
-    Binary: 4,
-    FunctionCall: 5,
-    Group: 6,
-  };
-
   const zero = () => 0;
   const one = () => 1;
   const integer = () => randomInt(1e6);
-  const float1 = () => String(integer()) + ".";
-  const float2 = () => float1() + String(integer());
+  const float = () => String(integer()) + "." + String(integer());
+
+  const string = () => '"' + characters() + '"';
 
   const uppercase = () => String.fromCharCode(65 + randomInt(26)); // A..Z
   const lowercase = () => String.fromCharCode(97 + randomInt(26)); // a..z
@@ -58,120 +32,323 @@ export function generateExpression(seed) {
     return [start, ...part].join("");
   };
 
-  const literal = () => {
+  const identifier = () => {
+    const len = randomInt(7);
+    const start = oneOf([uppercase, lowercase, underscore])();
+    const part = listOf(len, [uppercase, lowercase, underscore, digit])();
+    return [start, ...part].join("");
+  };
+
+  const NODE = {
+    Literal: 1,
+    Field: 2,
+    Unary: 3,
+    Binary: 4,
+    FunctionCall: 5,
+    Group: 6,
+  };
+
+  const randomizeCase = str =>
+    str
+      .split("")
+      .map(ch => (randomInt(10) < 3 ? ch.toUpperCase() : ch))
+      .join("");
+
+  const format = node => {
+    const spaces = () => listOf(1, [space, () => ""])().join("");
+    const blank = ch => spaces() + ch + spaces();
+    let str = null;
+    const { type, value, op, left, right, child, params } = node;
+    switch (type) {
+      case NODE.Field:
+      case NODE.Literal:
+        str = value;
+        break;
+      case NODE.Unary:
+        str = blank(op) + format(child);
+        break;
+      case NODE.Binary:
+        str = format(left) + blank(op) + format(right);
+        break;
+      case NODE.FunctionCall:
+        str =
+          randomizeCase(value) +
+          blank("(") +
+          params.map(format).join(", ") +
+          blank(")");
+        break;
+      case NODE.Group:
+        str = blank("(") + format(child) + blank(")");
+        break;
+    }
+
+    if (str === null) {
+      throw new Error(`Unknown AST node ${type}`);
+    }
+    return String(str);
+  };
+
+  const numberExpression = () => {
+    --depth;
+    const node =
+      depth <= 0
+        ? numberLiteral()
+        : oneOf([
+            numberLiteral,
+            field,
+            unaryMinus,
+            binary,
+            numberTransform,
+            power,
+            stringLength,
+            numberGroup,
+          ])();
+    ++depth;
+    return node;
+  };
+
+  const numberLiteral = () => {
     const exp = () => randomItem(["", "-", "+"]) + randomInt(1e2);
-    const number = () => oneOf([zero, one, integer, float1, float2])();
+    const number = () => oneOf([zero, one, integer, float])(); // LIMITATION: no dangling decimal point, e.g. "3."
     const sci = () => number() + randomItem(["e", "E"]) + exp();
-    const string = () => '"' + characters() + '"';
     return {
       type: NODE.Literal,
-      value: oneOf([number, sci, string])(),
+      value: oneOf([number, sci])(),
     };
   };
 
-  const identifier = () => {
-    const len = randomInt(7);
-    const start = oneOf([uppercase, lowercase, underscore])();
-    const part = listOf(len, [uppercase, lowercase, underscore, digit])();
-    return [start, ...part].join("");
+  const validIdentifier = () => {
+    const KEYWORDS = ["and", "or", "not"];
+    let candidate;
+    do {
+      candidate = identifier();
+    } while (KEYWORDS.includes(candidate.toLowerCase()));
+    return candidate;
   };
 
   const field = () => {
     const fk = () => "[" + identifier() + " → " + identifier() + "]";
     const bracketedName = () => "[" + identifier() + "]";
-    const name = oneOf([identifier, fk, bracketedName])();
+    const name = oneOf([validIdentifier, fk, bracketedName])();
     return {
       type: NODE.Field,
       value: name,
     };
   };
 
-  const unary = () => {
+  // LIMITATION: no negative on negative, e.g. "--4"
+  const unaryMinus = () => {
     return {
       type: NODE.Unary,
-      op: randomItem(["-", "NOT "]),
-      child: expression(),
+      op: "-",
+      child: oneOf([numberLiteral])(),
     };
   };
 
   const binary = () => {
     return {
       type: NODE.Binary,
-      op: randomItem([
-        "+",
-        "-",
-        "*",
-        "/",
-        "=",
-        "!=",
-        "<",
-        ">",
-        "<=",
-        ">=",
-        " AND ",
-        " OR ",
+      op: randomItem(["+", "-", "*", "/"]),
+      left: numberExpression(),
+      right: numberExpression(),
+    };
+  };
+
+  const numberTransform = () => {
+    return {
+      type: NODE.FunctionCall,
+      value: randomItem([
+        "abs",
+        "ceil",
+        "exp",
+        "floor",
+        "log",
+        "round",
+        "sqrt",
       ]),
-      left: expression(),
-      right: expression(),
+      params: [numberExpression()],
     };
   };
 
-  const call = () => {
-    const count = randomInt(5);
+  const power = () => {
     return {
       type: NODE.FunctionCall,
-      value: identifier(),
-      params: listOf(count, [expression])(),
+      value: "power",
+      params: listOf(2, [numberExpression])(),
     };
   };
 
-  const group = () => {
+  const stringLength = () => {
+    return {
+      type: NODE.FunctionCall,
+      value: "length",
+      params: [stringExpression()],
+    };
+  };
+
+  const numberGroup = () => {
     return {
       type: NODE.Group,
-      child: primary(),
+      child: numberExpression(),
     };
   };
 
-  const primary = () => {
+  const booleanExpression = () => {
     --depth;
-    const node = oneOf([field, literal, unary, binary, call, group])();
+    const node =
+      depth <= 0
+        ? field()
+        : oneOf([
+            field,
+            logicalNot,
+            logicalBinary,
+            comparison,
+            stringCheck,
+            valueCheck,
+            dateCheck,
+            logicalGroup,
+          ])();
     ++depth;
     return node;
   };
-  const expression = () => (depth <= 0 ? literal() : primary());
 
-  const format = node => {
-    const spaces = () => listOf(1, [space, () => ""])().join("");
-    const blank = ch => spaces() + ch + spaces();
-    let str = null;
-    const { type, value, op, left, right, child, params } = node;
-    switch (type) {
-      case NODE.Field:
-      case NODE.Literal:
-        str = value;
-        break;
-      case NODE.Unary:
-        str = blank(op) + " " + format(child);
-        break;
-      case NODE.Binary:
-        str = format(left) + blank(op) + format(right);
-        break;
-      case NODE.FunctionCall:
-        str = value + blank("(") + params.map(format).join(", ") + blank(")");
-        break;
-      case NODE.Group:
-        str = blank("(") + format(child) + blank(")");
-        break;
-    }
+  // LIMITATION: no NOT on NOT, e.g. "NOT NOT [HighlyRated]"
+  const logicalNot = () => {
+    return {
+      type: NODE.Unary,
+      op: "NOT ",
+      child: oneOf([field, comparison, logicalGroup])(),
+    };
+  };
 
-    if (str === null) {
-      throw new Error(`Unknown AST node ${type}`);
-    }
-    return str;
+  const logicalBinary = () => {
+    return {
+      type: NODE.Binary,
+      op: randomItem([" AND ", " OR "]),
+      left: booleanExpression(),
+      right: booleanExpression(),
+    };
+  };
+
+  const comparison = () => {
+    return {
+      type: NODE.Binary,
+      op: randomItem(["=", "!=", "<", ">", "<=", ">="]),
+      left: numberExpression(),
+      right: numberExpression(),
+    };
+  };
+
+  const stringCheck = () => {
+    return {
+      type: NODE.FunctionCall,
+      value: randomItem(["contains", "startsWith", "endsWith"]),
+      params: listOf(2, [stringExpression])(),
+    };
+  };
+
+  const valueCheck = () => {
+    return {
+      type: NODE.FunctionCall,
+      value: randomItem(["isNull", "isEmpty"]),
+      params: [field()], // LIMITATION: only works on fields
+    };
   };
 
-  let depth = 17;
+  const dateCheck = () => oneOf([betweenDates, intervalDates])();
+
+  const betweenDates = () => {
+    return {
+      type: NODE.FunctionCall,
+      value: "between",
+      params: [field(), stringExpression(), stringExpression()],
+    };
+  };
+
+  const intervalDates = () => {
+    return {
+      type: NODE.FunctionCall,
+      value: "interval",
+      params: [field(), numberExpression(), stringExpression()],
+    };
+  };
+
+  const logicalGroup = () => {
+    return {
+      type: NODE.Group,
+      child: booleanExpression(),
+    };
+  };
+
+  const stringExpression = () => {
+    --depth;
+    const node =
+      depth <= 0
+        ? stringLiteral()
+        : oneOf([
+            stringLiteral,
+            field,
+            stringConcat,
+            stringTransform,
+            stringReplace,
+            substring,
+            regexextract,
+          ])();
+    ++depth;
+    return node;
+  };
+
+  const stringLiteral = () => {
+    return {
+      type: NODE.Literal,
+      value: string(),
+    };
+  };
+
+  const stringConcat = () => {
+    const count = 1 + randomInt(5);
+    return {
+      type: NODE.FunctionCall,
+      value: "concat",
+      params: listOf(count, [stringExpression])(),
+    };
+  };
+
+  const stringTransform = () => {
+    return {
+      type: NODE.FunctionCall,
+      value: randomItem(["ltrim", "trim", "rtrim", "lower", "upper"]),
+      params: [stringExpression()],
+    };
+  };
+
+  const stringReplace = () => {
+    return {
+      type: NODE.FunctionCall,
+      value: "replace",
+      params: [field(), stringExpression(), stringExpression()],
+    };
+  };
+
+  const substring = () => {
+    return {
+      type: NODE.FunctionCall,
+      value: "substring",
+      params: [stringExpression(), numberExpression(), numberExpression()],
+    };
+  };
+
+  const regexextract = () => {
+    return {
+      type: NODE.FunctionCall,
+      value: "regexextract",
+      params: [field(), stringLiteral()], // FIXME: maybe regexpLiteral?
+    };
+  };
+
+  const tree = oneOf([numberExpression, booleanExpression, stringExpression])();
+
+  const expression = format(tree);
 
-  const tree = expression();
-  return { tree, expression: format(tree) };
+  return { tree, expression };
 }
diff --git a/frontend/test/metabase/lib/expressions/prng.js b/frontend/test/metabase/lib/expressions/prng.js
new file mode 100644
index 00000000000..bf76ff2a2c0
--- /dev/null
+++ b/frontend/test/metabase/lib/expressions/prng.js
@@ -0,0 +1,24 @@
+// Simple Fast Counter - as recommended by PRACTRAND
+const sfc32 = (a, b, c, d) => {
+  return () => {
+    a >>>= 0;
+    b >>>= 0;
+    c >>>= 0;
+    d >>>= 0;
+    let t = (a + b) | 0;
+    a = b ^ (b >>> 9);
+    b = (c + (c << 3)) | 0;
+    c = (c << 21) | (c >>> 11);
+    d = (d + 1) | 0;
+    t = (t + d) | 0;
+    c = (c + t) | 0;
+    return (t >>> 0) / 4294967296;
+  };
+};
+
+export function createRandom(seed) {
+  const u32seed = seed ^ 0xc0fefe;
+  const mathRandom = sfc32(0x9e3779b9, 0x243f6a88, 0xb7e15162, u32seed);
+  [...Array(15)].forEach(mathRandom);
+  return mathRandom;
+}
-- 
GitLab