From 064d815e99c4134030f55515208304a0c0a29f26 Mon Sep 17 00:00:00 2001 From: Ariya Hidayat <ariya@metabase.com> Date: Tue, 16 Nov 2021 15:58:49 -0800 Subject: [PATCH] Fuzz the parser of custom expression (#18942) --- .github/workflows/fuzzing.yml | 25 ++ .../lib/expressions/fuzz.parser.unit.spec.js | 26 ++ .../metabase/lib/expressions/generator.js | 381 +++++++++++++----- .../test/metabase/lib/expressions/prng.js | 24 ++ 4 files changed, 354 insertions(+), 102 deletions(-) create mode 100644 frontend/test/metabase/lib/expressions/fuzz.parser.unit.spec.js create mode 100644 frontend/test/metabase/lib/expressions/prng.js diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index 5795b14042a..946367f236b 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -42,3 +42,28 @@ jobs: env: MB_FUZZ: 1 name: Run fuzz testing on the tokenizer + + fe-fuzz-parser: + runs-on: ubuntu-20.04 + timeout-minutes: 7 + steps: + - uses: actions/checkout@v2 + - name: Prepare Node.js + uses: actions/setup-node@v1 + with: + node-version: 14.x + - name: Get M2 cache + uses: actions/cache@v2 + with: + path: ~/.m2 + key: ${{ runner.os }}-cljs-${{ hashFiles('**/shadow-cljs.edn') }} + - name: Get yarn cache + uses: actions/cache@v2 + with: + path: ~/.cache/yarn + key: ${{ runner.os }}-yarn-${{ hashFiles('**/yarn.lock') }} + - run: yarn install --frozen-lockfile --prefer-offline + - run: yarn test-unit frontend/test/metabase/lib/expressions/fuzz.parser.unit.spec.js + env: + MB_FUZZ: 1 + name: Run fuzz testing on the parser diff --git a/frontend/test/metabase/lib/expressions/fuzz.parser.unit.spec.js b/frontend/test/metabase/lib/expressions/fuzz.parser.unit.spec.js new file mode 100644 index 00000000000..5855afcf5f1 --- /dev/null +++ b/frontend/test/metabase/lib/expressions/fuzz.parser.unit.spec.js @@ -0,0 +1,26 @@ +import { parse } from "metabase/lib/expressions/parser"; + +import { generateExpression } from "./generator"; + +const fuzz = process.env.MB_FUZZ ? describe : describe.skip; + +const handle = source => { + const { cst } = parse({ source, tokenVector: null, startRule: "expression" }); + return cst; +}; + +describe("metabase/lib/expressions/parser", () => { + // quick sanity check before the real fuzzing + it("should parse custom expresssion", () => { + expect(() => handle("CASE([Deal],[Price]*7e-1,[Price])")).not.toThrow(); + }); +}); + +fuzz("FUZZING metabase/lib/expressions/parser", () => { + for (let seed = 1; seed < 1e4; ++seed) { + it("should parse generated expression from seed " + seed, () => { + const { expression } = generateExpression(seed); + expect(() => handle(expression)).not.toThrow(); + }); + } +}); diff --git a/frontend/test/metabase/lib/expressions/generator.js b/frontend/test/metabase/lib/expressions/generator.js index 2d6338c4228..e405d667401 100644 --- a/frontend/test/metabase/lib/expressions/generator.js +++ b/frontend/test/metabase/lib/expressions/generator.js @@ -1,46 +1,20 @@ -// Simple Fast Counter - as recommended by PRACTRAND -const sfc32 = (a, b, c, d) => { - return () => { - a >>>= 0; - b >>>= 0; - c >>>= 0; - d >>>= 0; - let t = (a + b) | 0; - a = b ^ (b >>> 9); - b = (c + (c << 3)) | 0; - c = (c << 21) | (c >>> 11); - d = (d + 1) | 0; - t = (t + d) | 0; - c = (c + t) | 0; - return (t >>> 0) / 4294967296; - }; -}; - -export function generateExpression(seed) { - const u32seed = seed ^ 0xc0fefe; - const mathRandom = sfc32(0x9e3779b9, 0x243f6a88, 0xb7e15162, u32seed); - [...Array(15)].forEach(mathRandom); - - const randomInt = max => Math.floor(max * mathRandom()); +import { createRandom } from "./prng"; + +export function generateExpression(seed, depth = 13) { + const random = createRandom(seed); + + const randomInt = max => Math.floor(max * random()); const randomItem = items => items[randomInt(items.length)]; const oneOf = functions => () => randomItem(functions).apply(null, []); const listOf = (n, functions) => () => [...Array(n)].map(_ => oneOf(functions)()); - const NODE = { - Literal: 1, - Field: 2, - Unary: 3, - Binary: 4, - FunctionCall: 5, - Group: 6, - }; - const zero = () => 0; const one = () => 1; const integer = () => randomInt(1e6); - const float1 = () => String(integer()) + "."; - const float2 = () => float1() + String(integer()); + const float = () => String(integer()) + "." + String(integer()); + + const string = () => '"' + characters() + '"'; const uppercase = () => String.fromCharCode(65 + randomInt(26)); // A..Z const lowercase = () => String.fromCharCode(97 + randomInt(26)); // a..z @@ -58,120 +32,323 @@ export function generateExpression(seed) { return [start, ...part].join(""); }; - const literal = () => { + const identifier = () => { + const len = randomInt(7); + const start = oneOf([uppercase, lowercase, underscore])(); + const part = listOf(len, [uppercase, lowercase, underscore, digit])(); + return [start, ...part].join(""); + }; + + const NODE = { + Literal: 1, + Field: 2, + Unary: 3, + Binary: 4, + FunctionCall: 5, + Group: 6, + }; + + const randomizeCase = str => + str + .split("") + .map(ch => (randomInt(10) < 3 ? ch.toUpperCase() : ch)) + .join(""); + + const format = node => { + const spaces = () => listOf(1, [space, () => ""])().join(""); + const blank = ch => spaces() + ch + spaces(); + let str = null; + const { type, value, op, left, right, child, params } = node; + switch (type) { + case NODE.Field: + case NODE.Literal: + str = value; + break; + case NODE.Unary: + str = blank(op) + format(child); + break; + case NODE.Binary: + str = format(left) + blank(op) + format(right); + break; + case NODE.FunctionCall: + str = + randomizeCase(value) + + blank("(") + + params.map(format).join(", ") + + blank(")"); + break; + case NODE.Group: + str = blank("(") + format(child) + blank(")"); + break; + } + + if (str === null) { + throw new Error(`Unknown AST node ${type}`); + } + return String(str); + }; + + const numberExpression = () => { + --depth; + const node = + depth <= 0 + ? numberLiteral() + : oneOf([ + numberLiteral, + field, + unaryMinus, + binary, + numberTransform, + power, + stringLength, + numberGroup, + ])(); + ++depth; + return node; + }; + + const numberLiteral = () => { const exp = () => randomItem(["", "-", "+"]) + randomInt(1e2); - const number = () => oneOf([zero, one, integer, float1, float2])(); + const number = () => oneOf([zero, one, integer, float])(); // LIMITATION: no dangling decimal point, e.g. "3." const sci = () => number() + randomItem(["e", "E"]) + exp(); - const string = () => '"' + characters() + '"'; return { type: NODE.Literal, - value: oneOf([number, sci, string])(), + value: oneOf([number, sci])(), }; }; - const identifier = () => { - const len = randomInt(7); - const start = oneOf([uppercase, lowercase, underscore])(); - const part = listOf(len, [uppercase, lowercase, underscore, digit])(); - return [start, ...part].join(""); + const validIdentifier = () => { + const KEYWORDS = ["and", "or", "not"]; + let candidate; + do { + candidate = identifier(); + } while (KEYWORDS.includes(candidate.toLowerCase())); + return candidate; }; const field = () => { const fk = () => "[" + identifier() + " → " + identifier() + "]"; const bracketedName = () => "[" + identifier() + "]"; - const name = oneOf([identifier, fk, bracketedName])(); + const name = oneOf([validIdentifier, fk, bracketedName])(); return { type: NODE.Field, value: name, }; }; - const unary = () => { + // LIMITATION: no negative on negative, e.g. "--4" + const unaryMinus = () => { return { type: NODE.Unary, - op: randomItem(["-", "NOT "]), - child: expression(), + op: "-", + child: oneOf([numberLiteral])(), }; }; const binary = () => { return { type: NODE.Binary, - op: randomItem([ - "+", - "-", - "*", - "/", - "=", - "!=", - "<", - ">", - "<=", - ">=", - " AND ", - " OR ", + op: randomItem(["+", "-", "*", "/"]), + left: numberExpression(), + right: numberExpression(), + }; + }; + + const numberTransform = () => { + return { + type: NODE.FunctionCall, + value: randomItem([ + "abs", + "ceil", + "exp", + "floor", + "log", + "round", + "sqrt", ]), - left: expression(), - right: expression(), + params: [numberExpression()], }; }; - const call = () => { - const count = randomInt(5); + const power = () => { return { type: NODE.FunctionCall, - value: identifier(), - params: listOf(count, [expression])(), + value: "power", + params: listOf(2, [numberExpression])(), }; }; - const group = () => { + const stringLength = () => { + return { + type: NODE.FunctionCall, + value: "length", + params: [stringExpression()], + }; + }; + + const numberGroup = () => { return { type: NODE.Group, - child: primary(), + child: numberExpression(), }; }; - const primary = () => { + const booleanExpression = () => { --depth; - const node = oneOf([field, literal, unary, binary, call, group])(); + const node = + depth <= 0 + ? field() + : oneOf([ + field, + logicalNot, + logicalBinary, + comparison, + stringCheck, + valueCheck, + dateCheck, + logicalGroup, + ])(); ++depth; return node; }; - const expression = () => (depth <= 0 ? literal() : primary()); - const format = node => { - const spaces = () => listOf(1, [space, () => ""])().join(""); - const blank = ch => spaces() + ch + spaces(); - let str = null; - const { type, value, op, left, right, child, params } = node; - switch (type) { - case NODE.Field: - case NODE.Literal: - str = value; - break; - case NODE.Unary: - str = blank(op) + " " + format(child); - break; - case NODE.Binary: - str = format(left) + blank(op) + format(right); - break; - case NODE.FunctionCall: - str = value + blank("(") + params.map(format).join(", ") + blank(")"); - break; - case NODE.Group: - str = blank("(") + format(child) + blank(")"); - break; - } + // LIMITATION: no NOT on NOT, e.g. "NOT NOT [HighlyRated]" + const logicalNot = () => { + return { + type: NODE.Unary, + op: "NOT ", + child: oneOf([field, comparison, logicalGroup])(), + }; + }; - if (str === null) { - throw new Error(`Unknown AST node ${type}`); - } - return str; + const logicalBinary = () => { + return { + type: NODE.Binary, + op: randomItem([" AND ", " OR "]), + left: booleanExpression(), + right: booleanExpression(), + }; + }; + + const comparison = () => { + return { + type: NODE.Binary, + op: randomItem(["=", "!=", "<", ">", "<=", ">="]), + left: numberExpression(), + right: numberExpression(), + }; + }; + + const stringCheck = () => { + return { + type: NODE.FunctionCall, + value: randomItem(["contains", "startsWith", "endsWith"]), + params: listOf(2, [stringExpression])(), + }; + }; + + const valueCheck = () => { + return { + type: NODE.FunctionCall, + value: randomItem(["isNull", "isEmpty"]), + params: [field()], // LIMITATION: only works on fields + }; }; - let depth = 17; + const dateCheck = () => oneOf([betweenDates, intervalDates])(); + + const betweenDates = () => { + return { + type: NODE.FunctionCall, + value: "between", + params: [field(), stringExpression(), stringExpression()], + }; + }; + + const intervalDates = () => { + return { + type: NODE.FunctionCall, + value: "interval", + params: [field(), numberExpression(), stringExpression()], + }; + }; + + const logicalGroup = () => { + return { + type: NODE.Group, + child: booleanExpression(), + }; + }; + + const stringExpression = () => { + --depth; + const node = + depth <= 0 + ? stringLiteral() + : oneOf([ + stringLiteral, + field, + stringConcat, + stringTransform, + stringReplace, + substring, + regexextract, + ])(); + ++depth; + return node; + }; + + const stringLiteral = () => { + return { + type: NODE.Literal, + value: string(), + }; + }; + + const stringConcat = () => { + const count = 1 + randomInt(5); + return { + type: NODE.FunctionCall, + value: "concat", + params: listOf(count, [stringExpression])(), + }; + }; + + const stringTransform = () => { + return { + type: NODE.FunctionCall, + value: randomItem(["ltrim", "trim", "rtrim", "lower", "upper"]), + params: [stringExpression()], + }; + }; + + const stringReplace = () => { + return { + type: NODE.FunctionCall, + value: "replace", + params: [field(), stringExpression(), stringExpression()], + }; + }; + + const substring = () => { + return { + type: NODE.FunctionCall, + value: "substring", + params: [stringExpression(), numberExpression(), numberExpression()], + }; + }; + + const regexextract = () => { + return { + type: NODE.FunctionCall, + value: "regexextract", + params: [field(), stringLiteral()], // FIXME: maybe regexpLiteral? + }; + }; + + const tree = oneOf([numberExpression, booleanExpression, stringExpression])(); + + const expression = format(tree); - const tree = expression(); - return { tree, expression: format(tree) }; + return { tree, expression }; } diff --git a/frontend/test/metabase/lib/expressions/prng.js b/frontend/test/metabase/lib/expressions/prng.js new file mode 100644 index 00000000000..bf76ff2a2c0 --- /dev/null +++ b/frontend/test/metabase/lib/expressions/prng.js @@ -0,0 +1,24 @@ +// Simple Fast Counter - as recommended by PRACTRAND +const sfc32 = (a, b, c, d) => { + return () => { + a >>>= 0; + b >>>= 0; + c >>>= 0; + d >>>= 0; + let t = (a + b) | 0; + a = b ^ (b >>> 9); + b = (c + (c << 3)) | 0; + c = (c << 21) | (c >>> 11); + d = (d + 1) | 0; + t = (t + d) | 0; + c = (c + t) | 0; + return (t >>> 0) / 4294967296; + }; +}; + +export function createRandom(seed) { + const u32seed = seed ^ 0xc0fefe; + const mathRandom = sfc32(0x9e3779b9, 0x243f6a88, 0xb7e15162, u32seed); + [...Array(15)].forEach(mathRandom); + return mathRandom; +} -- GitLab