Skip to content
Snippets Groups Projects
Unverified Commit 064d815e authored by Ariya Hidayat's avatar Ariya Hidayat Committed by GitHub
Browse files

Fuzz the parser of custom expression (#18942)

parent 2d5766a8
No related branches found
No related tags found
No related merge requests found
......@@ -42,3 +42,28 @@ jobs:
env:
MB_FUZZ: 1
name: Run fuzz testing on the tokenizer
# CI job that runs the expression-parser fuzz spec with MB_FUZZ set.
fe-fuzz-parser:
runs-on: ubuntu-20.04
# Hard cap on the whole job, in minutes.
timeout-minutes: 7
steps:
- uses: actions/checkout@v2
- name: Prepare Node.js
uses: actions/setup-node@v1
with:
node-version: 14.x
# Cache for ~/.m2, keyed on the runner OS and the hash of all shadow-cljs.edn files.
- name: Get M2 cache
uses: actions/cache@v2
with:
path: ~/.m2
key: ${{ runner.os }}-cljs-${{ hashFiles('**/shadow-cljs.edn') }}
# Cache for ~/.cache/yarn, keyed on the runner OS and the hash of all yarn.lock files.
- name: Get yarn cache
uses: actions/cache@v2
with:
path: ~/.cache/yarn
key: ${{ runner.os }}-yarn-${{ hashFiles('**/yarn.lock') }}
- run: yarn install --frozen-lockfile --prefer-offline
# Runs only the parser fuzz spec; MB_FUZZ is passed to this step through env below.
- run: yarn test-unit frontend/test/metabase/lib/expressions/fuzz.parser.unit.spec.js
env:
MB_FUZZ: 1
name: Run fuzz testing on the parser
import { parse } from "metabase/lib/expressions/parser";
import { generateExpression } from "./generator";
const fuzz = process.env.MB_FUZZ ? describe : describe.skip;
/**
 * Runs the expression parser over `source`, starting at the "expression"
 * rule and without a pre-built token vector, and hands back the resulting
 * `cst` property.
 *
 * @param {string} source - expression text to parse
 * @returns {*} the `cst` field of the object returned by `parse`
 */
const handle = source => {
  const parseOptions = { source, tokenVector: null, startRule: "expression" };
  const result = parse(parseOptions);
  return result.cst;
};
// Always-on suite (not gated by MB_FUZZ): verifies that one hand-written
// expression parses without throwing.
describe("metabase/lib/expressions/parser", () => {
  // quick sanity check before the real fuzzing
  it("should parse custom expression", () => {
    expect(() => handle("CASE([Deal],[Price]*7e-1,[Price])")).not.toThrow();
  });
});
// Fuzz suite: registers one test per seed. `fuzz` is `describe` when
// MB_FUZZ is set and `describe.skip` otherwise, so this whole suite is
// skipped in regular runs.
fuzz("FUZZING metabase/lib/expressions/parser", () => {
  // Seeds run from 1 up to, but not including, this bound.
  const SEED_LIMIT = 1e4;

  for (let seed = 1; seed < SEED_LIMIT; ++seed) {
    const title = `should parse generated expression from seed ${seed}`;
    it(title, () => {
      const generated = generateExpression(seed);
      const source = generated.expression;
      expect(() => handle(source)).not.toThrow();
    });
  }
});
// Simple Fast Counter - as recommended by PRACTRAND
/**
 * Builds a seeded pseudo-random generator. The four arguments are the
 * initial state words; each is coerced to an unsigned 32-bit integer on
 * every call.
 *
 * @param {number} a - first state word
 * @param {number} b - second state word
 * @param {number} c - third state word
 * @param {number} d - counter word, incremented by one on every call
 * @returns {function(): number} generator yielding a value in [0, 1)
 */
const sfc32 = (a, b, c, d) => {
  const TWO_POW_32 = 4294967296;

  return function next() {
    a >>>= 0;
    b >>>= 0;
    c >>>= 0;
    d >>>= 0;

    // Everything below is computed from the state as it was on entry;
    // the state words are only overwritten afterwards.
    const sum = (a + b) | 0;
    const bumpedCounter = (d + 1) | 0;
    const output = (sum + bumpedCounter) | 0;
    const rotated = (c << 21) | (c >>> 11);

    a = b ^ (b >>> 9);
    b = (c + (c << 3)) | 0;
    c = (rotated + output) | 0;
    d = bumpedCounter;

    // Reinterpret the 32-bit result as unsigned, then scale into [0, 1).
    return (output >>> 0) / TWO_POW_32;
  };
};
export function generateExpression(seed) {
const u32seed = seed ^ 0xc0fefe;
const mathRandom = sfc32(0x9e3779b9, 0x243f6a88, 0xb7e15162, u32seed);
[...Array(15)].forEach(mathRandom);
const randomInt = max => Math.floor(max * mathRandom());
import { createRandom } from "./prng";
export function generateExpression(seed, depth = 13) {
const random = createRandom(seed);
const randomInt = max => Math.floor(max * random());
const randomItem = items => items[randomInt(items.length)];
const oneOf = functions => () => randomItem(functions).apply(null, []);
const listOf = (n, functions) => () =>
[...Array(n)].map(_ => oneOf(functions)());
const NODE = {
Literal: 1,
Field: 2,
Unary: 3,
Binary: 4,
FunctionCall: 5,
Group: 6,
};
const zero = () => 0;
const one = () => 1;
const integer = () => randomInt(1e6);
const float1 = () => String(integer()) + ".";
const float2 = () => float1() + String(integer());
const float = () => String(integer()) + "." + String(integer());
const string = () => '"' + characters() + '"';
const uppercase = () => String.fromCharCode(65 + randomInt(26)); // A..Z
const lowercase = () => String.fromCharCode(97 + randomInt(26)); // a..z
......@@ -58,120 +32,323 @@ export function generateExpression(seed) {
return [start, ...part].join("");
};
const literal = () => {
// Produces a random identifier: one leading character drawn from
// uppercase/lowercase/underscore, followed by randomInt(7) further
// characters that may also be digits.
const identifier = () => {
  const tailLength = randomInt(7);
  const head = oneOf([uppercase, lowercase, underscore])();
  const tail = listOf(tailLength, [uppercase, lowercase, underscore, digit])();
  return [head].concat(tail).join("");
};
// Numeric tags stored in the `type` field of generated tree nodes;
// `format` switches on these to decide how a node is rendered.
const NODE = {
Literal: 1,
Field: 2,
Unary: 3,
Binary: 4,
FunctionCall: 5,
Group: 6,
};
// Returns `str` with each UTF-16 unit upper-cased when randomInt(10) < 3,
// and left untouched otherwise. One random draw per unit, in order.
const randomizeCase = str => {
  let result = "";
  for (const ch of str.split("")) {
    const shouldUpper = randomInt(10) < 3;
    result += shouldUpper ? ch.toUpperCase() : ch;
  }
  return result;
};
/**
 * Renders a generated tree node (and, recursively, its children) as
 * expression source text, sprinkling optional whitespace around operators
 * and parentheses.
 *
 * @param {object} node - tree node carrying a NODE `type` tag
 * @returns {string} the textual form of the node
 * @throws {Error} when no branch produced a value for the node
 */
const format = node => {
  // Either a single `space()` result or the empty string, picked at random.
  const padding = () => listOf(1, [space, () => ""])().join("");
  // Wraps `ch` in independently chosen leading and trailing padding.
  const blank = ch => {
    const before = padding();
    const after = padding();
    return before + ch + after;
  };

  const { type, value, op, left, right, child, params } = node;
  let text = null;

  if (type === NODE.Field || type === NODE.Literal) {
    text = value;
  } else if (type === NODE.Unary) {
    text = blank(op) + format(child);
  } else if (type === NODE.Binary) {
    text = format(left) + blank(op) + format(right);
  } else if (type === NODE.FunctionCall) {
    // Function names get random casing; arguments are comma-separated.
    const callee = randomizeCase(value);
    const open = blank("(");
    const args = params.map(format).join(", ");
    const close = blank(")");
    text = callee + open + args + close;
  } else if (type === NODE.Group) {
    text = blank("(") + format(child) + blank(")");
  }

  if (text === null) {
    throw new Error(`Unknown AST node ${type}`);
  }
  return String(text);
};
// Generates a number-valued node. `depth` is decremented for the duration
// of the call; once it reaches zero or below only a number literal is
// produced, which bounds the recursion.
const numberExpression = () => {
  depth--;
  let node;
  if (depth <= 0) {
    node = numberLiteral();
  } else {
    const pick = oneOf([
      numberLiteral,
      field,
      unaryMinus,
      binary,
      numberTransform,
      power,
      stringLength,
      numberGroup,
    ]);
    node = pick();
  }
  depth++;
  return node;
};
const numberLiteral = () => {
const exp = () => randomItem(["", "-", "+"]) + randomInt(1e2);
const number = () => oneOf([zero, one, integer, float1, float2])();
const number = () => oneOf([zero, one, integer, float])(); // LIMITATION: no dangling decimal point, e.g. "3."
const sci = () => number() + randomItem(["e", "E"]) + exp();
const string = () => '"' + characters() + '"';
return {
type: NODE.Literal,
value: oneOf([number, sci, string])(),
value: oneOf([number, sci])(),
};
};
const identifier = () => {
const len = randomInt(7);
const start = oneOf([uppercase, lowercase, underscore])();
const part = listOf(len, [uppercase, lowercase, underscore, digit])();
return [start, ...part].join("");
const validIdentifier = () => {
const KEYWORDS = ["and", "or", "not"];
let candidate;
do {
candidate = identifier();
} while (KEYWORDS.includes(candidate.toLowerCase()));
return candidate;
};
const field = () => {
const fk = () => "[" + identifier() + "" + identifier() + "]";
const bracketedName = () => "[" + identifier() + "]";
const name = oneOf([identifier, fk, bracketedName])();
const name = oneOf([validIdentifier, fk, bracketedName])();
return {
type: NODE.Field,
value: name,
};
};
const unary = () => {
// LIMITATION: no negative on negative, e.g. "--4"
const unaryMinus = () => {
return {
type: NODE.Unary,
op: randomItem(["-", "NOT "]),
child: expression(),
op: "-",
child: oneOf([numberLiteral])(),
};
};
const binary = () => {
return {
type: NODE.Binary,
op: randomItem([
"+",
"-",
"*",
"/",
"=",
"!=",
"<",
">",
"<=",
">=",
" AND ",
" OR ",
op: randomItem(["+", "-", "*", "/"]),
left: numberExpression(),
right: numberExpression(),
};
};
const numberTransform = () => {
return {
type: NODE.FunctionCall,
value: randomItem([
"abs",
"ceil",
"exp",
"floor",
"log",
"round",
"sqrt",
]),
left: expression(),
right: expression(),
params: [numberExpression()],
};
};
const call = () => {
const count = randomInt(5);
const power = () => {
return {
type: NODE.FunctionCall,
value: identifier(),
params: listOf(count, [expression])(),
value: "power",
params: listOf(2, [numberExpression])(),
};
};
const group = () => {
const stringLength = () => {
return {
type: NODE.FunctionCall,
value: "length",
params: [stringExpression()],
};
};
const numberGroup = () => {
return {
type: NODE.Group,
child: primary(),
child: numberExpression(),
};
};
const primary = () => {
const booleanExpression = () => {
--depth;
const node = oneOf([field, literal, unary, binary, call, group])();
const node =
depth <= 0
? field()
: oneOf([
field,
logicalNot,
logicalBinary,
comparison,
stringCheck,
valueCheck,
dateCheck,
logicalGroup,
])();
++depth;
return node;
};
const expression = () => (depth <= 0 ? literal() : primary());
const format = node => {
const spaces = () => listOf(1, [space, () => ""])().join("");
const blank = ch => spaces() + ch + spaces();
let str = null;
const { type, value, op, left, right, child, params } = node;
switch (type) {
case NODE.Field:
case NODE.Literal:
str = value;
break;
case NODE.Unary:
str = blank(op) + " " + format(child);
break;
case NODE.Binary:
str = format(left) + blank(op) + format(right);
break;
case NODE.FunctionCall:
str = value + blank("(") + params.map(format).join(", ") + blank(")");
break;
case NODE.Group:
str = blank("(") + format(child) + blank(")");
break;
}
// LIMITATION: no NOT on NOT, e.g. "NOT NOT [HighlyRated]"
const logicalNot = () => {
return {
type: NODE.Unary,
op: "NOT ",
child: oneOf([field, comparison, logicalGroup])(),
};
};
if (str === null) {
throw new Error(`Unknown AST node ${type}`);
}
return str;
// Binary node joining two boolean expressions with " AND " or " OR ".
const logicalBinary = () => {
  const op = randomItem([" AND ", " OR "]);
  const left = booleanExpression();
  const right = booleanExpression();
  return { type: NODE.Binary, op, left, right };
};
// Binary node comparing two number expressions with a random
// comparison operator.
const comparison = () => {
  const op = randomItem(["=", "!=", "<", ">", "<=", ">="]);
  const left = numberExpression();
  const right = numberExpression();
  return { type: NODE.Binary, op, left, right };
};
// Function-call node for one of the string predicates, applied to two
// string expressions.
const stringCheck = () => {
  const value = randomItem(["contains", "startsWith", "endsWith"]);
  const params = listOf(2, [stringExpression])();
  return { type: NODE.FunctionCall, value, params };
};
// Function-call node for "isNull" or "isEmpty" with a single field argument.
const valueCheck = () => {
  const value = randomItem(["isNull", "isEmpty"]);
  const target = field(); // LIMITATION: only works on fields
  return { type: NODE.FunctionCall, value, params: [target] };
};
let depth = 17;
// Picks either a "between" or an "interval" call node at random.
const dateCheck = () => {
  const pick = oneOf([betweenDates, intervalDates]);
  return pick();
};
// Function-call node "between(field, string, string)".
const betweenDates = () => {
  const target = field();
  const lower = stringExpression();
  const upper = stringExpression();
  return {
    type: NODE.FunctionCall,
    value: "between",
    params: [target, lower, upper],
  };
};
// Function-call node "interval(field, number, string)".
const intervalDates = () => {
  const target = field();
  const amount = numberExpression();
  const unit = stringExpression();
  return {
    type: NODE.FunctionCall,
    value: "interval",
    params: [target, amount, unit],
  };
};
// Group node wrapping a boolean expression (rendered in parentheses by `format`).
const logicalGroup = () => ({
  type: NODE.Group,
  child: booleanExpression(),
});
// Generates a string-valued node. `depth` is decremented for the duration
// of the call; once it reaches zero or below only a string literal is
// produced, which bounds the recursion.
const stringExpression = () => {
  depth--;
  let node;
  if (depth <= 0) {
    node = stringLiteral();
  } else {
    const pick = oneOf([
      stringLiteral,
      field,
      stringConcat,
      stringTransform,
      stringReplace,
      substring,
      regexextract,
    ]);
    node = pick();
  }
  depth++;
  return node;
};
// Literal node whose value is whatever `string()` generates.
const stringLiteral = () => ({
  type: NODE.Literal,
  value: string(),
});
// Function-call node "concat(...)" taking 1 + randomInt(5) string expressions.
const stringConcat = () => {
  const arity = 1 + randomInt(5);
  const params = listOf(arity, [stringExpression])();
  return { type: NODE.FunctionCall, value: "concat", params };
};
// Function-call node for a random one-argument string function applied
// to a string expression.
const stringTransform = () => {
  const value = randomItem(["ltrim", "trim", "rtrim", "lower", "upper"]);
  const operand = stringExpression();
  return { type: NODE.FunctionCall, value, params: [operand] };
};
// Function-call node "replace(field, string, string)".
const stringReplace = () => {
  const target = field();
  const first = stringExpression();
  const second = stringExpression();
  return {
    type: NODE.FunctionCall,
    value: "replace",
    params: [target, first, second],
  };
};
// Function-call node "substring(string, number, number)".
const substring = () => {
  const text = stringExpression();
  const first = numberExpression();
  const second = numberExpression();
  return {
    type: NODE.FunctionCall,
    value: "substring",
    params: [text, first, second],
  };
};
// Function-call node "regexextract(field, stringLiteral)".
const regexextract = () => {
  const target = field();
  const pattern = stringLiteral(); // FIXME: maybe regexpLiteral?
  return {
    type: NODE.FunctionCall,
    value: "regexextract",
    params: [target, pattern],
  };
};
const tree = oneOf([numberExpression, booleanExpression, stringExpression])();
const expression = format(tree);
const tree = expression();
return { tree, expression: format(tree) };
return { tree, expression };
}
// Simple Fast Counter - as recommended by PRACTRAND
/**
 * Builds a seeded pseudo-random generator. The four arguments are the
 * initial state words; each is coerced to an unsigned 32-bit integer on
 * every call.
 *
 * @param {number} a - first state word
 * @param {number} b - second state word
 * @param {number} c - third state word
 * @param {number} d - counter word, incremented by one on every call
 * @returns {function(): number} generator yielding a value in [0, 1)
 */
const sfc32 = (a, b, c, d) => {
  const TWO_POW_32 = 4294967296;

  return function next() {
    a >>>= 0;
    b >>>= 0;
    c >>>= 0;
    d >>>= 0;

    // Everything below is computed from the state as it was on entry;
    // the state words are only overwritten afterwards.
    const sum = (a + b) | 0;
    const bumpedCounter = (d + 1) | 0;
    const output = (sum + bumpedCounter) | 0;
    const rotated = (c << 21) | (c >>> 11);

    a = b ^ (b >>> 9);
    b = (c + (c << 3)) | 0;
    c = (rotated + output) | 0;
    d = bumpedCounter;

    // Reinterpret the 32-bit result as unsigned, then scale into [0, 1).
    return (output >>> 0) / TWO_POW_32;
  };
};
/**
 * Creates a deterministic random-number generator for the given seed.
 *
 * Three state words are fixed constants; the fourth is `seed` XOR-ed with
 * 0xc0fefe. The first 15 outputs are drawn and discarded before the
 * generator is returned.
 *
 * @param {number} seed - value mixed into the generator's initial state
 * @returns {function(): number} the warmed-up generator from `sfc32`
 */
export function createRandom(seed) {
  const WARM_UP_ROUNDS = 15;
  const next = sfc32(0x9e3779b9, 0x243f6a88, 0xb7e15162, seed ^ 0xc0fefe);
  for (let round = 0; round < WARM_UP_ROUNDS; ++round) {
    next();
  }
  return next;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment