diff --git a/frontend/src/metabase/lib/expressions/process.js b/frontend/src/metabase/lib/expressions/process.js
index 71fb25184a7711a6f9e51a69c9e30b5587461183..5917ecce08ab477dbbd990de9f244a60af883af4 100644
--- a/frontend/src/metabase/lib/expressions/process.js
+++ b/frontend/src/metabase/lib/expressions/process.js
@@ -5,14 +5,12 @@ export function processSource(options) {
   const parse = require("./parser").parse;
   const compile = require("./compile").compile;
   const suggest = require("./suggest").suggest;
-  const syntax = require("./syntax").syntax;
 
   const { source, targetOffset } = options;
 
   let expression;
   let suggestions = [];
   let helpText;
-  let syntaxTree;
   let compileError;
 
   // PARSE
@@ -46,19 +44,11 @@ export function processSource(options) {
     }
   }
 
-  // SYNTAX
-  try {
-    syntaxTree = syntax({ cst, tokenVector, ...options });
-  } catch (e) {
-    console.warn("syntax error", e);
-  }
-
   return {
     source,
     expression,
     helpText,
     suggestions,
-    syntaxTree,
     compileError,
   };
 }
diff --git a/frontend/src/metabase/lib/expressions/syntax.js b/frontend/src/metabase/lib/expressions/syntax.js
deleted file mode 100644
index 7af684cabb634cd30a71104ed59dbd6146067f58..0000000000000000000000000000000000000000
--- a/frontend/src/metabase/lib/expressions/syntax.js
+++ /dev/null
@@ -1,399 +0,0 @@
-import { ExpressionCstVisitor, parse } from "./parser";
-import {
-  lexerWithRecovery,
-  Identifier,
-  WhiteSpace,
-  LParen,
-  RParen,
-  IdentifierString,
-  FunctionName,
-  isTokenType,
-  CLAUSE_TOKENS,
-} from "./lexer";
-
-import { MBQL_CLAUSES, getMBQLName } from ".";
-
-const TOKENIZED_NODES = new Set(["dimension", "metric", "aggregation"]);
-
-function syntaxNode(type, ...children) {
-  return {
-    type: type,
-    tokenized: TOKENIZED_NODES.has(type),
-    children: children.filter(child => child),
-  };
-}
-
-function tokenNode(...args) {
-  let [type, token] = args.length === 1 ? ["token", args[0]] : args;
-  // allow passing the array token
-  if (Array.isArray(token)) {
-    if (token.length !== 1) {
-      console.warn(
-        `Passed token array of length ${token.length} to tokenNode()`,
-        token,
-      );
-    }
-    token = token[0];
-  }
-  return (
-    token && {
-      type: type,
-      text: token.image,
-      start: token.startOffset,
-      end: token.endOffset,
-      _token: token,
-    }
-  );
-}
-
-export class ExpressionSyntaxVisitor extends ExpressionCstVisitor {
-  constructor(options) {
-    super();
-    this._options = options;
-    this.validateVisitor();
-  }
-
-  any(ctx) {
-    return this.visit(ctx.expression);
-  }
-  expression(ctx) {
-    return this.visit(ctx.expression);
-  }
-  aggregation(ctx) {
-    return this.visit(ctx.expression);
-  }
-  number(ctx) {
-    return this.visit(ctx.expression);
-  }
-  string(ctx) {
-    return this.visit(ctx.expression);
-  }
-  boolean(ctx) {
-    return this.visit(ctx.expression);
-  }
-
-  booleanExpression(ctx) {
-    return this.visit(ctx.expression);
-  }
-  logicalOrExpression(ctx) {
-    return this._logicalExpression(ctx.operands, ctx.operators);
-  }
-  logicalAndExpression(ctx) {
-    return this._logicalExpression(ctx.operands, ctx.operators);
-  }
-  booleanUnaryExpression(ctx) {
-    return this.visit(ctx.expression);
-  }
-  logicalNotExpression(ctx) {
-    return syntaxNode(
-      "filter",
-      tokenNode(ctx.operators),
-      this.visit(ctx.operands),
-    );
-  }
-  relationalExpression(ctx) {
-    return this._logicalExpression(ctx.operands, ctx.operators);
-  }
-
-  _logicalExpression(operands = [], operators = []) {
-    const initial = [];
-    for (let i = 0; i < operands.length; i++) {
-      initial.push(this.visit(operands[i]));
-      if (i < operators.length) {
-        initial.push(tokenNode(operators[i]));
-      }
-    }
-    return initial.length === 0
-      ? null
-      : initial.length === 1
-      ? initial[0]
-      : syntaxNode("filter", ...initial);
-  }
-
-  additionExpression(ctx) {
-    return this._arithmeticExpression(ctx.operands, ctx.operators);
-  }
-  multiplicationExpression(ctx) {
-    return this._arithmeticExpression(ctx.operands, ctx.operators);
-  }
-
-  _arithmeticExpression(operands = [], operators = []) {
-    const initial = [];
-    for (let i = 0; i < operands.length; i++) {
-      initial.push(this.visit(operands[i]));
-      if (i < operators.length) {
-        initial.push(tokenNode(operators[i]));
-      }
-    }
-    return initial.length === 0
-      ? null
-      : initial.length === 1
-      ? initial[0]
-      : syntaxNode("math", ...initial);
-  }
-
-  functionExpression(ctx) {
-    const parts = [];
-    parts.push(tokenNode("function-name", ctx.functionName));
-    if (ctx.LParen) {
-      const args = [];
-      if (ctx.arguments) {
-        for (let i = 0; i < ctx.arguments.length; i++) {
-          args.push(this.visit(ctx.arguments[i]));
-          if (ctx.Comma && ctx.Comma[i]) {
-            args.push(tokenNode(ctx.Comma[i]));
-          }
-        }
-      }
-      // NOTE: inserting a "group" node to match fallbackParser behavior
-      parts.push(
-        syntaxNode(
-          "group",
-          tokenNode("open-paren", ctx.LParen),
-          ...args,
-          tokenNode("close-paren", ctx.RParen),
-        ),
-      );
-    }
-
-    const fn = getMBQLName(ctx.functionName[0].image);
-    const clause = MBQL_CLAUSES[fn];
-
-    return syntaxNode(clause.type, ...parts);
-  }
-
-  caseExpression(ctx) {
-    return this.functionExpression(ctx);
-  }
-
-  identifierExpression(ctx) {
-    return syntaxNode(ctx.resolveAs, this.visit(ctx.identifierName));
-  }
-
-  identifier(ctx) {
-    return syntaxNode("identifier", tokenNode(ctx.Identifier));
-  }
-  identifierString(ctx) {
-    return syntaxNode("identifier", tokenNode(ctx.IdentifierString));
-  }
-  stringLiteral(ctx) {
-    return syntaxNode("string-literal", tokenNode(ctx.StringLiteral));
-  }
-  numberLiteral(ctx) {
-    return syntaxNode(
-      "number-literal",
-      tokenNode(ctx.Minus),
-      tokenNode(ctx.NumberLiteral),
-    );
-  }
-  atomicExpression(ctx) {
-    return this.visit(ctx.expression);
-  }
-  parenthesisExpression(ctx) {
-    return syntaxNode(
-      "group",
-      tokenNode(ctx.LParen),
-      this.visit(ctx.expression),
-      tokenNode(ctx.RParen),
-    );
-  }
-}
-
-// DEFAULT PARSER
-export function defaultParser(options) {
-  const { cst, tokenVector } = parse(options);
-  const visitor = new ExpressionSyntaxVisitor({ tokenVector, ...options });
-  const visited = cst && visitor.visit(cst);
-  return (
-    visited &&
-    recoverTokens(
-      visited,
-      options.source,
-      options.recover ? recoveredNode : recoveredWhitespaceNode,
-    )
-  );
-}
-
-// RECOVERY PARSER
-export function recoveryParser(options) {
-  return defaultParser({ ...options, recover: true });
-}
-
-// FALLBACK PARSER:
-// hand-rolled parser that parses enough for syntax highlighting
-export function fallbackParser({ source, startRule }) {
-  const { tokens } = mergeTokenGroups(lexerWithRecovery.tokenize(source));
-  function nextNonWhitespace(index) {
-    while (++index < tokens.length && tokens[index].tokenType === WhiteSpace) {
-      // this block intentionally left blank
-    }
-    return tokens[index];
-  }
-
-  const root = { type: "group", children: [] };
-  let current = root;
-  let outsideAggregation = startRule === "aggregation";
-
-  const stack = [];
-  const push = element => {
-    current.children.push(element);
-    stack.push(current);
-    current = element;
-  };
-  const pop = () => {
-    if (stack.length === 0) {
-      return;
-    }
-    current = stack.pop();
-  };
-  for (let i = 0; i < tokens.length; i++) {
-    const t = tokens[i];
-    const next = nextNonWhitespace(i);
-    if (isTokenType(t.tokenType, FunctionName)) {
-      const { type } = CLAUSE_TOKENS.get(t.tokenType);
-      const clause = syntaxNode(type, tokenNode("function-name", t));
-      if (next && next.tokenType === LParen) {
-        if (type === "aggregation") {
-          outsideAggregation = false;
-        }
-        push(clause);
-      } else {
-        current.children.push(clause);
-      }
-    } else if (
-      isTokenType(t.tokenType, Identifier) ||
-      isTokenType(t.tokenType, IdentifierString)
-    ) {
-      current.children.push(
-        syntaxNode(
-          outsideAggregation ? "metric" : "unknown", // "dimension" + "segment"
-          syntaxNode("identifier", tokenNode(t)),
-        ),
-      );
-    } else if (t.tokenType === LParen) {
-      push(syntaxNode("group"));
-      current.children.push(tokenNode("open-paren", t));
-    } else if (t.tokenType === RParen) {
-      current.children.push(tokenNode("close-paren", t));
-      pop();
-      if (current.type === "aggregation") {
-        outsideAggregation = true;
-        pop();
-      }
-    } else {
-      current.children.push(tokenNode(t));
-    }
-  }
-  return root;
-}
-
-// merges all token groups (e.x. whitespace, comments) into a single array of tokens
-function mergeTokenGroups(results) {
-  const tokens = [];
-  const groups = [results.tokens, ...Object.values(results.groups)];
-  // eslint-disable-next-line no-constant-condition
-  while (true) {
-    let firstGroupIndex = -1;
-    let firstStartOffset = Infinity;
-    for (let i = 0; i < groups.length; i++) {
-      const token = groups[i][0];
-      if (token && token.startOffset < firstStartOffset) {
-        firstStartOffset = token.startOffset;
-        firstGroupIndex = i;
-      }
-    }
-    if (firstGroupIndex >= 0) {
-      tokens.push(groups[firstGroupIndex].shift());
-    } else {
-      break;
-    }
-  }
-  return { ...results, tokens, groups: {} };
-}
-
-// inserts whitespace tokens back into the syntax tree
-function recoveredWhitespaceNode(text, extra = {}) {
-  if (!/^\s+$/.test(text)) {
-    throw new Error("Recovered non-whitespace: " + text);
-  }
-  return {
-    type: "whitespace",
-    ...extra,
-    text,
-  };
-}
-
-function recoveredNode(text, extra = {}) {
-  return {
-    type: /^\s+$/.test(text) ? "whitespace" : "recovered",
-    ...extra,
-    text,
-  };
-}
-
-// NOTE: could we use token groups instead to collect whitespace tokens?
-// https://sap.github.io/chevrotain/docs/features/token_grouping.html
-function recoverTokens(root, source, recovered = recoveredNode) {
-  const getRecoveredToken = (start, end) =>
-    recovered(source.substring(start, end), { start, end: end - 1 });
-
-  function recover(node) {
-    if (node.children) {
-      const children = [];
-      let previous = null;
-      for (const child of node.children) {
-        // call recover on the child first to get start/end on non-terminals
-        const current = recover(child);
-        // if the current node doesn't start where the previous node ended then add whitespace token back in
-        if (previous && current.start > previous.end + 1) {
-          children.push(getRecoveredToken(previous.end + 1, current.start));
-        }
-        children.push(current);
-        previous = current;
-      }
-      return {
-        ...node,
-        children,
-        // add start/end to non-terminals
-        start: children[0].start,
-        end: children[children.length - 1].end,
-      };
-    } else {
-      return node;
-    }
-  }
-
-  const node = recover(root);
-  if (node.start > 0) {
-    node.children.unshift(getRecoveredToken(0, node.start));
-    node.start = 0;
-  }
-  if (node.end < source.length - 1) {
-    node.children.push(getRecoveredToken(node.end + 1, source.length));
-    node.end = source.length - 1;
-  }
-  return node;
-}
-
-// MAIN EXPORTED FUNCTIONS:
-
-const DEFAULT_STRATEGIES = [recoveryParser, fallbackParser];
-
-export function syntax({ strategies = DEFAULT_STRATEGIES, ...options } = {}) {
-  for (const strategy of strategies) {
-    try {
-      return strategy(options);
-    } catch (e) {
-      // console.warn(e)
-    }
-  }
-  throw new Error("Unable to parse: " + options.source);
-}
-
-export function serialize(node) {
-  if (node.children) {
-    return node.children.map(serialize).join("");
-  } else {
-    return node.text || "";
-  }
-}
diff --git a/frontend/test/metabase/lib/expressions/process.unit.spec.js b/frontend/test/metabase/lib/expressions/process.unit.spec.js
index 1d7703a91c296ca7ae0ec1925c4f0a5d1e1d0ae1..da5784e20f5cedf2fda863e44899f20a5297c88d 100644
--- a/frontend/test/metabase/lib/expressions/process.unit.spec.js
+++ b/frontend/test/metabase/lib/expressions/process.unit.spec.js
@@ -8,14 +8,11 @@ describe("metabase/lib/expressions/process", () => {
     ).not.toThrow();
   });
   it("should handle valid input", () => {
-    const { compileError, syntaxTree } = processSource({
+    const { compileError } = processSource({
       source: "1",
       targetOffset: null,
     });
     expect(compileError).toBeUndefined();
-    expect(syntaxTree).toBeDefined();
-    expect(syntaxTree.children).toBeDefined();
-    expect(syntaxTree.children.length).toEqual(1);
   });
   it("should handle invalid input", () => {
     const { compileError } = processSource({
diff --git a/frontend/test/metabase/lib/expressions/syntax.unit.spec.js b/frontend/test/metabase/lib/expressions/syntax.unit.spec.js
deleted file mode 100644
index 5fd712c482d4eb3a20c6c7f25e1ce4598bf35fff..0000000000000000000000000000000000000000
--- a/frontend/test/metabase/lib/expressions/syntax.unit.spec.js
+++ /dev/null
@@ -1,68 +0,0 @@
-import {
-  defaultParser,
-  fallbackParser,
-  recoveryParser,
-  serialize,
-} from "metabase/lib/expressions/syntax";
-
-import { shared, aggregationOpts } from "./__support__/expressions";
-
-const partialInputCases = {
-  expression: [
-    ['1 + "Total', "missing quote at the end"],
-    ["1 +", "ending in +"],
-    ["1 + (", "ending in open paren"],
-    ["1 + (2", "ending in number"],
-    ["1 + (2 + 3", "missing paren at the end"],
-    ["1 (2 + 3)", "missing operator in the middle"],
-  ],
-  aggregation: [
-    ["Sum", "aggregation without arguments"],
-    ["Sum(", "aggregation with open paren"],
-  ],
-  filter: [],
-};
-
-describe("metabase/lib/expressions/syntax", () => {
-  for (const parser of [defaultParser, recoveryParser, fallbackParser]) {
-    describe(`${parser.name}()`, () => {
-      for (const [name, cases, opts] of shared) {
-        describe(name, () => {
-          for (const [source, mbql, description] of cases) {
-            if (mbql) {
-              it(`should parse ${description}`, () => {
-                const tree = parser({ source, ...opts });
-                expect(serialize(tree)).toEqual(source);
-              });
-            }
-          }
-          // defaultParser doesn't support partial input
-          if (parser !== defaultParser) {
-            describe("with partial inputs", () => {
-              for (const [source, description] of partialInputCases[name]) {
-                it(`should parse ${description}`, () => {
-                  const tree = parser({ source, ...opts });
-                  expect(serialize(tree)).toEqual(source);
-                });
-              }
-            });
-          }
-        });
-      }
-
-      // NOTE: only add tests below for things that don't fit the shared test cases above
-
-      it(`should parse and serialize source with leading whitespace`, () => {
-        const source = " Sum(A)";
-        const tree = parser({ source, ...aggregationOpts });
-        expect(serialize(tree)).toEqual(source);
-      });
-
-      it(`should parse and serialize source with trailing whitespace`, () => {
-        const source = "Sum(A) ";
-        const tree = parser({ source, ...aggregationOpts });
-        expect(serialize(tree)).toEqual(source);
-      });
-    });
-  }
-});