Skip to content
Snippets Groups Projects
Unverified Commit 94339237 authored by john-metabase's avatar john-metabase Committed by GitHub
Browse files

Updates param parser to ignore SQL comments (#28196)

* Updates param parser to ignore SQL comments
* Adds code to disable SQL comment handling in Mongo native query param parsing
parent 69ed17e9
Branches
Tags
No related merge requests found
......@@ -209,7 +209,7 @@
(defn- parse-and-substitute [param->value x]
(if-not (string? x)
x
(u/prog1 (substitute param->value (params.parse/parse x))
(u/prog1 (substitute param->value (params.parse/parse x false))
(when-not (= x <>)
(log/debug (tru "Substituted {0} -> {1}" (pr-str x) (pr-str <>)))))))
......
......@@ -12,44 +12,57 @@
(set! *warn-on-reflection* true)
(def ^:private StringOrToken (s/cond-pre s/Str (s/enum :optional-begin :param-begin :optional-end :param-end)))
(def ^:private StringOrToken (s/cond-pre s/Str {:token s/Keyword
:text s/Str}))
(def ^:private ParsedToken (s/cond-pre s/Str Param Optional))
(defn- split-on-token-string
"Split string `s` once when substring `token-str` is encountered; replace `token-str` with `token` keyword instead.
(split-on-token \"ABxCxD\" \"x\" :x) ;; -> [\"AB\" :x \"CxD\"]"
[^String s token-str token]
(when-let [index (str/index-of s token-str)]
(let [before (.substring s 0 index)
after (.substring s (+ index (count token-str)) (count s))]
[before token after])))
(defn- split-on-token-pattern
"Like `split-on-token-string`, but splits on a regular expression instead, replacing the matched group with `token`.
The pattern match an entire query, and return 3 groups — everything before the match; the match itself; and
everything after the match."
[s re token]
(when-let [[_ before _ after] (re-matches re s)]
[before token after]))
(defn- split-on-token
[s token-str token]
((if (string? token-str)
split-on-token-string
split-on-token-pattern) s token-str token))
(defn- tokenize-one [s token-str token]
(defn- combine-adjacent-strings
"Returns any adjacent strings in coll combined together"
[coll]
(apply concat
(for [subseq (partition-by string? coll)]
(if (string? (first subseq))
[(apply str subseq)]
subseq))))
(defn- find-token
"Returns a vector of [index match] for string or regex pattern found in s"
[s pattern]
(if (string? pattern)
(when-let [index (str/index-of s pattern)]
[index pattern])
(let [m (re-matcher pattern s)]
(when (.find m)
[(.start m) (subs s (.start m) (.end m))]))))
(defn- tokenize-one [s pattern token]
(loop [acc [], s s]
(if (empty? s)
acc
(if-let [[before token after] (split-on-token s token-str token)]
(recur (into acc [before token]) after)
(if-let [[index text] (find-token s pattern)]
(recur (conj acc (subs s 0 index) {:text text :token token})
(subs s (+ index (count text))))
(conj acc s)))))
(def ^:private param-token-patterns
[["[[" :optional-begin]
["]]" :optional-end]
;; param-begin should only match the last two opening brackets in a sequence of > 2, e.g.
;; [{$match: {{{x}}, field: 1}}] should parse to ["[$match: {" (param "x") ", field: 1}}]"]
[#"(?s)\{\{(?!\{)" :param-begin]
["}}" :param-end]])
(def ^:private sql-token-patterns
(concat
[["/*" :block-comment-begin]
["*/" :block-comment-end]
["--" :line-comment-begin]
["\n" :newline]]
param-token-patterns))
(s/defn ^:private tokenize :- [StringOrToken]
[s :- s/Str]
[s :- s/Str, handle-sql-comments :- s/Bool]
(reduce
(fn [strs [token-str token]]
(filter
......@@ -61,79 +74,96 @@
(tokenize-one s token-str token)))
strs)))
[s]
[["[[" :optional-begin]
["]]" :optional-end]
;; param-begin should only match the last two opening brackets in a sequence of > 2, e.g.
;; [{$match: {{{x}}, field: 1}}] should parse to ["[$match: {" (param "x") ", field: 1}}]"]
[#"(?s)(.*?)(\{\{(?!\{))(.*)" :param-begin]
["}}" :param-end]]))
(if handle-sql-comments
sql-token-patterns
param-token-patterns)))
(defn- param [& [k & more]]
(when (or (seq more)
(not (string? k)))
(throw (ex-info (tru "Invalid '{{...}}' clause: expected a param name")
{:type qp.error-type/invalid-query})))
{:type qp.error-type/invalid-query})))
(let [k (str/trim k)]
(when (empty? k)
(throw (ex-info (tru "'{{...}}' clauses cannot be empty.")
{:type qp.error-type/invalid-query})))
{:type qp.error-type/invalid-query})))
(params/->Param k)))
(defn- optional [& parsed]
(when-not (some params/Param? parsed)
(throw (ex-info (tru "'[[...]]' clauses must contain at least one '{{...}}' clause.")
{:type qp.error-type/invalid-query})))
(params/->Optional parsed))
{:type qp.error-type/invalid-query})))
(params/->Optional (combine-adjacent-strings parsed)))
(s/defn ^:private parse-tokens* :- [(s/one [ParsedToken] "parsed tokens") (s/one [StringOrToken] "remaining tokens")]
[tokens :- [StringOrToken], optional-level :- s/Int, param-level :- s/Int]
(loop [acc [], [token & more] tokens]
(condp = token
nil
[tokens :- [StringOrToken]
optional-level :- s/Int
param-level :- s/Int
comment-mode :- (s/enum nil :block-comment-begin :line-comment-begin)]
(loop [acc [], [string-or-token & more] tokens]
(cond
(nil? string-or-token)
(if (or (pos? optional-level) (pos? param-level))
(throw (ex-info (tru "Invalid query: found '[[' or '{{' with no matching ']]' or '}}'")
{:type qp.error-type/invalid-query}))
[acc nil])
:optional-begin
(let [[parsed more] (parse-tokens* more (inc optional-level) param-level)]
(recur (conj acc (apply optional parsed)) more))
:param-begin
(let [[parsed more] (parse-tokens* more optional-level (inc param-level))]
(recur (conj acc (apply param parsed)) more))
:optional-end
(if (pos? optional-level)
[acc more]
(recur (conj acc "]]") more))
:param-end
(if (pos? param-level)
[acc more]
(recur (conj acc "}}") more))
(recur (conj acc token) more))))
(s/defn ^:private parse-tokens :- [ParsedToken]
[tokens :- [StringOrToken]]
(let [parsed (first (parse-tokens* tokens 0 0))]
;; now loop over everything in `parsed`, and if we see 2 strings next to each other put them back together
;; e.g. [:token "x" "}}"] -> [:token "x}}"]
(loop [acc [], last (first parsed), [x & more] (rest parsed)]
(cond
(not x) (conj acc last)
(and (string? last) (string? x)) (recur acc (str last x) more)
:else (recur (conj acc last) x more)))))
(string? string-or-token)
(recur (conj acc string-or-token) more)
:else
(let [{:keys [text token]} string-or-token]
(case token
:optional-begin
(if comment-mode
(recur (conj acc text) more)
(let [[parsed more] (parse-tokens* more (inc optional-level) param-level comment-mode)]
(recur (conj acc (apply optional parsed)) more)))
:param-begin
(if comment-mode
(recur (conj acc text) more)
(let [[parsed more] (parse-tokens* more optional-level (inc param-level) comment-mode)]
(recur (conj acc (apply param parsed)) more)))
(:line-comment-begin :block-comment-begin)
(if (or comment-mode (pos? optional-level))
(recur (conj acc text) more)
(let [[parsed more] (parse-tokens* more optional-level param-level token)]
(recur (into acc (cons text parsed)) more)))
:block-comment-end
(if (= comment-mode :block-comment-begin)
[(conj acc text) more]
(recur (conj acc text) more))
:newline
(if (= comment-mode :line-comment-begin)
[(conj acc text) more]
(recur (conj acc text) more))
:optional-end
(if (pos? optional-level)
[acc more]
(recur (conj acc text) more))
:param-end
(if (pos? param-level)
[acc more]
(recur (conj acc text) more)))))))
(s/defn parse :- [(s/cond-pre s/Str Param Optional)]
"Attempts to parse parameters in string `s`. Parses any optional clauses or parameters found, and returns a sequence
of non-parameter string fragments (possibly) interposed with `Param` or `Optional` instances."
[s :- s/Str]
(let [tokenized (tokenize s)]
(if (= [s] tokenized)
[s]
(do
(log/tracef "Tokenized native query ->\n%s" (u/pprint-to-str tokenized))
(u/prog1 (parse-tokens tokenized)
(log/tracef "Parsed native query ->\n%s" (u/pprint-to-str <>)))))))
of non-parameter string fragments (possibly) interposed with `Param` or `Optional` instances.
If handle-sql-comments is true (default) then we make a best effort to ignore params in SQL comments."
([s :- s/Str]
(parse s true))
([s :- s/Str, handle-sql-comments :- s/Bool]
(let [tokenized (tokenize s handle-sql-comments)]
(if (= [s] tokenized)
[s]
(do
(log/tracef "Tokenized native query ->\n%s" (u/pprint-to-str tokenized))
(u/prog1 (combine-adjacent-strings (first (parse-tokens* tokenized 0 0 nil)))
(log/tracef "Parsed native query ->\n%s" (u/pprint-to-str <>))))))))
......@@ -6,6 +6,11 @@
(def ^:private ^{:arglists '([field-name])} param (var-get #'params.parse/param))
(def ^:private ^{:arglists '([& args])} optional (var-get #'params.parse/optional))
(defn- normalize-tokens
[tokens]
(for [token tokens]
(or (:token token) token)))
(deftest tokenize-test
(doseq [[query expected]
{"{{num_toucans}}"
......@@ -22,9 +27,24 @@
"SELECT * FROM toucanneries WHERE TRUE [[AND num_toucans > {{num_toucans}}]] [[AND total_birds > {{total_birds}}]]"
["SELECT * FROM toucanneries WHERE TRUE " :optional-begin "AND num_toucans > " :param-begin "num_toucans" :param-end :optional-end
" " :optional-begin "AND total_birds > " :param-begin "total_birds" :param-end :optional-end]}]
" " :optional-begin "AND total_birds > " :param-begin "total_birds" :param-end :optional-end]
"SELECT * FROM -- {{foo}}\n" ["SELECT * FROM " :line-comment-begin " " :param-begin "foo" :param-end :newline]
"/*SELECT {{foo}}*/" [:block-comment-begin "SELECT " :param-begin "foo" :param-end :block-comment-end]}]
(is (= expected
(#'params.parse/tokenize query))
(normalize-tokens (#'params.parse/tokenize query true)))
(format "%s should get tokenized to %s" (pr-str query) (pr-str expected)))))
(deftest tokenize-no-sql-comments-test
(doseq [[query expected]
{"-- {{num_toucans}}"
["-- " :param-begin "num_toucans" :param-end]
"/* [[AND num_toucans > {{num_toucans}} --]] */"
["/* " :optional-begin "AND num_toucans > " :param-begin "num_toucans" :param-end " --" :optional-end " */"]}]
(is (= expected
(normalize-tokens (#'params.parse/tokenize query false)))
(format "%s should get tokenized to %s" (pr-str query) (pr-str expected)))))
(deftest parse-test
......@@ -75,6 +95,11 @@
{"SELECT count(*)\nFROM products\nWHERE category = {{category}}"
["SELECT count(*)\nFROM products\nWHERE category = " (param "category")]}
"Queries with params in SQL comments (#7742)"
{"SELECT -- {{foo}}" ["SELECT -- {{foo}}"]
"[[{{this}} -- and]] that" [(optional (param "this") " -- and") " that"]
"SELECT /* \n --{{foo}} */ {{bar}}" ["SELECT /* \n --{{foo}} */ " (param "bar")]}
"JSON queries that contain non-param fragments like '}}'"
{"{x: {y: \"{{param}}\"}}" ["{x: {y: \"" (param "param") "\"}}"]
"{$match: {{{date}}, field: 1}}}" ["{$match: {" (param "date") ", field: 1}}}"]}
......@@ -85,7 +110,7 @@
(testing group
(doseq [[s expected] s->expected]
(is (= expected
(params.parse/parse s))
(normalize-tokens (params.parse/parse s)))
(format "%s should get parsed to %s" (pr-str s) (pr-str expected))))))
(testing "Testing that invalid/unterminated template params/clauses throw an exception"
......@@ -98,3 +123,12 @@
(is (thrown? clojure.lang.ExceptionInfo
(params.parse/parse invalid))
(format "Parsing %s should throw an exception" (pr-str invalid))))))
(deftest disable-comment-handling-test
(testing "SQL comments are ignored when handle-sql-comments = false, e.g. in Mongo driver queries"
(doseq [[query result] [["{{{foo}}: -- {{bar}}}"
["{" (param "foo") ": -- " (param "bar") "}"]]
["{{{foo}}: \"/* {{bar}} */\"}"
["{" (param "foo") ": \"/* " (param "bar") " */\"}"]]]]
(is (= result (normalize-tokens (params.parse/parse query false)))))))
......@@ -210,6 +210,43 @@
:target [:dimension [:template-tag "price"]]
:value [1 2]}]}))))))
(deftest params-in-comments-test
(testing "Params in SQL comments are ignored"
(testing "Single-line comments"
(mt/dataset airports
(is (= {:query "SELECT NAME FROM COUNTRY WHERE \"PUBLIC\".\"COUNTRY\".\"NAME\" IN ('US', 'MX') -- {{ignoreme}}"
:params nil}
(qp/compile-and-splice-parameters
{:type :native
:native {:query "SELECT NAME FROM COUNTRY WHERE {{country}} -- {{ignoreme}}"
:template-tags {"country"
{:name "country"
:display-name "Country"
:type :dimension
:dimension [:field (mt/id :country :name) nil]
:widget-type :category}}}
:database (mt/id)
:parameters [{:type :location/country
:target [:dimension [:template-tag "country"]]
:value ["US" "MX"]}]})))))
(testing "Multi-line comments"
(is (= {:query "SELECT * FROM VENUES WHERE\n/*\n{{ignoreme}}\n*/ \"PUBLIC\".\"VENUES\".\"PRICE\" IN (1, 2)"
:params []}
(qp/compile-and-splice-parameters
{:type :native
:native {:query "SELECT * FROM VENUES WHERE\n/*\n{{ignoreme}}\n*/ {{price}}"
:template-tags {"price"
{:name "price"
:display-name "Price"
:type :dimension
:dimension [:field (mt/id :venues :price) nil]
:widget-type :category}}}
:database (mt/id)
:parameters [{:type :category
:target [:dimension [:template-tag "price"]]
:value [1 2]}]}))))))
(deftest ignore-parameters-for-unparameterized-native-query-test
(testing "Parameters passed for unparameterized queries should get ignored"
(let [query {:database (mt/id)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment