From 94339237d29824099be683f0b991093940d197b7 Mon Sep 17 00:00:00 2001 From: john-metabase <92878045+john-metabase@users.noreply.github.com> Date: Fri, 17 Feb 2023 10:37:43 -0500 Subject: [PATCH] Updates param parser to ignore SQL comments (#28196) * Updates param parser to ignore SQL comments * Adds code to disable SQL comment handling in Mongo native query param parsing --- .../src/metabase/driver/mongo/parameters.clj | 2 +- .../driver/common/parameters/parse.clj | 194 ++++++++++-------- .../driver/common/parameters/parse_test.clj | 40 +++- .../query_processor_test/parameters_test.clj | 37 ++++ 4 files changed, 187 insertions(+), 86 deletions(-) diff --git a/modules/drivers/mongo/src/metabase/driver/mongo/parameters.clj b/modules/drivers/mongo/src/metabase/driver/mongo/parameters.clj index 3fb640cd04a..903d5f27914 100644 --- a/modules/drivers/mongo/src/metabase/driver/mongo/parameters.clj +++ b/modules/drivers/mongo/src/metabase/driver/mongo/parameters.clj @@ -209,7 +209,7 @@ (defn- parse-and-substitute [param->value x] (if-not (string? x) x - (u/prog1 (substitute param->value (params.parse/parse x)) + (u/prog1 (substitute param->value (params.parse/parse x false)) (when-not (= x <>) (log/debug (tru "Substituted {0} -> {1}" (pr-str x) (pr-str <>))))))) diff --git a/src/metabase/driver/common/parameters/parse.clj b/src/metabase/driver/common/parameters/parse.clj index 93b41247186..688c1f50f7d 100644 --- a/src/metabase/driver/common/parameters/parse.clj +++ b/src/metabase/driver/common/parameters/parse.clj @@ -12,44 +12,57 @@ (set! *warn-on-reflection* true) -(def ^:private StringOrToken (s/cond-pre s/Str (s/enum :optional-begin :param-begin :optional-end :param-end))) +(def ^:private StringOrToken (s/cond-pre s/Str {:token s/Keyword + :text s/Str})) (def ^:private ParsedToken (s/cond-pre s/Str Param Optional)) -(defn- split-on-token-string - "Split string `s` once when substring `token-str` is encountered; replace `token-str` with `token` keyword instead. - - (split-on-token \"ABxCxD\" \"x\" :x) ;; -> [\"AB\" :x \"CxD\"]" - [^String s token-str token] - (when-let [index (str/index-of s token-str)] - (let [before (.substring s 0 index) - after (.substring s (+ index (count token-str)) (count s))] - [before token after]))) - -(defn- split-on-token-pattern - "Like `split-on-token-string`, but splits on a regular expression instead, replacing the matched group with `token`. - The pattern match an entire query, and return 3 groups — everything before the match; the match itself; and - everything after the match." - [s re token] - (when-let [[_ before _ after] (re-matches re s)] - [before token after])) - -(defn- split-on-token - [s token-str token] - ((if (string? token-str) - split-on-token-string - split-on-token-pattern) s token-str token)) - -(defn- tokenize-one [s token-str token] +(defn- combine-adjacent-strings + "Returns any adjacent strings in coll combined together" + [coll] + (apply concat + (for [subseq (partition-by string? coll)] + (if (string? (first subseq)) + [(apply str subseq)] + subseq)))) + +(defn- find-token + "Returns a vector of [index match] for string or regex pattern found in s" + [s pattern] + (if (string? pattern) + (when-let [index (str/index-of s pattern)] + [index pattern]) + (let [m (re-matcher pattern s)] + (when (.find m) + [(.start m) (subs s (.start m) (.end m))])))) + +(defn- tokenize-one [s pattern token] (loop [acc [], s s] (if (empty? s) acc - (if-let [[before token after] (split-on-token s token-str token)] - (recur (into acc [before token]) after) + (if-let [[index text] (find-token s pattern)] + (recur (conj acc (subs s 0 index) {:text text :token token}) + (subs s (+ index (count text)))) (conj acc s))))) +(def ^:private param-token-patterns + [["[[" :optional-begin] + ["]]" :optional-end] + ;; param-begin should only match the last two opening brackets in a sequence of > 2, e.g. + ;; [{$match: {{{x}}, field: 1}}] should parse to ["[$match: {" (param "x") ", field: 1}}]"] + [#"(?s)\{\{(?!\{)" :param-begin] + ["}}" :param-end]]) + +(def ^:private sql-token-patterns + (concat + [["/*" :block-comment-begin] + ["*/" :block-comment-end] + ["--" :line-comment-begin] + ["\n" :newline]] + param-token-patterns)) + (s/defn ^:private tokenize :- [StringOrToken] - [s :- s/Str] + [s :- s/Str, handle-sql-comments :- s/Bool] (reduce (fn [strs [token-str token]] (filter @@ -61,79 +74,96 @@ (tokenize-one s token-str token))) strs))) [s] - [["[[" :optional-begin] - ["]]" :optional-end] - ;; param-begin should only match the last two opening brackets in a sequence of > 2, e.g. - ;; [{$match: {{{x}}, field: 1}}] should parse to ["[$match: {" (param "x") ", field: 1}}]"] - [#"(?s)(.*?)(\{\{(?!\{))(.*)" :param-begin] - ["}}" :param-end]])) + (if handle-sql-comments + sql-token-patterns + param-token-patterns))) (defn- param [& [k & more]] (when (or (seq more) (not (string? k))) (throw (ex-info (tru "Invalid '{{...}}' clause: expected a param name") - {:type qp.error-type/invalid-query}))) + {:type qp.error-type/invalid-query}))) (let [k (str/trim k)] (when (empty? k) (throw (ex-info (tru "'{{...}}' clauses cannot be empty.") - {:type qp.error-type/invalid-query}))) + {:type qp.error-type/invalid-query}))) (params/->Param k))) (defn- optional [& parsed] (when-not (some params/Param? parsed) (throw (ex-info (tru "'[[...]]' clauses must contain at least one '{{...}}' clause.") - {:type qp.error-type/invalid-query}))) - (params/->Optional parsed)) + {:type qp.error-type/invalid-query}))) + (params/->Optional (combine-adjacent-strings parsed))) (s/defn ^:private parse-tokens* :- [(s/one [ParsedToken] "parsed tokens") (s/one [StringOrToken] "remaining tokens")] - [tokens :- [StringOrToken], optional-level :- s/Int, param-level :- s/Int] - (loop [acc [], [token & more] tokens] - (condp = token - nil + [tokens :- [StringOrToken] + optional-level :- s/Int + param-level :- s/Int + comment-mode :- (s/enum nil :block-comment-begin :line-comment-begin)] + (loop [acc [], [string-or-token & more] tokens] + (cond + (nil? string-or-token) (if (or (pos? optional-level) (pos? param-level)) (throw (ex-info (tru "Invalid query: found '[[' or '{{' with no matching ']]' or '}}'") {:type qp.error-type/invalid-query})) [acc nil]) - :optional-begin - (let [[parsed more] (parse-tokens* more (inc optional-level) param-level)] - (recur (conj acc (apply optional parsed)) more)) - - :param-begin - (let [[parsed more] (parse-tokens* more optional-level (inc param-level))] - (recur (conj acc (apply param parsed)) more)) - - :optional-end - (if (pos? optional-level) - [acc more] - (recur (conj acc "]]") more)) - - :param-end - (if (pos? param-level) - [acc more] - (recur (conj acc "}}") more)) - - (recur (conj acc token) more)))) - -(s/defn ^:private parse-tokens :- [ParsedToken] - [tokens :- [StringOrToken]] - (let [parsed (first (parse-tokens* tokens 0 0))] - ;; now loop over everything in `parsed`, and if we see 2 strings next to each other put them back together - ;; e.g. [:token "x" "}}"] -> [:token "x}}"] - (loop [acc [], last (first parsed), [x & more] (rest parsed)] - (cond - (not x) (conj acc last) - (and (string? last) (string? x)) (recur acc (str last x) more) - :else (recur (conj acc last) x more))))) + (string? string-or-token) + (recur (conj acc string-or-token) more) + + :else + (let [{:keys [text token]} string-or-token] + (case token + :optional-begin + (if comment-mode + (recur (conj acc text) more) + (let [[parsed more] (parse-tokens* more (inc optional-level) param-level comment-mode)] + (recur (conj acc (apply optional parsed)) more))) + + :param-begin + (if comment-mode + (recur (conj acc text) more) + (let [[parsed more] (parse-tokens* more optional-level (inc param-level) comment-mode)] + (recur (conj acc (apply param parsed)) more))) + + (:line-comment-begin :block-comment-begin) + (if (or comment-mode (pos? optional-level)) + (recur (conj acc text) more) + (let [[parsed more] (parse-tokens* more optional-level param-level token)] + (recur (into acc (cons text parsed)) more))) + + :block-comment-end + (if (= comment-mode :block-comment-begin) + [(conj acc text) more] + (recur (conj acc text) more)) + + :newline + (if (= comment-mode :line-comment-begin) + [(conj acc text) more] + (recur (conj acc text) more)) + + :optional-end + (if (pos? optional-level) + [acc more] + (recur (conj acc text) more)) + + :param-end + (if (pos? param-level) + [acc more] + (recur (conj acc text) more))))))) (s/defn parse :- [(s/cond-pre s/Str Param Optional)] "Attempts to parse parameters in string `s`. Parses any optional clauses or parameters found, and returns a sequence - of non-parameter string fragments (possibly) interposed with `Param` or `Optional` instances." - [s :- s/Str] - (let [tokenized (tokenize s)] - (if (= [s] tokenized) - [s] - (do - (log/tracef "Tokenized native query ->\n%s" (u/pprint-to-str tokenized)) - (u/prog1 (parse-tokens tokenized) - (log/tracef "Parsed native query ->\n%s" (u/pprint-to-str <>))))))) + of non-parameter string fragments (possibly) interposed with `Param` or `Optional` instances. + + If handle-sql-comments is true (default) then we make a best effort to ignore params in SQL comments." + ([s :- s/Str] + (parse s true)) + ([s :- s/Str, handle-sql-comments :- s/Bool] + (let [tokenized (tokenize s handle-sql-comments)] + (if (= [s] tokenized) + [s] + (do + (log/tracef "Tokenized native query ->\n%s" (u/pprint-to-str tokenized)) + (u/prog1 (combine-adjacent-strings (first (parse-tokens* tokenized 0 0 nil))) + (log/tracef "Parsed native query ->\n%s" (u/pprint-to-str <>)))))))) diff --git a/test/metabase/driver/common/parameters/parse_test.clj b/test/metabase/driver/common/parameters/parse_test.clj index 9463a261b4d..3d11aa9ca24 100644 --- a/test/metabase/driver/common/parameters/parse_test.clj +++ b/test/metabase/driver/common/parameters/parse_test.clj @@ -6,6 +6,11 @@ (def ^:private ^{:arglists '([field-name])} param (var-get #'params.parse/param)) (def ^:private ^{:arglists '([& args])} optional (var-get #'params.parse/optional)) +(defn- normalize-tokens + [tokens] + (for [token tokens] + (or (:token token) token))) + (deftest tokenize-test (doseq [[query expected] {"{{num_toucans}}" @@ -22,9 +27,24 @@ "SELECT * FROM toucanneries WHERE TRUE [[AND num_toucans > {{num_toucans}}]] [[AND total_birds > {{total_birds}}]]" ["SELECT * FROM toucanneries WHERE TRUE " :optional-begin "AND num_toucans > " :param-begin "num_toucans" :param-end :optional-end - " " :optional-begin "AND total_birds > " :param-begin "total_birds" :param-end :optional-end]}] + " " :optional-begin "AND total_birds > " :param-begin "total_birds" :param-end :optional-end] + + "SELECT * FROM -- {{foo}}\n" ["SELECT * FROM " :line-comment-begin " " :param-begin "foo" :param-end :newline] + + "/*SELECT {{foo}}*/" [:block-comment-begin "SELECT " :param-begin "foo" :param-end :block-comment-end]}] (is (= expected - (#'params.parse/tokenize query)) + (normalize-tokens (#'params.parse/tokenize query true))) + (format "%s should get tokenized to %s" (pr-str query) (pr-str expected))))) + +(deftest tokenize-no-sql-comments-test + (doseq [[query expected] + {"-- {{num_toucans}}" + ["-- " :param-begin "num_toucans" :param-end] + + "/* [[AND num_toucans > {{num_toucans}} --]] */" + ["/* " :optional-begin "AND num_toucans > " :param-begin "num_toucans" :param-end " --" :optional-end " */"]}] + (is (= expected + (normalize-tokens (#'params.parse/tokenize query false))) (format "%s should get tokenized to %s" (pr-str query) (pr-str expected))))) (deftest parse-test @@ -75,6 +95,11 @@ {"SELECT count(*)\nFROM products\nWHERE category = {{category}}" ["SELECT count(*)\nFROM products\nWHERE category = " (param "category")]} + "Queries with params in SQL comments (#7742)" + {"SELECT -- {{foo}}" ["SELECT -- {{foo}}"] + "[[{{this}} -- and]] that" [(optional (param "this") " -- and") " that"] + "SELECT /* \n --{{foo}} */ {{bar}}" ["SELECT /* \n --{{foo}} */ " (param "bar")]} + "JSON queries that contain non-param fragments like '}}'" {"{x: {y: \"{{param}}\"}}" ["{x: {y: \"" (param "param") "\"}}"] "{$match: {{{date}}, field: 1}}}" ["{$match: {" (param "date") ", field: 1}}}"]} @@ -85,7 +110,7 @@ (testing group (doseq [[s expected] s->expected] (is (= expected - (params.parse/parse s)) + (normalize-tokens (params.parse/parse s))) (format "%s should get parsed to %s" (pr-str s) (pr-str expected)))))) (testing "Testing that invalid/unterminated template params/clauses throw an exception" @@ -98,3 +123,12 @@ (is (thrown? clojure.lang.ExceptionInfo (params.parse/parse invalid)) (format "Parsing %s should throw an exception" (pr-str invalid)))))) + +(deftest disable-comment-handling-test + (testing "SQL comments are ignored when handle-sql-comments = false, e.g. in Mongo driver queries" + (doseq [[query result] [["{{{foo}}: -- {{bar}}}" + ["{" (param "foo") ": -- " (param "bar") "}"]] + + ["{{{foo}}: \"/* {{bar}} */\"}" + ["{" (param "foo") ": \"/* " (param "bar") " */\"}"]]]] + (is (= result (normalize-tokens (params.parse/parse query false))))))) diff --git a/test/metabase/query_processor_test/parameters_test.clj b/test/metabase/query_processor_test/parameters_test.clj index b72e8a12eed..2d37996a969 100644 --- a/test/metabase/query_processor_test/parameters_test.clj +++ b/test/metabase/query_processor_test/parameters_test.clj @@ -210,6 +210,43 @@ :target [:dimension [:template-tag "price"]] :value [1 2]}]})))))) +(deftest params-in-comments-test + (testing "Params in SQL comments are ignored" + (testing "Single-line comments" + (mt/dataset airports + (is (= {:query "SELECT NAME FROM COUNTRY WHERE \"PUBLIC\".\"COUNTRY\".\"NAME\" IN ('US', 'MX') -- {{ignoreme}}" + :params nil} + (qp/compile-and-splice-parameters + {:type :native + :native {:query "SELECT NAME FROM COUNTRY WHERE {{country}} -- {{ignoreme}}" + :template-tags {"country" + {:name "country" + :display-name "Country" + :type :dimension + :dimension [:field (mt/id :country :name) nil] + :widget-type :category}}} + :database (mt/id) + :parameters [{:type :location/country + :target [:dimension [:template-tag "country"]] + :value ["US" "MX"]}]}))))) + + (testing "Multi-line comments" + (is (= {:query "SELECT * FROM VENUES WHERE\n/*\n{{ignoreme}}\n*/ \"PUBLIC\".\"VENUES\".\"PRICE\" IN (1, 2)" + :params []} + (qp/compile-and-splice-parameters + {:type :native + :native {:query "SELECT * FROM VENUES WHERE\n/*\n{{ignoreme}}\n*/ {{price}}" + :template-tags {"price" + {:name "price" + :display-name "Price" + :type :dimension + :dimension [:field (mt/id :venues :price) nil] + :widget-type :category}}} + :database (mt/id) + :parameters [{:type :category + :target [:dimension [:template-tag "price"]] + :value [1 2]}]})))))) + (deftest ignore-parameters-for-unparameterized-native-query-test (testing "Parameters passed for unparameterized queries should get ignored" (let [query {:database (mt/id) -- GitLab