From 94339237d29824099be683f0b991093940d197b7 Mon Sep 17 00:00:00 2001
From: john-metabase <92878045+john-metabase@users.noreply.github.com>
Date: Fri, 17 Feb 2023 10:37:43 -0500
Subject: [PATCH] Updates param parser to ignore SQL comments (#28196)

* Updates param parser to ignore SQL comments
* Adds code to disable SQL comment handling in Mongo native query param parsing
---
 .../src/metabase/driver/mongo/parameters.clj  |   2 +-
 .../driver/common/parameters/parse.clj        | 194 ++++++++++--------
 .../driver/common/parameters/parse_test.clj   |  40 +++-
 .../query_processor_test/parameters_test.clj  |  37 ++++
 4 files changed, 187 insertions(+), 86 deletions(-)

diff --git a/modules/drivers/mongo/src/metabase/driver/mongo/parameters.clj b/modules/drivers/mongo/src/metabase/driver/mongo/parameters.clj
index 3fb640cd04a..903d5f27914 100644
--- a/modules/drivers/mongo/src/metabase/driver/mongo/parameters.clj
+++ b/modules/drivers/mongo/src/metabase/driver/mongo/parameters.clj
@@ -209,7 +209,7 @@
 (defn- parse-and-substitute [param->value x]
   (if-not (string? x)
     x
-    (u/prog1 (substitute param->value (params.parse/parse x))
+    (u/prog1 (substitute param->value (params.parse/parse x false))
       (when-not (= x <>)
         (log/debug (tru "Substituted {0} -> {1}" (pr-str x) (pr-str <>)))))))
 
diff --git a/src/metabase/driver/common/parameters/parse.clj b/src/metabase/driver/common/parameters/parse.clj
index 93b41247186..688c1f50f7d 100644
--- a/src/metabase/driver/common/parameters/parse.clj
+++ b/src/metabase/driver/common/parameters/parse.clj
@@ -12,44 +12,57 @@
 
 (set! *warn-on-reflection* true)
 
-(def ^:private StringOrToken  (s/cond-pre s/Str (s/enum :optional-begin :param-begin :optional-end :param-end)))
+(def ^:private StringOrToken  (s/cond-pre s/Str {:token s/Keyword
+                                                 :text  s/Str}))
 
 (def ^:private ParsedToken (s/cond-pre s/Str Param Optional))
 
-(defn- split-on-token-string
-  "Split string `s` once when substring `token-str` is encountered; replace `token-str` with `token` keyword instead.
-
-    (split-on-token \"ABxCxD\" \"x\" :x) ;; -> [\"AB\" :x \"CxD\"]"
-  [^String s token-str token]
-  (when-let [index (str/index-of s token-str)]
-    (let [before (.substring s 0 index)
-          after  (.substring s (+ index (count token-str)) (count s))]
-      [before token after])))
-
-(defn- split-on-token-pattern
-  "Like `split-on-token-string`, but splits on a regular expression instead, replacing the matched group with `token`.
-  The pattern match an entire query, and return 3 groups — everything before the match; the match itself; and
-  everything after the match."
-  [s re token]
-  (when-let [[_ before _ after] (re-matches re s)]
-    [before token after]))
-
-(defn- split-on-token
-  [s token-str token]
-  ((if (string? token-str)
-     split-on-token-string
-     split-on-token-pattern) s token-str token))
-
-(defn- tokenize-one [s token-str token]
+(defn- combine-adjacent-strings
+  "Returns any adjacent strings in coll combined together"
+  [coll]
+  (apply concat
+         (for [subseq (partition-by string? coll)]
+           (if (string? (first subseq))
+             [(apply str subseq)]
+             subseq))))
+
+(defn- find-token
+  "Returns a vector of [index match] for string or regex pattern found in s"
+  [s pattern]
+  (if (string? pattern)
+    (when-let [index (str/index-of s pattern)]
+      [index pattern])
+    (let [m (re-matcher pattern s)]
+      (when (.find m)
+        [(.start m) (subs s (.start m) (.end m))]))))
+
+(defn- tokenize-one [s pattern token]
   (loop [acc [], s s]
     (if (empty? s)
       acc
-      (if-let [[before token after] (split-on-token s token-str token)]
-        (recur (into acc [before token]) after)
+      (if-let [[index text] (find-token s pattern)]
+        (recur (conj acc (subs s 0 index) {:text text :token token})
+               (subs s (+ index (count text))))
         (conj acc s)))))
 
+(def ^:private param-token-patterns
+  [["[[" :optional-begin]
+   ["]]" :optional-end]
+    ;; param-begin should only match the last two opening brackets in a sequence of > 2, e.g.
+    ;; [{$match: {{{x}}, field: 1}}] should parse to ["[$match: {" (param "x") ", field: 1}}]"]
+   [#"(?s)\{\{(?!\{)" :param-begin]
+   ["}}" :param-end]])
+
+(def ^:private sql-token-patterns
+  (concat
+   [["/*" :block-comment-begin]
+    ["*/" :block-comment-end]
+    ["--" :line-comment-begin]
+    ["\n" :newline]]
+   param-token-patterns))
+
 (s/defn ^:private tokenize :- [StringOrToken]
-  [s :- s/Str]
+  [s :- s/Str, handle-sql-comments :- s/Bool]
   (reduce
    (fn [strs [token-str token]]
      (filter
@@ -61,79 +74,96 @@
            (tokenize-one s token-str token)))
        strs)))
    [s]
-   [["[[" :optional-begin]
-    ["]]" :optional-end]
-    ;; param-begin should only match the last two opening brackets in a sequence of > 2, e.g.
-    ;; [{$match: {{{x}}, field: 1}}] should parse to ["[$match: {" (param "x") ", field: 1}}]"]
-    [#"(?s)(.*?)(\{\{(?!\{))(.*)" :param-begin]
-    ["}}" :param-end]]))
+   (if handle-sql-comments
+     sql-token-patterns
+     param-token-patterns)))
 
 (defn- param [& [k & more]]
   (when (or (seq more)
             (not (string? k)))
     (throw (ex-info (tru "Invalid '{{...}}' clause: expected a param name")
-             {:type qp.error-type/invalid-query})))
+                    {:type qp.error-type/invalid-query})))
   (let [k (str/trim k)]
     (when (empty? k)
       (throw (ex-info (tru "'{{...}}' clauses cannot be empty.")
-               {:type qp.error-type/invalid-query})))
+                      {:type qp.error-type/invalid-query})))
     (params/->Param k)))
 
 (defn- optional [& parsed]
   (when-not (some params/Param? parsed)
     (throw (ex-info (tru "'[[...]]' clauses must contain at least one '{{...}}' clause.")
-             {:type qp.error-type/invalid-query})))
-  (params/->Optional parsed))
+                    {:type qp.error-type/invalid-query})))
+  (params/->Optional (combine-adjacent-strings parsed)))
 
 (s/defn ^:private parse-tokens* :- [(s/one [ParsedToken] "parsed tokens") (s/one [StringOrToken] "remaining tokens")]
-  [tokens :- [StringOrToken], optional-level :- s/Int, param-level :- s/Int]
-  (loop [acc [], [token & more] tokens]
-    (condp = token
-      nil
+  [tokens :- [StringOrToken]
+   optional-level :- s/Int
+   param-level :- s/Int
+   comment-mode :- (s/enum nil :block-comment-begin :line-comment-begin)]
+  (loop [acc [], [string-or-token & more] tokens]
+    (cond
+      (nil? string-or-token)
       (if (or (pos? optional-level) (pos? param-level))
         (throw (ex-info (tru "Invalid query: found '[[' or '{{' with no matching ']]' or '}}'")
                         {:type qp.error-type/invalid-query}))
         [acc nil])
 
-      :optional-begin
-      (let [[parsed more] (parse-tokens* more (inc optional-level) param-level)]
-        (recur (conj acc (apply optional parsed)) more))
-
-      :param-begin
-      (let [[parsed more] (parse-tokens* more optional-level (inc param-level))]
-        (recur (conj acc (apply param parsed)) more))
-
-      :optional-end
-      (if (pos? optional-level)
-        [acc more]
-        (recur (conj acc "]]") more))
-
-      :param-end
-      (if (pos? param-level)
-        [acc more]
-        (recur (conj acc "}}") more))
-
-      (recur (conj acc token) more))))
-
-(s/defn ^:private parse-tokens :- [ParsedToken]
-  [tokens :- [StringOrToken]]
-  (let [parsed (first (parse-tokens* tokens 0 0))]
-    ;; now loop over everything in `parsed`, and if we see 2 strings next to each other put them back together
-    ;; e.g. [:token "x" "}}"] -> [:token "x}}"]
-    (loop [acc [], last (first parsed), [x & more] (rest parsed)]
-      (cond
-        (not x)                          (conj acc last)
-        (and (string? last) (string? x)) (recur acc (str last x) more)
-        :else                            (recur (conj acc last) x more)))))
+      (string? string-or-token)
+      (recur (conj acc string-or-token) more)
+
+      :else
+      (let [{:keys [text token]} string-or-token]
+        (case token
+          :optional-begin
+          (if comment-mode
+            (recur (conj acc text) more)
+            (let [[parsed more] (parse-tokens* more (inc optional-level) param-level comment-mode)]
+              (recur (conj acc (apply optional parsed)) more)))
+
+          :param-begin
+          (if comment-mode
+            (recur (conj acc text) more)
+            (let [[parsed more] (parse-tokens* more optional-level (inc param-level) comment-mode)]
+              (recur (conj acc (apply param parsed)) more)))
+
+          (:line-comment-begin :block-comment-begin)
+          (if (or comment-mode (pos? optional-level))
+            (recur (conj acc text) more)
+            (let [[parsed more] (parse-tokens* more optional-level param-level token)]
+              (recur (into acc (cons text parsed)) more)))
+
+          :block-comment-end
+          (if (= comment-mode :block-comment-begin)
+            [(conj acc text) more]
+            (recur (conj acc text) more))
+
+          :newline
+          (if (= comment-mode :line-comment-begin)
+            [(conj acc text) more]
+            (recur (conj acc text) more))
+
+          :optional-end
+          (if (pos? optional-level)
+            [acc more]
+            (recur (conj acc text) more))
+
+          :param-end
+          (if (pos? param-level)
+            [acc more]
+            (recur (conj acc text) more)))))))
 
 (s/defn parse :- [(s/cond-pre s/Str Param Optional)]
   "Attempts to parse parameters in string `s`. Parses any optional clauses or parameters found, and returns a sequence
-   of non-parameter string fragments (possibly) interposed with `Param` or `Optional` instances."
-  [s :- s/Str]
-  (let [tokenized (tokenize s)]
-    (if (= [s] tokenized)
-      [s]
-      (do
-        (log/tracef "Tokenized native query ->\n%s" (u/pprint-to-str tokenized))
-        (u/prog1 (parse-tokens tokenized)
-          (log/tracef "Parsed native query ->\n%s" (u/pprint-to-str <>)))))))
+   of non-parameter string fragments (possibly) interposed with `Param` or `Optional` instances.
+
+   If handle-sql-comments is true (default) then we make a best effort to ignore params in SQL comments."
+  ([s :- s/Str]
+   (parse s true))
+  ([s :- s/Str, handle-sql-comments :- s/Bool]
+   (let [tokenized (tokenize s handle-sql-comments)]
+     (if (= [s] tokenized)
+       [s]
+       (do
+         (log/tracef "Tokenized native query ->\n%s" (u/pprint-to-str tokenized))
+         (u/prog1 (combine-adjacent-strings (first (parse-tokens* tokenized 0 0 nil)))
+                  (log/tracef "Parsed native query ->\n%s" (u/pprint-to-str <>))))))))
diff --git a/test/metabase/driver/common/parameters/parse_test.clj b/test/metabase/driver/common/parameters/parse_test.clj
index 9463a261b4d..3d11aa9ca24 100644
--- a/test/metabase/driver/common/parameters/parse_test.clj
+++ b/test/metabase/driver/common/parameters/parse_test.clj
@@ -6,6 +6,11 @@
 (def ^:private ^{:arglists '([field-name])} param    (var-get #'params.parse/param))
 (def ^:private ^{:arglists '([& args])}     optional (var-get #'params.parse/optional))
 
+(defn- normalize-tokens
+  [tokens]
+  (for [token tokens]
+    (or (:token token) token)))
+
 (deftest tokenize-test
   (doseq [[query expected]
           {"{{num_toucans}}"
@@ -22,9 +27,24 @@
 
            "SELECT * FROM toucanneries WHERE TRUE [[AND num_toucans > {{num_toucans}}]] [[AND total_birds > {{total_birds}}]]"
            ["SELECT * FROM toucanneries WHERE TRUE " :optional-begin "AND num_toucans > " :param-begin "num_toucans" :param-end :optional-end
-            " " :optional-begin "AND total_birds > " :param-begin "total_birds" :param-end :optional-end]}]
+            " " :optional-begin "AND total_birds > " :param-begin "total_birds" :param-end :optional-end]
+
+           "SELECT * FROM -- {{foo}}\n" ["SELECT * FROM " :line-comment-begin " " :param-begin "foo" :param-end :newline]
+
+           "/*SELECT {{foo}}*/" [:block-comment-begin "SELECT " :param-begin "foo" :param-end :block-comment-end]}]
     (is (= expected
-           (#'params.parse/tokenize query))
+           (normalize-tokens (#'params.parse/tokenize query true)))
+        (format "%s should get tokenized to %s" (pr-str query) (pr-str expected)))))
+
+(deftest tokenize-no-sql-comments-test
+  (doseq [[query expected]
+          {"-- {{num_toucans}}"
+           ["-- " :param-begin "num_toucans" :param-end]
+
+           "/* [[AND num_toucans > {{num_toucans}} --]] */"
+           ["/* " :optional-begin "AND num_toucans > " :param-begin "num_toucans" :param-end " --" :optional-end " */"]}]
+    (is (= expected
+           (normalize-tokens (#'params.parse/tokenize query false)))
         (format "%s should get tokenized to %s" (pr-str query) (pr-str expected)))))
 
 (deftest parse-test
@@ -75,6 +95,11 @@
            {"SELECT count(*)\nFROM products\nWHERE category = {{category}}"
             ["SELECT count(*)\nFROM products\nWHERE category = " (param "category")]}
 
+           "Queries with params in SQL comments (#7742)"
+           {"SELECT -- {{foo}}" ["SELECT -- {{foo}}"]
+            "[[{{this}} -- and]] that" [(optional (param "this") " -- and") " that"]
+            "SELECT /* \n --{{foo}} */ {{bar}}" ["SELECT /* \n --{{foo}} */ " (param "bar")]}
+
            "JSON queries that contain non-param fragments like '}}'"
            {"{x: {y: \"{{param}}\"}}"         ["{x: {y: \"" (param "param") "\"}}"]
             "{$match: {{{date}}, field: 1}}}" ["{$match: {" (param "date") ", field: 1}}}"]}
@@ -85,7 +110,7 @@
     (testing group
       (doseq [[s expected] s->expected]
         (is (= expected
-               (params.parse/parse s))
+               (normalize-tokens (params.parse/parse s)))
             (format "%s should get parsed to %s" (pr-str s) (pr-str expected))))))
 
   (testing "Testing that invalid/unterminated template params/clauses throw an exception"
@@ -98,3 +123,12 @@
       (is (thrown? clojure.lang.ExceptionInfo
                    (params.parse/parse invalid))
           (format "Parsing %s should throw an exception" (pr-str invalid))))))
+
+(deftest disable-comment-handling-test
+  (testing "SQL comments are ignored when handle-sql-comments = false, e.g. in Mongo driver queries"
+    (doseq [[query result] [["{{{foo}}: -- {{bar}}}"
+                             ["{" (param "foo") ": -- " (param "bar") "}"]]
+
+                            ["{{{foo}}: \"/* {{bar}} */\"}"
+                             ["{" (param "foo") ": \"/* " (param "bar") " */\"}"]]]]
+      (is (= result (normalize-tokens (params.parse/parse query false)))))))
diff --git a/test/metabase/query_processor_test/parameters_test.clj b/test/metabase/query_processor_test/parameters_test.clj
index b72e8a12eed..2d37996a969 100644
--- a/test/metabase/query_processor_test/parameters_test.clj
+++ b/test/metabase/query_processor_test/parameters_test.clj
@@ -210,6 +210,43 @@
                              :target [:dimension [:template-tag "price"]]
                              :value  [1 2]}]}))))))
 
+(deftest params-in-comments-test
+  (testing "Params in SQL comments are ignored"
+    (testing "Single-line comments"
+      (mt/dataset airports
+                  (is (= {:query  "SELECT NAME FROM COUNTRY WHERE \"PUBLIC\".\"COUNTRY\".\"NAME\" IN ('US', 'MX') -- {{ignoreme}}"
+                          :params nil}
+                         (qp/compile-and-splice-parameters
+                          {:type       :native
+                           :native     {:query         "SELECT NAME FROM COUNTRY WHERE {{country}} -- {{ignoreme}}"
+                                        :template-tags {"country"
+                                                        {:name         "country"
+                                                         :display-name "Country"
+                                                         :type         :dimension
+                                                         :dimension    [:field (mt/id :country :name) nil]
+                                                         :widget-type  :category}}}
+                           :database   (mt/id)
+                           :parameters [{:type   :location/country
+                                         :target [:dimension [:template-tag "country"]]
+                                         :value  ["US" "MX"]}]})))))
+
+    (testing "Multi-line comments"
+      (is (= {:query  "SELECT * FROM VENUES WHERE\n/*\n{{ignoreme}}\n*/ \"PUBLIC\".\"VENUES\".\"PRICE\" IN (1, 2)"
+              :params []}
+             (qp/compile-and-splice-parameters
+              {:type       :native
+               :native     {:query         "SELECT * FROM VENUES WHERE\n/*\n{{ignoreme}}\n*/ {{price}}"
+                            :template-tags {"price"
+                                            {:name         "price"
+                                             :display-name "Price"
+                                             :type         :dimension
+                                             :dimension    [:field (mt/id :venues :price) nil]
+                                             :widget-type  :category}}}
+               :database   (mt/id)
+               :parameters [{:type   :category
+                             :target [:dimension [:template-tag "price"]]
+                             :value  [1 2]}]}))))))
+
 (deftest ignore-parameters-for-unparameterized-native-query-test
   (testing "Parameters passed for unparameterized queries should get ignored"
     (let [query {:database (mt/id)
-- 
GitLab