Skip to content
Snippets Groups Projects
Unverified Commit 148d0187 authored by Cam Saul's avatar Cam Saul Committed by GitHub
Browse files

Fix potential SQL injections (#462)

parent 6c9f31d4
No related branches found
No related tags found
No related merge requests found
Showing
with 394 additions and 139 deletions
(defproject metabase/bigquery-driver "1.0.0-SNAPSHOT-1.30.3"
(defproject metabase/bigquery-driver "1.0.0-SNAPSHOT-1.30.9"
:min-lein-version "2.5.0"
:dependencies
[[com.google.apis/google-api-services-bigquery "v2-rev20190917-1.30.3"]]
[[com.google.apis/google-api-services-bigquery "v2-rev20200429-1.30.9"]]
:profiles
{:provided
......
info:
name: Metabase BigQuery Driver
version: 1.0.0-SNAPSHOT-1.30.3
version: 1.0.0-SNAPSHOT-1.30.9
description: Allows Metabase to connect to Google BigQuery databases.
dependencies:
- plugin: Metabase Google Drivers Shared Dependencies
......
......@@ -8,9 +8,9 @@
[util :as u]]
[metabase.driver.bigquery
[common :as bigquery.common]
[params :as bigquery.params]
[query-processor :as bigquery.qp]]
[metabase.driver.google :as google]
[metabase.driver.sql.util.unprepare :as unprepare]
[metabase.query-processor
[error-type :as error-type]
[store :as qp.store]
......@@ -27,8 +27,6 @@
(driver/register! :bigquery, :parent #{:google :sql})
(defmethod driver/supports? [:bigquery :percentile-aggregations] [_ _] false)
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | Client |
......@@ -193,24 +191,25 @@
(parser v)))))))))
(defn- ^QueryResponse execute-bigquery
([{{:keys [project-id]} :details, :as database} query-string]
(execute-bigquery (database->client database) project-id query-string))
([{{:keys [project-id]} :details, :as database} sql parameters]
(execute-bigquery (database->client database) project-id sql parameters))
([^Bigquery client, ^String project-id, ^String query-string]
{:pre [client (seq project-id) (seq query-string)]}
([^Bigquery client ^String project-id ^String sql parameters]
{:pre [client (seq project-id) (seq sql)]}
(let [request (doto (QueryRequest.)
(.setTimeoutMs (* query-timeout-seconds 1000))
;; if the query contains a `#legacySQL` directive then use legacy SQL instead of standard SQL
(.setUseLegacySql (str/includes? (str/lower-case query-string) "#legacysql"))
(.setQuery query-string))]
(.setUseLegacySql (str/includes? (str/lower-case sql) "#legacysql"))
(.setQuery sql)
(bigquery.params/set-parameters! parameters))]
(google/execute (.query (.jobs client) project-id request)))))
(defn- process-native* [respond database query-string]
(defn- process-native* [respond database sql parameters]
{:pre [(map? database) (map? (:details database))]}
;; automatically retry the query if it times out or otherwise fails. This is on top of the auto-retry added by
;; `execute`
(letfn [(thunk []
(post-process-native respond (execute-bigquery database query-string)))]
(post-process-native respond (execute-bigquery database sql parameters)))]
(try
(thunk)
(catch Throwable e
......@@ -224,20 +223,20 @@
(defmethod driver/execute-reducible-query :bigquery
;; TODO - it doesn't actually cancel queries the way we'd expect
[driver {{sql :query, params :params, :keys [table-name mbql?]} :native, :as outer-query} _ respond]
[driver {{sql :query, :keys [params table-name mbql?]} :native, :as outer-query} _ respond]
(let [database (qp.store/database)]
(binding [bigquery.common/*bigquery-timezone-id* (effective-query-timezone-id database)]
(log/tracef "Running BigQuery query in %s timezone" bigquery.common/*bigquery-timezone-id*)
(let [sql (str "-- " (qputil/query->remark outer-query) "\n" (if (seq params)
(unprepare/unprepare driver (cons sql params))
sql))]
(process-native* respond database sql)))))
(let [sql (str "-- " (qputil/query->remark outer-query) "\n" sql)]
(process-native* respond database sql params)))))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | Other Driver Method Impls |
;;; +----------------------------------------------------------------------------------------------------------------+
(defmethod driver/supports? [:bigquery :percentile-aggregations] [_ _] false)
(defmethod driver/supports? [:bigquery :expressions] [_ _] false)
(defmethod driver/supports? [:bigquery :foreign-keys] [_ _] true)
......
(ns metabase.driver.bigquery.params
(:require [clojure.tools.logging :as log]
[java-time :as t]
[metabase.util.date-2 :as u.date])
(:import [com.google.api.services.bigquery.model QueryParameter QueryParameterType QueryParameterValue QueryRequest]))
(defn- param-type ^QueryParameterType [^String type-name]
(doto (QueryParameterType.)
(.setType type-name)))
(defn- param-value ^QueryParameterValue [v]
(doto (QueryParameterValue.)
(.setValue (str v))))
(defn- param [type-name v]
(doto (QueryParameter.)
(.setParameterType (param-type type-name))
(.setParameterValue (param-value v))))
(defmulti ^:private ->QueryParameter
{:arglists '(^QueryParameter [v])}
class)
(defmethod ->QueryParameter :default
[v]
(param "STRING" v))
;; See https://cloud.google.com/spanner/docs/data-types for type mappings
;; `nil` still has to be given a type (this determines the type it comes back as in cases like `["SELECT ?" nil]`) --
;; AFAIK this only affects native queries because `NULL` is usually spliced into the compiled SQL directly in MBQL
;; queries. Unfortunately we don't know the actual type we should set here so `STRING` is going to have to do for now.
;; This shouldn't really matter anyways since `WHERE field = NULL` generally doesn't work (we have to do `WHERE FIELD
;; IS NULL` instead)
(defmethod ->QueryParameter nil
[_]
(doto (QueryParameter.)
(.setParameterType (param-type "STRING"))
(.setParameterValue (doto (QueryParameterValue.)
(.setValue nil)))))
(defmethod ->QueryParameter String [v] (param "STRING" v))
(defmethod ->QueryParameter Boolean [v] (param "BOOL" v))
(defmethod ->QueryParameter Integer [v] (param "INT64" v))
(defmethod ->QueryParameter Long [v] (param "INT64" v))
(defmethod ->QueryParameter Short [v] (param "INT64" v))
(defmethod ->QueryParameter Byte [v] (param "INT64" v))
(defmethod ->QueryParameter clojure.lang.BigInt [v] (param "INT64" v))
(defmethod ->QueryParameter Float [v] (param "FLOAT64" v))
(defmethod ->QueryParameter Double [v] (param "FLOAT64" v))
(defmethod ->QueryParameter java.math.BigDecimal [v] (param "FLOAT64" v))
(defmethod ->QueryParameter java.time.LocalDate [t] (param "DATE" (u.date/format t)))
(defmethod ->QueryParameter java.time.LocalDateTime [t] (param "DATETIME" (u.date/format t)))
(defmethod ->QueryParameter java.time.LocalTime [t] (param "TIME" (u.date/format t)))
(defmethod ->QueryParameter java.time.OffsetTime [t] (param "TIME" (u.date/format
(t/local-time
(t/with-offset-same-instant t (t/zone-offset 0))))))
(defmethod ->QueryParameter java.time.OffsetDateTime [t] (param "TIMESTAMP" (u.date/format t)))
(defmethod ->QueryParameter java.time.ZonedDateTime [t] (param "TIMESTAMP" (u.date/format
(t/offset-date-time
(t/with-zone-same-instant t (t/zone-id "UTC"))))))
(defn- query-parameter ^QueryParameter [value]
(let [param (->QueryParameter value)]
(log/tracef "Set parameter ^%s %s -> %s" (some-> value class .getCanonicalName) (pr-str value) (pr-str param))
param))
(defn set-parameters!
"Set the `parameters` (i.e., values for `?` positional placeholders in the SQL) for a `query` request. Equivalent to
JDBC `.setObject()` and the like."
^QueryRequest [^QueryRequest query parameters]
(if (seq parameters)
(doto query
(.setParameterMode "POSITIONAL")
(.setQueryParameters (apply list (map query-parameter parameters))))
query))
......@@ -23,8 +23,7 @@
[metabase.util
[date-2 :as u.date]
[honeysql-extensions :as hx]
[i18n :refer [tru]]
[schema :as su]]
[i18n :refer [tru]]]
[schema.core :as s]
[toucan.db :as db])
(:import [java.time LocalDate LocalDateTime LocalTime OffsetDateTime OffsetTime ZonedDateTime]
......@@ -435,13 +434,6 @@
(cond->> ((get-method sql.qp/->honeysql [:sql :relative-datetime]) driver clause)
t (->temporal-type t))))
(s/defn ^:private honeysql-form->sql :- s/Str
[driver, honeysql-form :- su/Map]
(let [[sql & args :as sql+args] (sql.qp/format-honeysql driver honeysql-form)]
(if (seq args)
(unprepare/unprepare driver sql+args)
sql)))
;; From the dox: Fields must contain only letters, numbers, and underscores, start with a letter or underscore, and be
;; at most 128 characters long.
(defmethod driver/format-custom-field-name :bigquery
......@@ -451,19 +443,6 @@
(str/replace #"(^\d)" "_$1"))]
(subs replaced-str 0 (min 128 (count replaced-str)))))
;; These provide implementations of `->honeysql` that prevent HoneySQL from converting forms to prepared statement
;; parameters (`?` symbols)
;;
;; TODO - these should probably be impls of `unprepare-value` instead, but it effectively ends up doing the same thing
;; either way
(defmethod sql.qp/->honeysql [:bigquery String]
[_ s]
(hx/literal s))
(defmethod sql.qp/->honeysql [:bigquery Boolean]
[_ bool]
(hsql/raw (if bool "TRUE" "FALSE")))
;; See:
;;
;; * https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions
......@@ -471,6 +450,11 @@
;; * https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions
;; * https://cloud.google.com/bigquery/docs/reference/standard-sql/datetime_functions
(defmethod unprepare/unprepare-value [:bigquery String]
[_ s]
;; escape single-quotes like Cam's String -> Cam\'s String
(str \' (str/replace s "'" "\\\\'") \'))
(defmethod unprepare/unprepare-value [:bigquery LocalTime]
[_ t]
(format "time \"%s\"" (u.date/format-sql t)))
......@@ -614,13 +598,15 @@
{table-name :name} (some-> source-table-id qp.store/table)]
(assert (seq dataset-id))
(binding [sql.qp/*query* (assoc outer-query :dataset-id dataset-id)]
{:query (->> outer-query
(sql.qp/build-honeysql-form driver)
(honeysql-form->sql driver))
:table-name (or table-name
(when source-query
sql.qp/source-query-alias))
:mbql? true})))
(let [[sql & params] (->> outer-query
(sql.qp/build-honeysql-form driver)
(sql.qp/format-honeysql driver))]
{:query sql
:params params
:table-name (or table-name
(when source-query
sql.qp/source-query-alias))
:mbql? true}))))
(defrecord ^:private CurrentMomentForm [t]
hformat/ToSql
......
(ns metabase.driver.bigquery.params-test
(:require [clojure.test :refer :all]
[metabase
[query-processor :as qp]
[test :as mt]]))
(deftest set-parameters-test
(mt/test-driver :bigquery
(testing "Make sure we're setting various types of parameters correctly\n"
(doseq [[v expected] [[nil {:base-type :type/Text}]
[true {:base-type :type/Boolean}]
[false {:base-type :type/Boolean}]
["a string" {:base-type :type/Text}]
[(int 100) {:base-type :type/Integer}]
[(long 100) {:base-type :type/Integer}]
[(short 100) {:base-type :type/Integer}]
[(byte 100) {:base-type :type/Integer}]
[(bigint 100) {:base-type :type/Integer}]
[(float 100.0) {:base-type :type/Float}]
[(double 100.0) {:base-type :type/Float}]
[(bigdec 100.0) {:base-type :type/Float, :v 100.0}]
;; LocalDate
[#t "2020-05-26" {:base-type :type/Date
:v #t "2020-05-26T00:00Z[UTC]"}]
;; LocaleDateTime
[#t "2020-05-26T17:06:00" {:base-type :type/DateTime
:v #t "2020-05-26T17:06Z[UTC]"}]
;; LocalTime
[#t "17:06:00" {:base-type :type/Time}]
;; OffsetTime
[#t "17:06:00-07:00" {:base-type :type/Time
:v #t "00:06:00"}]
;; OffsetDateTime
[#t "2020-05-26T17:06:00-07:00" {:base-type :type/DateTimeWithLocalTZ
:v #t "2020-05-27T00:06Z[UTC]"}]
;; ZonedDateTime
[#t "2020-05-26T17:06:00-07:00[US/Pacific]" {:base-type :type/DateTimeWithLocalTZ
:v #t "2020-05-27T00:06Z[UTC]"}]]]
(testing (format "^%s %s" (some-> v class .getCanonicalName) (pr-str v))
(let [results (qp/process-query
(assoc (mt/native-query
{:query "SELECT ?"
:params [v]})
:middleware {:format-rows? false}))]
(is (= (or (:v expected) v)
(first (mt/first-row results))))
(is (= (:base-type expected)
(-> (mt/cols results) first :base_type)))))))))
......@@ -105,6 +105,7 @@
"FROM `test_data.venues` "
"GROUP BY `price` "
"ORDER BY `avg` ASC, `price` ASC")
:params nil
:table-name "venues"
:mbql? true}
(qp/query->native
......@@ -170,7 +171,7 @@
;; if I run a BigQuery query, does it get a remark added to it?
(defn- query->native [query]
(let [native-query (atom nil)]
(with-redefs [bigquery/process-native* (fn [_ _ sql]
(with-redefs [bigquery/process-native* (fn [_ _ sql _]
(reset! native-query sql)
(throw (Exception. "Done.")))]
(u/ignore-exceptions
......@@ -604,3 +605,21 @@
(testing "Make sure datetime breakouts like :minute-of-hour work correctly for different temporal types"
(mt/dataset attempted-murders
(test-table-with-fn breakout-test-table can-breakout?)))))
(deftest string-escape-test
(mt/test-driver :bigquery
(testing "Make sure single quotes in parameters are escaped properly to prevent SQL injection\n"
#_(testing "MBQL query"
(is (= [[0]]
(mt/formatted-rows [int]
(mt/run-mbql-query venues
{:aggregation [[:count]]
:filter [:= $name "x\\\\' OR 1 = 1 -- "]})))))
(testing "native query"
(is (= [[0]]
(mt/formatted-rows [int]
(qp/process-query
(mt/native-query
{:query "SELECT count(*) AS `count` FROM `test_data.venues` WHERE `test_data.venues`.`name` = ?"
:params ["x\\\\' OR 1 = 1 -- "]})))))))))
......@@ -4,38 +4,34 @@
[driver :as driver]
[models :refer [Field Table]]
[query-processor :as qp]
[query-processor-test :as qp.test]
[sync :as sync]]
[sync :as sync]
[test :as mt]]
[metabase.db.metadata-queries :as metadata-queries]
[metabase.test
[data :as data]
[util :as tu]]
[metabase.test.data
[bigquery :as bigquery.tx]
[datasets :as datasets]]))
[metabase.test.data.bigquery :as bigquery.tx]
[metabase.test.util :as tu]))
(deftest table-rows-sample-test
(datasets/test-driver :bigquery
(mt/test-driver :bigquery
(is (= [[1 "Red Medicine"]
[2 "Stout Burgers & Beers"]
[3 "The Apple Pan"]
[4 "Wurstküche"]
[5 "Brite Spot Family Restaurant"]]
(->> (metadata-queries/table-rows-sample (Table (data/id :venues))
[(Field (data/id :venues :id))
(Field (data/id :venues :name))])
(->> (metadata-queries/table-rows-sample (Table (mt/id :venues))
[(Field (mt/id :venues :id))
(Field (mt/id :venues :name))])
(sort-by first)
(take 5))))))
(deftest db-timezone-id-test
(datasets/test-driver :bigquery
(mt/test-driver :bigquery
(is (= "UTC"
(tu/db-timezone-id)))))
(defn- do-with-view [f]
(driver/with-driver :bigquery
(let [view-name (name (munge (gensym "view_")))]
(data/with-temp-copy-of-db
(mt/with-temp-copy-of-db
(try
(bigquery.tx/execute!
(str "CREATE VIEW `test_data.%s` "
......@@ -57,9 +53,9 @@
`(do-with-view (fn [~(or view-name-binding '_)] ~@body)))
(deftest sync-views-test
(datasets/test-driver :bigquery
(mt/test-driver :bigquery
(with-view [view-name]
(is (contains? (:tables (driver/describe-database :bigquery (data/db)))
(is (contains? (:tables (driver/describe-database :bigquery (mt/db)))
{:schema nil, :name view-name})
"`describe-database` should see the view")
(is (= {:schema nil
......@@ -67,16 +63,16 @@
:fields #{{:name "id", :database-type "INTEGER", :base-type :type/Integer}
{:name "venue_name", :database-type "STRING", :base-type :type/Text}
{:name "category_name", :database-type "STRING", :base-type :type/Text}}}
(driver/describe-table :bigquery (data/db) {:name view-name}))
(driver/describe-table :bigquery (mt/db) {:name view-name}))
"`describe-tables` should see the fields in the view")
(sync/sync-database! (data/db))
(is (= [[1 "Asian" "Red Medicine"]
[2 "Burger" "Stout Burgers & Beers"]
[3 "Burger" "The Apple Pan"]]
(qp.test/rows
(qp/process-query
{:database (data/id)
:type :query
:query {:source-table (data/id view-name)
:order-by [[:asc (data/id view-name :id)]]}})))
"We should be able to run queries against the view (#3414)"))))
(sync/sync-database! (mt/db))
(testing "We should be able to run queries against the view (#3414)"
(is (= [[1 "Asian" "Red Medicine"]
[2 "Burger" "Stout Burgers & Beers"]
[3 "Burger" "The Apple Pan"]]
(mt/rows
(qp/process-query
{:database (mt/id)
:type :query
:query {:source-table (mt/id view-name)
:order-by [[:asc (mt/id view-name :id)]]}}))))))))
......@@ -88,7 +88,7 @@
(let [sql (apply format format-string args)]
(printf "[BigQuery] %s\n" sql)
(flush)
(bigquery/with-finished-response [response (#'bigquery/execute-bigquery (data/db) sql)]
(bigquery/with-finished-response [response (#'bigquery/execute-bigquery (data/db) sql nil)]
response))))
(def ^:private valid-field-types
......
(ns metabase.driver.presto
"Presto driver. See https://prestodb.io/docs/current/ for complete dox."
(:require [clj-http.client :as http]
(:require [buddy.core.codecs :as codecs]
[clj-http.client :as http]
[clojure
[set :as set]
[string :as str]]
......@@ -197,7 +198,7 @@
(u/minutes->ms 2))
(defn- execute-presto-query-for-sync
"Execute a Presto query for sync purposed."
"Execute a Presto query for metadata sync."
[details query]
(let [result-chan (a/promise-chan)]
(execute-presto-query details query nil (fn [cols rows]
......@@ -252,10 +253,6 @@
:database-type type
:base-type (presto-type->base-type type)}))}))
(defmethod sql.qp/->honeysql [:presto String]
[_ s]
(hx/literal (str/replace s "'" "''")))
(defmethod sql.qp/->honeysql [:presto Boolean]
[_ bool]
(hsql/raw (if bool "TRUE" "FALSE")))
......@@ -280,6 +277,19 @@
[driver [_ arg p]]
(hsql/call :approx_percentile (sql.qp/->honeysql driver arg) (sql.qp/->honeysql driver p)))
(def ^:private ^:dynamic *param-splice-style*
"How we should splice params into SQL (i.e. 'unprepare' the SQL). Either `:friendly` (the default) or `:paranoid`.
`:friendly` makes a best-effort attempt to escape strings and generate SQL that is nice to look at, but should not
be considered safe against all SQL injection -- use this for 'convert to SQL' functionality. `:paranoid` hex-encodes
strings so SQL injection is impossible; this isn't nice to look at, so use this for actually running a query."
:friendly)
(defmethod unprepare/unprepare-value [:presto String]
[_ ^String s]
(case *param-splice-style*
:friendly (str \' (sql.u/escape-sql s :ansi) \')
:paranoid (format "from_utf8(from_hex('%s'))" (codecs/bytes->hex (.getBytes s "UTF-8")))))
;; See https://prestodb.io/docs/current/functions/datetime.html
;; This is only needed for test purposes, because some of the sample data still uses legacy types
......@@ -306,7 +316,8 @@
respond]
(let [sql (str "-- "
(qputil/query->remark outer-query) "\n"
(unprepare/unprepare driver (cons sql params)))
(binding [*param-splice-style* :paranoid]
(unprepare/unprepare driver (cons sql params))))
details (merge (:details (qp.store/database))
settings)]
(execute-presto-query details sql (context/canceled-chan context) respond)))
......@@ -390,11 +401,12 @@
[& args]
(apply driver.common/current-db-time args))
(defmethod driver/supports? [:presto :set-timezone] [_ _] true)
(defmethod driver/supports? [:presto :basic-aggregations] [_ _] true)
(defmethod driver/supports? [:presto :standard-deviation-aggregations] [_ _] true)
(defmethod driver/supports? [:presto :expressions] [_ _] true)
(defmethod driver/supports? [:presto :native-parameters] [_ _] true)
(defmethod driver/supports? [:presto :expression-aggregations] [_ _] true)
(defmethod driver/supports? [:presto :binning] [_ _] true)
(defmethod driver/supports? [:presto :foreign-keys] [_ _] true)
(doseq [[feature supported?] {:set-timezone true
:basic-aggregations true
:standard-deviation-aggregations true
:expressions true
:native-parameters true
:expression-aggregations true
:binning true
:foreign-keys true}]
(defmethod driver/supports? [:presto feature] [_ _] supported?))
......@@ -195,3 +195,29 @@
:parameters [{:type "date/single"
:target ["variable" ["template-tag" "date"]]
:value "2014-08-02"}]}))))))))
(deftest splice-strings-test
(mt/test-driver :presto
(let [query (mt/mbql-query venues
{:aggregation [[:count]]
:filter [:= $name "wow"]})]
(testing "The native query returned in query results should use user-friendly splicing"
(is (= (str "SELECT count(*) AS \"count\" "
"FROM \"default\".\"test_data_venues\" "
"WHERE \"default\".\"test_data_venues\".\"name\" = 'wow'")
(:query (qp/query->native-with-spliced-params query))
(-> (qp/process-query query) :data :native_form :query))))
(testing "When actually running the query we should use paranoid splicing and hex-encode strings"
(let [orig @#'presto/execute-presto-query
the-sql (atom nil)]
(with-redefs [presto/execute-presto-query (fn [details sql canceled-chan respond]
(reset! the-sql sql)
(with-redefs [presto/execute-presto-query orig]
(orig details sql canceled-chan respond)))]
(qp/process-query query)
(is (= (str "-- Metabase\n"
"SELECT count(*) AS \"count\" "
"FROM \"default\".\"test_data_venues\" "
"WHERE \"default\".\"test_data_venues\".\"name\" = from_utf8(from_hex('776f77'))")
@the-sql))))))))
(defproject metabase/sparksql-driver "1.0.0"
(defproject metabase/sparksql-driver "1.0.0-SNAPSHOT-1.2.2"
:min-lein-version "2.5.0"
:dependencies
......@@ -18,12 +18,9 @@
org.eclipse.jetty/jetty-util
org.slf4j/slf4j-log4j12
org.tukaani/xz]]
;; TODO - this is deprecated, seems like we'll want to upgrade to `org.apache.hive/hive-jdbc` in the future. Don't
;; thing it works with Spark SQL atm however
[org.apache.hive/hive-jdbc "1.2.1"
[org.apache.hive/hive-jdbc "1.2.2"
:exclusions
[#_com.google.guava/guava
commons-logging
[commons-logging
org.apache.curator/curator-framework
org.codehaus.jackson/jackson-jaxrs
org.codehaus.jackson/jackson-xc
......
info:
name: Metabase Spark SQL Driver
version: 1.0.0-SNAPSHOT
version: 1.0.0-SNAPSHOT-1.2.2
description: Allows Metabase to connect to Spark SQL databases.
driver:
- name: hive-like
......
(ns metabase.driver.hive-like
(:require [clojure.string :as str]
(:require [buddy.core.codecs :as codecs]
[honeysql.core :as hsql]
[java-time :as t]
[metabase.driver :as driver]
[metabase.driver.sql
[query-processor :as sql.qp]
[util :as sql.u]]
[metabase.driver.sql-jdbc
[connection :as sql-jdbc.conn]
[execute :as sql-jdbc.execute]
[sync :as sql-jdbc.sync]]
[metabase.driver.sql-jdbc.execute.legacy-impl :as legacy]
[metabase.driver.sql.query-processor :as sql.qp]
[metabase.driver.sql.util.unprepare :as unprepare]
[metabase.models.table :refer [Table]]
[metabase.util
......@@ -111,7 +113,10 @@
(defmethod sql.qp/->honeysql [:hive-like :replace]
[driver [_ arg pattern replacement]]
(hsql/call :regexp_replace (sql.qp/->honeysql driver arg) (sql.qp/->honeysql driver pattern) (sql.qp/->honeysql driver replacement)))
(hsql/call :regexp_replace
(sql.qp/->honeysql driver arg)
(sql.qp/->honeysql driver pattern)
(sql.qp/->honeysql driver replacement)))
(defmethod sql.qp/->honeysql [:hive-like :regex-match-first]
[driver [_ arg pattern]]
......@@ -139,9 +144,20 @@
[_ field]
(apply hsql/qualify (qualified-name-components field)))
(def ^:dynamic *param-splice-style*
"How we should splice params into SQL (i.e. 'unprepare' the SQL). Either `:friendly` (the default) or `:paranoid`.
`:friendly` makes a best-effort attempt to escape strings and generate SQL that is nice to look at, but should not
be considered safe against all SQL injection -- use this for 'convert to SQL' functionality. `:paranoid` hex-encodes
strings so SQL injection is impossible; this isn't nice to look at, so use this for actually running a query."
:friendly)
(defmethod unprepare/unprepare-value [:hive-like String]
[_ value]
(str \' (str/replace value "'" "\\\\'") \'))
[_ ^String s]
;; Because Spark SQL doesn't support parameterized queries (e.g. `?`) convert the entire String to hex and decode.
;; e.g. encode `abc` as `decode(unhex('616263'), 'utf-8')` to prevent SQL injection
(case *param-splice-style*
:friendly (str \' (sql.u/escape-sql s :backslashes) \')
:paranoid (format "decode(unhex('%s'), 'utf-8')" (codecs/bytes->hex (.getBytes s "UTF-8")))))
;; Hive/Spark SQL doesn't seem to like DATEs so convert it to a DATETIME first
(defmethod unprepare/unprepare-value [:hive-like LocalDate]
......
......@@ -7,6 +7,7 @@
[core :as hsql]
[helpers :as h]]
[metabase.driver :as driver]
[metabase.driver.hive-like :as hive-like]
[metabase.driver.sql
[query-processor :as sql.qp]
[util :as sql.u]]
......@@ -130,7 +131,8 @@
(let [inner-query (-> (assoc inner-query
:remark (qputil/query->remark outer-query)
:query (if (seq params)
(unprepare/unprepare driver (cons sql params))
(binding [hive-like/*param-splice-style* :paranoid]
(unprepare/unprepare driver (cons sql params)))
sql)
:max-rows (mbql.u/query->max-rows-limit outer-query))
(dissoc :params))
......@@ -165,7 +167,6 @@
(.close stmt)
(throw e)))))
(doseq [feature [:basic-aggregations
:binning
:expression-aggregations
......
(ns metabase.driver.sparksql-test
(:require [expectations :refer [expect]]
(:require [clojure.test :refer :all]
honeysql.types
[metabase
[query-processor :as qp]
[test :as mt]]
[metabase.driver.sql-jdbc.execute :as sql-jdbc.execute]
[metabase.driver.sql.query-processor :as sql.qp]))
;; Make sure our custom implementation of `apply-page` works the way we'd expect
(expect
{:select ["name" "id"]
:from [{:select [[:default.categories.name "name"]
[:default.categories.id "id"]
[{:s "row_number() OVER (ORDER BY `default`.`categories`.`id` ASC)"} :__rownum__]]
:from [:default.categories]
:order-by [[:default.categories.id :asc]]}]
:where [:> :__rownum__ 5]
:limit 5}
(sql.qp/apply-top-level-clause :sparksql :page
{:select [[:default.categories.name "name"] [:default.categories.id "id"]]
:from [:default.categories]
:order-by [[:default.categories.id :asc]]}
{:page {:page 2
:items 5}}))
(comment honeysql.types/keep-me)
(deftest apply-page-test
(testing "Make sure our custom implementation of `apply-page` works the way we'd expect"
(is (= {:select ["name" "id"]
:from [{:select [[:default.categories.name "name"]
[:default.categories.id "id"]
[#honeysql.types.SqlRaw{:s "row_number() OVER (ORDER BY `default`.`categories`.`id` ASC)"}
:__rownum__]]
:from [:default.categories]
:order-by [[:default.categories.id :asc]]}]
:where [:> :__rownum__ 5]
:limit 5}
(sql.qp/apply-top-level-clause :sparksql :page
{:select [[:default.categories.name "name"] [:default.categories.id "id"]]
:from [:default.categories]
:order-by [[:default.categories.id :asc]]}
{:page {:page 2
:items 5}})))))
(deftest splice-strings-test
(mt/test-driver :sparksql
(let [query (mt/mbql-query venues
{:aggregation [[:count]]
:filter [:= $name "wow"]})]
(testing "The native query returned in query results should use user-friendly splicing"
(is (= "SELECT count(*) AS `count` FROM `test_data`.`venues` `t1` WHERE `t1`.`name` = 'wow'"
(:query (qp/query->native-with-spliced-params query))
(-> (qp/process-query query) :data :native_form :query))))
(testing "When actually running the query we should use paranoid splicing and hex-encode strings"
(let [orig sql-jdbc.execute/prepared-statement
the-sql (atom nil)]
(with-redefs [sql-jdbc.execute/prepared-statement (fn [driver conn sql params]
(reset! the-sql sql)
(with-redefs [sql-jdbc.execute/prepared-statement orig]
(orig driver conn sql params)))]
(qp/process-query query)
(is (= (str "-- Metabase\n"
"SELECT count(*) AS `count` "
"FROM `test_data`.`venues` `t1` "
"WHERE `t1`.`name` = decode(unhex('776f77'), 'utf-8')")
@the-sql))))))))
......@@ -505,7 +505,8 @@
dispatch-on-initialized-driver
:hierarchy #'hierarchy)
(defmethod splice-parameters-into-native-query ::driver [_ query]
(defmethod splice-parameters-into-native-query ::driver
[_ query]
query)
......
......@@ -8,6 +8,7 @@
[substitute :as params.substitute]
[substitution :as param-substitution]]
[metabase.driver.sql.query-processor :as sql.qp]
[metabase.driver.sql.util.unprepare :as unprepare]
[potemkin :as p]))
(comment param-substitution/keep-me) ; this is so `cljr-clean-ns` and the liner don't remove the `:require`
......@@ -47,6 +48,15 @@
:query query
:params params)))
;; `:sql` drivers almost certainly don't need to override this method, and instead can implement
;; `unprepare/unprepare-value` for specific classes, or, in extereme cases, `unprepare/unprepare` itself.
(defmethod driver/splice-parameters-into-native-query :sql
[driver {:keys [params], sql :query, :as query}]
(cond-> query
(seq params)
(merge {:params nil
:query (unprepare/unprepare driver (cons sql params))})))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | Convenience Imports |
......
......@@ -100,3 +100,27 @@
;; otherwise if we haven't seen it record it as seen and move on to the next column
:else
(recur (conj already-seen alias) (conj acc [col alias]) more)))))
(defn escape-sql
"Escape single quotes in a SQL string. `escape-style` is either `:ansi` (escape a single quote with two single quotes)
or `:backslashes` (escape a single quote with a backslash).
(escape-sql \"Tito's Tacos\" :ansi) ; -> \"Tito''s Tacos\"
(escape-sql \"Tito's Tacos\" :backslashes) ; -> \"Tito\\'s Tacos\"
!!!! VERY IMPORTANT !!!!
DON'T RELY ON THIS FOR SANITIZING USER INPUT BEFORE RUNNING QUERIES!
For user input, *ALWAYS* pass parameters separately (e.g. using `?` in the SQL) where supported, or if unsupported,
encode the strings as hex and splice in something along the lines of `utf8_string(hex_decode(<hex-string>))`
instead. This is intended only for escaping trusted strings, or for generating the SQL equivalent version of an MBQL
query for debugging purposes or powering the 'convert to SQL' feature."
{:arglists '([s :ansi] [s :backslashes])}
^String [^String s escape-style]
(when s
(case escape-style
:ansi (str/replace s "'" "''")
:backslashes (-> s
(str/replace "\\" "\\\\")
(str/replace "'" "\\'")))))
(ns metabase.driver.sql.util.unprepare
"Utility functions for converting a prepared statement with `?` into a plain SQL query.
"Utility functions for converting a prepared statement with `?` param placeholders into a plain SQL query by splicing
params in place.
TODO - since this is no longer strictly a 'util' namespace (most `:sql-jdbc` drivers need to implement one or
TODO -- since this is no longer strictly a 'util' namespace (most `:sql-jdbc` drivers need to implement one or
methods from here) let's rename this `metabase.driver.sql.unprepare` when we get a chance."
(:require [clojure.string :as str]
[clojure.tools.logging :as log]
[java-time :as t]
[metabase.driver :as driver]
[metabase
[driver :as driver]
[util :as u]]
[metabase.driver.sql.util :as sql.u]
[metabase.util.i18n :refer [trs]])
(:import [java.time Instant LocalDate LocalDateTime LocalTime OffsetDateTime OffsetTime ZonedDateTime]))
......@@ -30,9 +34,9 @@
"NULL")
(defmethod unprepare-value [:sql String]
[_ value]
[_ s]
;; escape single-quotes like Cam's String -> Cam''s String
(str \' (str/replace value "'" "''") \'))
(str \' (sql.u/escape-sql s :ansi) \'))
(defmethod unprepare-value [:sql Boolean]
[_ value]
......@@ -84,14 +88,21 @@
:hierarchy #'driver/hierarchy)
(defmethod unprepare :sql [driver [sql & args]]
(reduce
(fn [sql arg]
;; Only match single question marks; do not match ones like `??` which JDBC converts to `?` to use as Postgres
;; JSON operators amongst other things.
;;
;; TODO - this is not smart enough to handle question marks in non argument contexts, for example if someone
;; were to have a question mark inside an identifier such as a table name. I think we'd have to parse the SQL in
;; order to handle those situations.
(str/replace-first sql #"(?<!\?)\?(?!\?)" (unprepare-value driver arg)))
(transduce
identity
(completing
(fn [sql arg]
;; Only match single question marks; do not match ones like `??` which JDBC converts to `?` to use as Postgres
;; JSON operators amongst other things.
;;
;; TODO - this is not smart enough to handle question marks in non argument contexts, for example if someone
;; were to have a question mark inside an identifier such as a table name. I think we'd have to parse the SQL in
;; order to handle those situations.
(let [v (str (unprepare-value driver arg))]
(log/tracef "Splice %s as %s" (pr-str arg) (pr-str v))
(str/replace-first sql #"(?<!\?)\?(?!\?)" (str/re-quote-replacement v))))
(fn [spliced-sql]
(log/tracef "Spliced %s\n-> %s" (u/colorize 'green (pr-str sql)) (u/colorize 'blue (pr-str spliced-sql)))
spliced-sql))
sql
args))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment