Skip to content
Snippets Groups Projects
Unverified Commit df181c41 authored by Cam Saul's avatar Cam Saul Committed by GitHub
Browse files

Spark SQL => Honey SQL 2 (#28394)

* Spark SQL => Honey SQL 2

* Fix stuff :wrench:
parent f437e248
No related branches found
No related tags found
No related merge requests found
......@@ -695,10 +695,8 @@
;; "athena"
;; "bigquery-cloud-sdk"
;; "h2"
;; "hive-like"
;; [:and "presto" [:? [:or "-common" "-jdbc"]]]
;; "snowflake"
;; "sparksql"
;; "sqlite"
;; "sqlserver"]
;; [:? "-test"]
......@@ -706,7 +704,7 @@
;; ".*"))
;;
;; Please keep this form updated when you change the generated regex! <3
{:pattern "^metabase\\.(?!util\\.(?:(?:honeysql-extensions)|(?:honey-sql-1)))(?!query-processor-test)(?!(?:(?:driver)|(?:test\\.data))\\.(?:(?:sql(?:-jdbc)?)|(?:(?:sql(?:-jdbc)?))|(?:athena)|(?:bigquery-cloud-sdk)|(?:h2)|(?:hive-like)|(?:presto(?:(?:(?:-common)|(?:-jdbc)))?)|(?:snowflake)|(?:sparksql)|(?:sqlite)|(?:sqlserver))(?:-test)?(?:(?:\\.)|(?:$))).*"
{:pattern "^metabase\\.(?!util\\.(?:(?:honeysql-extensions)|(?:honey-sql-1)))(?!query-processor-test)(?!(?:(?:driver)|(?:test\\.data))\\.(?:(?:sql(?:-jdbc)?)|(?:(?:sql(?:-jdbc)?))|(?:athena)|(?:bigquery-cloud-sdk)|(?:h2)|(?:presto(?:(?:(?:-common)|(?:-jdbc)))?)|(?:snowflake)|(?:sqlite)|(?:sqlserver))(?:-test)?(?:(?:\\.)|(?:$))).*"
:name honey-sql-2-namespaces}]
:config-in-ns
......
......@@ -2,7 +2,7 @@
(:require
[buddy.core.codecs :as codecs]
[clojure.string :as str]
[honeysql.format :as hformat]
[honey.sql :as sql]
[java-time :as t]
[metabase.driver :as driver]
[metabase.driver.sql-jdbc.connection :as sql-jdbc.conn]
......@@ -13,7 +13,7 @@
[metabase.driver.sql.util :as sql.u]
[metabase.driver.sql.util.unprepare :as unprepare]
[metabase.util.date-2 :as u.date]
[metabase.util.honeysql-extensions :as hx])
[metabase.util.honey-sql-2 :as h2x])
(:import
(java.sql ResultSet Types)
(java.time LocalDate OffsetDateTime ZonedDateTime)))
......@@ -73,126 +73,160 @@
#"map" :type/Dictionary
#".*" :type/*))
;; Every driver deriving from `:hive-like` compiles its queries with Honey SQL 2.
(defmethod sql.qp/honey-sql-version :hive-like [_driver] 2)
(defmethod sql.qp/current-datetime-honeysql-form :hive-like
[_]
(hx/with-database-type-info :%now "timestamp"))
(h2x/with-database-type-info :%now "timestamp"))
(defmethod sql.qp/unix-timestamp->honeysql [:hive-like :seconds]
[_ _ expr]
(hx/->timestamp (hx/call :from_unixtime expr)))
(h2x/->timestamp [:from_unixtime expr]))
(defn- date-format [format-str expr]
(hx/call :date_format expr (hx/literal format-str)))
[:date_format expr (h2x/literal format-str)])
(defn- str-to-date [format-str expr]
(hx/->timestamp
(hx/call :from_unixtime
(hx/call :unix_timestamp
expr (hx/literal format-str)))))
(h2x/->timestamp [:from_unixtime [:unix_timestamp expr (h2x/literal format-str)]]))
(defn- trunc-with-format [format-str expr]
(str-to-date format-str (date-format format-str expr)))
(defmethod sql.qp/date [:hive-like :default] [_ _ expr] (hx/->timestamp expr))
(defmethod sql.qp/date [:hive-like :minute] [_ _ expr] (trunc-with-format "yyyy-MM-dd HH:mm" (hx/->timestamp expr)))
(defmethod sql.qp/date [:hive-like :minute-of-hour] [_ _ expr] (hx/call :minute (hx/->timestamp expr)))
(defmethod sql.qp/date [:hive-like :hour] [_ _ expr] (trunc-with-format "yyyy-MM-dd HH" (hx/->timestamp expr)))
(defmethod sql.qp/date [:hive-like :hour-of-day] [_ _ expr] (hx/call :hour (hx/->timestamp expr)))
(defmethod sql.qp/date [:hive-like :day] [_ _ expr] (trunc-with-format "yyyy-MM-dd" (hx/->timestamp expr)))
(defmethod sql.qp/date [:hive-like :day-of-month] [_ _ expr] (hx/call :dayofmonth (hx/->timestamp expr)))
(defmethod sql.qp/date [:hive-like :day-of-year] [_ _ expr] (hx/->integer (date-format "D" (hx/->timestamp expr))))
(defmethod sql.qp/date [:hive-like :month] [_ _ expr] (hx/call :trunc (hx/->timestamp expr) (hx/literal :MM)))
(defmethod sql.qp/date [:hive-like :month-of-year] [_ _ expr] (hx/call :month (hx/->timestamp expr)))
(defmethod sql.qp/date [:hive-like :quarter-of-year] [_ _ expr] (hx/call :quarter (hx/->timestamp expr)))
(defmethod sql.qp/date [:hive-like :year] [_ _ expr] (hx/call :trunc (hx/->timestamp expr) (hx/literal :year)))
(defrecord DateExtract [unit expr]
hformat/ToSql
(to-sql [_this]
(format "extract(%s FROM %s)" (name unit) (hformat/to-sql expr))))
(defmethod sql.qp/date [:hive-like :default] [_ _ expr] (h2x/->timestamp expr))
(defmethod sql.qp/date [:hive-like :minute] [_ _ expr] (trunc-with-format "yyyy-MM-dd HH:mm" (h2x/->timestamp expr)))
(defmethod sql.qp/date [:hive-like :minute-of-hour] [_ _ expr] [:minute (h2x/->timestamp expr)])
(defmethod sql.qp/date [:hive-like :hour] [_ _ expr] (trunc-with-format "yyyy-MM-dd HH" (h2x/->timestamp expr)))
(defmethod sql.qp/date [:hive-like :hour-of-day] [_ _ expr] [:hour (h2x/->timestamp expr)])
(defmethod sql.qp/date [:hive-like :day] [_ _ expr] (trunc-with-format "yyyy-MM-dd" (h2x/->timestamp expr)))
(defmethod sql.qp/date [:hive-like :day-of-month] [_ _ expr] [:dayofmonth (h2x/->timestamp expr)])
(defmethod sql.qp/date [:hive-like :day-of-year] [_ _ expr] (h2x/->integer (date-format "D" (h2x/->timestamp expr))))
(defmethod sql.qp/date [:hive-like :month] [_ _ expr] [:trunc (h2x/->timestamp expr) (h2x/literal :MM)])
(defmethod sql.qp/date [:hive-like :month-of-year] [_ _ expr] [:month (h2x/->timestamp expr)])
(defmethod sql.qp/date [:hive-like :quarter-of-year] [_ _ expr] [:quarter (h2x/->timestamp expr)])
(defmethod sql.qp/date [:hive-like :year] [_ _ expr] [:trunc (h2x/->timestamp expr) (h2x/literal :year)])
(def ^:private date-extract-units
  "Unit keywords that `format-date-extract` accepts as the first argument of `extract(<unit> FROM <expr>)`.
  Each inner vector below is one unit together with its aliases.
  See https://spark.apache.org/docs/3.3.0/api/sql/#extract"
  (into #{}
        cat
        [[:year :y :years :yr :yrs]
         [:yearofweek]
         [:quarter :qtr]
         [:month :mon :mons :months]
         [:week :w :weeks]
         [:day :d :days]
         [:dayofweek :dow]
         [:dayofweek_iso :dow_iso]
         [:doy]
         [:hour :h :hours :hr :hrs]
         [:minute :m :min :mins :minutes]
         [:second :s :sec :seconds :secs]]))
(defn- format-date-extract
  "Honey SQL 2 formatter for `[::date-extract unit expr]`.

  Compiles `expr` as a nested expression and wraps its SQL in `extract(<unit> FROM <expr-sql>)`. Returns a
  vector whose first element is that SQL string, followed by whatever parameters compiling `expr` produced.
  `unit` must be a member of `date-extract-units`; anything else fails the precondition."
  [_fn [unit expr]]
  {:pre [(contains? date-extract-units unit)]}
  (let [compiled     (sql/format-expr expr {:nested true})
        extract-sql  (format "extract(%s FROM %s)" (name unit) (first compiled))]
    ;; keep the parameters of the inner expression, in order, after the SQL string
    (into [extract-sql] (rest compiled))))
;; Make `[::date-extract unit expr]` forms compile via `format-date-extract` (registered by var).
(sql/register-fn! ::date-extract #'format-date-extract)
(defn- format-interval
  "Honey SQL 2 formatter for `[::interval amount unit]`. Returns a one-element vector holding the SQL
  fragment, e.g. `[::interval 1 :day]` => `[\"(interval '1' day)\"]`.

  Interval actually supports more than just plain numbers, but that's all we currently need. See
  https://spark.apache.org/docs/latest/sql-ref-literals.html#interval-literal

  `amount` must be a number; it is coerced with `long`, so any fractional part is dropped. `unit` must be
  one of the keywords listed in the precondition."
  [_fn [amount unit]]
  {:pre [(number? amount)
         ;; other units are supported too but we're not currently supporting them.
         (#{:year :month :week :day :hour :minute :second :millisecond} unit)]}
  ;; Build the fragment with `str` rather than `(format \"... %d ...\")`: `%d` goes through
  ;; `java.util.Formatter`, which localizes digits for the JVM's default locale and could therefore put
  ;; non-ASCII digits into the generated SQL. `str` on a long always yields ASCII digits.
  [(str "(interval '" (long amount) "' " (name unit) ")")])
;; Make `[::interval amount unit]` forms compile via `format-interval` (registered by var).
(sql/register-fn! ::interval #'format-interval)
(defmethod sql.qp/date [:hive-like :day-of-week]
[driver _unit expr]
(sql.qp/adjust-day-of-week driver (-> (->DateExtract :dow (hx/->timestamp expr))
(hx/with-database-type-info "integer"))))
(sql.qp/adjust-day-of-week driver (-> [::date-extract :dow (h2x/->timestamp expr)]
(h2x/with-database-type-info "integer"))))
(defmethod sql.qp/date [:hive-like :week]
[driver _ expr]
[driver _unit expr]
(let [week-extract-fn (fn [expr]
(-> (hx/call :date_sub
(hx/+ (hx/->timestamp expr)
(hx/raw "interval '1' day"))
(->DateExtract :dow (hx/->timestamp expr)))
(hx/with-database-type-info "timestamp")))]
(-> [:date_sub
(h2x/+ (h2x/->timestamp expr)
[::interval 1 :day])
[::date-extract :dow (h2x/->timestamp expr)]]
(h2x/with-database-type-info "timestamp")))]
(sql.qp/adjust-start-of-week driver week-extract-fn expr)))
(defmethod sql.qp/date [:hive-like :week-of-year-iso] [_driver _ expr] (hx/call :weekofyear (hx/->timestamp expr)))
(defmethod sql.qp/date [:hive-like :week-of-year-iso]
[_driver _unit expr]
[:weekofyear (h2x/->timestamp expr)])
(defmethod sql.qp/date [:hive-like :quarter]
[_ _ expr]
(hx/call :add_months
(hx/call :trunc (hx/->timestamp expr) (hx/literal :year))
(hx/* (hx/- (hx/call :quarter (hx/->timestamp expr))
1)
3)))
[_driver _unit expr]
[:add_months
[:trunc (h2x/->timestamp expr) (h2x/literal :year)]
(h2x/* (h2x/- [:quarter (h2x/->timestamp expr)]
1)
3)])
(defmethod sql.qp/->honeysql [:hive-like :replace]
[driver [_ arg pattern replacement]]
(hx/call :regexp_replace
(sql.qp/->honeysql driver arg)
(sql.qp/->honeysql driver pattern)
(sql.qp/->honeysql driver replacement)))
[:regexp_replace
(sql.qp/->honeysql driver arg)
(sql.qp/->honeysql driver pattern)
(sql.qp/->honeysql driver replacement)])
(defmethod sql.qp/->honeysql [:hive-like :regex-match-first]
[driver [_ arg pattern]]
(hx/call :regexp_extract (sql.qp/->honeysql driver arg) (sql.qp/->honeysql driver pattern) 0))
[:regexp_extract (sql.qp/->honeysql driver arg) (sql.qp/->honeysql driver pattern) 0])
(defmethod sql.qp/->honeysql [:hive-like :median]
[driver [_ arg]]
(hx/call :percentile (sql.qp/->honeysql driver arg) 0.5))
[:percentile (sql.qp/->honeysql driver arg) 0.5])
(defmethod sql.qp/->honeysql [:hive-like :percentile]
[driver [_ arg p]]
(hx/call :percentile (sql.qp/->honeysql driver arg) (sql.qp/->honeysql driver p)))
[:percentile (sql.qp/->honeysql driver arg) (sql.qp/->honeysql driver p)])
(defmethod sql.qp/add-interval-honeysql-form :hive-like
[driver hsql-form amount unit]
(if (= unit :quarter)
(recur driver hsql-form (* amount 3) :month)
(hx/+ (hx/->timestamp hsql-form) (hx/raw (format "(INTERVAL '%d' %s)" (int amount) (name unit))))))
(h2x/+ (h2x/->timestamp hsql-form)
[::interval amount unit])))
(defmethod sql.qp/datetime-diff [:hive-like :year]
[driver _unit x y]
(hx/call :div (sql.qp/datetime-diff driver :month x y) 12))
[:div (sql.qp/datetime-diff driver :month x y) 12])
(defmethod sql.qp/datetime-diff [:hive-like :quarter]
[driver _unit x y]
(hx/call :div (sql.qp/datetime-diff driver :month x y) 3))
[:div (sql.qp/datetime-diff driver :month x y) 3])
(defmethod sql.qp/datetime-diff [:hive-like :month]
[_driver _unit x y]
(hx/->integer (hx/call :months_between y x)))
(h2x/->integer [:months_between y x]))
(defmethod sql.qp/datetime-diff [:hive-like :week]
[_driver _unit x y]
(hx/call :div (hx/call :datediff y x) 7))
[:div [:datediff y x] 7])
(defmethod sql.qp/datetime-diff [:hive-like :day]
[_driver _unit x y]
(hx/call :datediff y x))
[:datediff y x])
(defmethod sql.qp/datetime-diff [:hive-like :hour]
[driver _unit x y]
(hx/call :div (sql.qp/datetime-diff driver :second x y) 3600))
[:div (sql.qp/datetime-diff driver :second x y) 3600])
(defmethod sql.qp/datetime-diff [:hive-like :minute]
[driver _unit x y]
(hx/call :div (sql.qp/datetime-diff driver :second x y) 60))
[:div (sql.qp/datetime-diff driver :second x y) 60])
(defmethod sql.qp/datetime-diff [:hive-like :second]
[_driver _unit x y]
(hx/call :- (hx/call :unix_timestamp y) (hx/call :unix_timestamp x)))
[:- [:unix_timestamp y] [:unix_timestamp x]])
(def ^:dynamic *param-splice-style*
"How we should splice params into SQL (i.e. 'unprepare' the SQL). Either `:friendly` (the default) or `:paranoid`.
......
......@@ -2,8 +2,8 @@
(:require
[clojure.java.jdbc :as jdbc]
[clojure.string :as str]
[honeysql.core :as hsql]
[honeysql.helpers :as hh]
[honey.sql :as sql]
[honey.sql.helpers :as sql.helpers]
[medley.core :as m]
[metabase.connection-pool :as connection-pool]
[metabase.driver :as driver]
......@@ -22,7 +22,7 @@
[metabase.query-processor.store :as qp.store]
[metabase.query-processor.util :as qp.util]
[metabase.query-processor.util.add-alias-info :as add]
[metabase.util.honeysql-extensions :as hx])
[metabase.util.honey-sql-2 :as h2x])
(:import
(java.sql Connection ResultSet)))
......@@ -57,27 +57,36 @@
:else source-table)))]
(parent-method driver field-clause)))
(defn- format-over
  "Honey SQL 2 formatter for `[::over expr partition]`, producing `<expr> OVER <partition>`,
  e.g. ROW_NUMBER() OVER (ORDER BY field DESC) AS __rownum__

  Both arguments are compiled as nested expressions. The result is a vector of the combined SQL string
  followed by the parameters of `expr` and then the parameters of `partition`."
  [_fn [expr partition]]
  (let [[expr-sql & expr-args]           (sql/format-expr expr {:nested true})
        [partition-sql & partition-args] (sql/format-expr partition {:nested true})
        over-sql                         (format "%s OVER %s" expr-sql partition-sql)]
    ;; parameters must follow the SQL in the same order their placeholders appear in it
    (-> [over-sql]
        (into expr-args)
        (into partition-args))))
;; Make `[::over expr partition]` forms compile via `format-over` (registered by var).
(sql/register-fn! ::over #'format-over)
(defmethod sql.qp/apply-top-level-clause [:sparksql :page]
[_ _ honeysql-form {{:keys [items page]} :page}]
[_driver _clause honeysql-form {{:keys [items page]} :page}]
(let [offset (* (dec page) items)]
(if (zero? offset)
;; if there's no offset we can simply use limit
(hh/limit honeysql-form items)
(sql.helpers/limit honeysql-form items)
;; if we need to do an offset we have to do nesting to generate a row number and where on that
(let [over-clause (format "row_number() OVER (%s)"
(first (hsql/format (select-keys honeysql-form [:order-by])
:allow-dashed-names? true
:quoting :mysql)))]
(-> (apply hh/select (map last (:select honeysql-form)))
(hh/from (hh/merge-select honeysql-form [(hx/raw over-clause) :__rownum__]))
(hh/where [:> :__rownum__ offset])
(hh/limit items))))))
(let [over-clause [::over :%row_number (select-keys honeysql-form [:order-by])]]
(-> (apply sql.helpers/select (map last (:select honeysql-form)))
(sql.helpers/from (sql.helpers/select honeysql-form [over-clause :__rownum__]))
(sql.helpers/where [:> :__rownum__ [:inline offset]])
(sql.helpers/limit [:inline items]))))))
(defmethod sql.qp/apply-top-level-clause [:sparksql :source-table]
[driver _ honeysql-form {source-table-id :source-table}]
(let [{table-name :name, schema :schema} (qp.store/table source-table-id)]
(hh/from honeysql-form [(sql.qp/->honeysql driver (hx/identifier :table schema table-name))
(sql.qp/->honeysql driver (hx/identifier :table-alias source-table-alias))])))
(sql.helpers/from honeysql-form [(sql.qp/->honeysql driver (h2x/identifier :table schema table-name))
[(sql.qp/->honeysql driver (h2x/identifier :table-alias source-table-alias))]])))
;;; ------------------------------------------- Other Driver Method Impls --------------------------------------------
......@@ -203,4 +212,6 @@
[_driver _feature _database]
false)
(defmethod sql.qp/quote-style :sparksql [_] :mysql)
(defmethod sql.qp/quote-style :sparksql
[_driver]
:mysql)
......@@ -2,7 +2,7 @@
(:require [clojure.test :refer :all]
[metabase.driver.sql-jdbc.sync :as sql-jdbc.sync]))
(deftest database-type->base-type-test
(deftest ^:parallel database-type->base-type-test
(testing "make sure the various types we use for running tests are actually mapped to the correct DB type"
(are [db-type expected] (= expected
(sql-jdbc.sync/database-type->base-type :hive-like db-type))
......
(ns metabase.driver.sparksql-test
(:require [clojure.test :refer :all]
honeysql.types
[metabase.driver.sql-jdbc.execute :as sql-jdbc.execute]
[metabase.driver.sql.query-processor :as sql.qp]
[metabase.query-processor :as qp]
[metabase.test :as mt]))
(:require
[clojure.string :as str]
[clojure.test :refer :all]
[metabase.db.query :as mdb.query]
[metabase.driver.sparksql :as sparksql]
[metabase.driver.sql-jdbc.execute :as sql-jdbc.execute]
[metabase.driver.sql.query-processor :as sql.qp]
[metabase.query-processor :as qp]
[metabase.test :as mt]
#_{:clj-kondo/ignore [:discouraged-namespace]}
[metabase.util.honeysql-extensions :as hx]))
(comment honeysql.types/keep-me)
(use-fixtures :each (fn [thunk]
(binding [hx/*honey-sql-version* 2]
(thunk))))
(deftest apply-page-test
(deftest ^:parallel apply-page-test
(testing "Make sure our custom implementation of `apply-page` works the way we'd expect"
(is (= {:select ["name" "id"]
:from [{:select [[:default.categories.name "name"]
[:default.categories.id "id"]
[#honeysql.types.SqlRaw{:s "row_number() OVER (ORDER BY `default`.`categories`.`id` ASC)"}
:__rownum__]]
:from [:default.categories]
:order-by [[:default.categories.id :asc]]}]
:where [:> :__rownum__ 5]
:limit 5}
(sql.qp/apply-top-level-clause :sparksql :page
{:select [[:default.categories.name "name"] [:default.categories.id "id"]]
:from [:default.categories]
:order-by [[:default.categories.id :asc]]}
{:page {:page 2
:items 5}})))))
(let [hsql {:select [:name :id]
:from [{:select [[:default.categories.name :name]
[:default.categories.id :id]
[[::sparksql/over
:%row_number
{:order-by [[:default.categories.id :asc]]}]
:__rownum__]]
:from [:default.categories]
:order-by [[:default.categories.id :asc]]}]
:where [:> :__rownum__ [:inline 5]]
:limit [:inline 5]}]
(is (= hsql
(sql.qp/apply-top-level-clause :sparksql :page
{:select [[:default.categories.name :name] [:default.categories.id :id]]
:from [:default.categories]
:order-by [[:default.categories.id :asc]]}
{:page {:page 2
:items 5}})))
(is (= [["SELECT"
" `name`,"
" `id`"
"FROM"
" ("
" SELECT"
" `default`.`categories`.`name` AS `name`,"
" `default`.`categories`.`id` AS `id`,"
" ROW_NUMBER() OVER ("
" ORDER BY"
" `default`.`categories`.`id` ASC"
" ) AS `__rownum__`"
" FROM"
" `default`.`categories`"
" ORDER BY"
" `default`.`categories`.`id` ASC"
" )"
"WHERE"
" `__rownum__` > 5"
"LIMIT"
" 5"]]
(-> (sql.qp/format-honeysql :sparksql hsql)
vec
(update 0 mdb.query/format-sql :sparksql)
(update 0 str/split-lines)))))))
(deftest splice-strings-test
(mt/test-driver :sparksql
......@@ -32,7 +67,7 @@
{:aggregation [[:count]]
:filter [:= $name "wow"]})]
(testing "The native query returned in query results should use user-friendly splicing"
(is (= "SELECT count(*) AS `count` FROM `test_data`.`venues` `t1` WHERE `t1`.`name` = 'wow'"
(is (= "SELECT COUNT(*) AS `count` FROM `test_data`.`venues` AS `t1` WHERE `t1`.`name` = 'wow'"
(:query (qp/compile-and-splice-parameters query))
(-> (qp/process-query query) :data :native_form :query))))
......@@ -43,9 +78,13 @@
(reset! the-sql sql)
(with-redefs [sql-jdbc.execute/prepared-statement orig]
(orig driver conn sql params)))]
(qp/process-query query)
(is (= (str "-- Metabase\n"
"SELECT count(*) AS `count` "
"FROM `test_data`.`venues` `t1` "
"WHERE `t1`.`name` = decode(unhex('776f77'), 'utf-8')")
@the-sql))))))))
(is (=? {:status :completed}
(qp/process-query query)))
(is (= ["-- Metabase"
"SELECT"
" COUNT(*) AS `count`"
"FROM"
" `test_data`.`venues` AS `t1`"
"WHERE"
" `t1`.`name` = decode(unhex('776f77'), 'utf-8')"]
(str/split-lines (mdb.query/format-sql @the-sql :sparksql))))))))))
......@@ -2,12 +2,9 @@
(:require
[clojure.java.jdbc :as jdbc]
[clojure.string :as str]
[honeysql.core :as hsql]
[honeysql.format :as hformat]
[metabase.config :as config]
[metabase.driver :as driver]
[metabase.driver.ddl.interface :as ddl.i]
[metabase.driver.sql.query-processor :as sql.qp]
[metabase.driver.sql.util :as sql.u]
[metabase.driver.sql.util.unprepare :as unprepare]
[metabase.test.data.interface :as tx]
......@@ -61,13 +58,28 @@
(when (= context :db)
{:db (ddl.i/format-name driver database-name)})))
(defmethod ddl/insert-rows-ddl-statements :sparksql
;; Converts a row value into a form that can be spliced directly into generated SQL.
(defprotocol ^:private Inline
  (^:private ->inline [this]))

;; `nil` is passed through unchanged.
(extend-type nil
  Inline
  (->inline [_] nil))

;; Any other value becomes a Honey SQL `[:raw ...]` form wrapping the result of
;; `unprepare/unprepare-value` for the `:sparksql` driver.
(extend-type Object
  Inline
  (->inline [obj]
    [:raw (unprepare/unprepare-value :sparksql obj)]))
(defmethod ddl/insert-rows-honeysql-form :sparksql
[driver table-identifier row-or-rows]
[(unprepare/unprepare driver
(binding [hformat/*subquery?* false]
(hsql/format (ddl/insert-rows-honeysql-form driver table-identifier row-or-rows)
:quoting (sql.qp/quote-style driver)
:allow-dashed-names? false)))])
(let [rows (u/one-or-many row-or-rows)
rows (for [row rows]
(update-vals row
(fn [val]
(if (and (vector? val)
(= (first val) :metabase.driver.sql.query-processor/compiled))
val
(->inline val)))))]
((get-method ddl/insert-rows-honeysql-form :sql/test-extensions) driver table-identifier rows)))
(defmethod load-data/do-insert! :sparksql
[driver spec table-identifier row-or-rows]
......
......@@ -1242,8 +1242,8 @@
(defmethod apply-top-level-clause [:sql :page]
[_driver _top-level-clause honeysql-form {{:keys [items page]} :page}]
(-> honeysql-form
(sql.helpers/limit items)
(sql.helpers/offset (* items (dec page)))))
(sql.helpers/limit (inline-num items))
(sql.helpers/offset (inline-num (* items (dec page))))))
;;; -------------------------------------------------- source-table --------------------------------------------------
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment