Skip to content
Snippets Groups Projects
Unverified Commit 87f20155 authored by Cam Saul's avatar Cam Saul
Browse files

Increased support & tests for Query.constraints.max-results [ci drivers]

parent fa1f957f
No related branches found
No related tags found
No related merge requests found
Showing
with 388 additions and 57 deletions
......@@ -21,10 +21,20 @@ checksum_file="$driver_project_dir/target/checksum.txt"
######################################## CALCULATING CHECKSUMS ########################################
md5_command=''
if [ `command -v md5` ]; then
md5_command=md5
elif [ `command -v md5sum` ]; then
md5_command=md5sum
else
echo "Don't know what command to use to calculate md5sums."
exit -2
fi
# Calculate a checksum of all the driver source files. If we've already built the driver and the checksum is the same
# there's no need to build the driver a second time
calculate_checksum() {
find "$driver_project_dir" -name '*.clj' -or -name '*.yaml' | sort | cat | md5sum
find "$driver_project_dir" -name '*.clj' -or -name '*.yaml' | sort | cat | $md5_command
}
# Check whether the saved checksum for the driver sources from the last build is the same as the current one. If so,
......@@ -127,7 +137,7 @@ build_driver_uberjar() {
if [ ! -f "$target_jar" ]; then
echo "Error: could not find $target_jar. Build failed."
exit -1
return -1
fi
}
......@@ -150,25 +160,47 @@ save_checksum() {
echo "$checksum" > "$checksum_file"
}
copy_target_to_dest() {
# ok, finally, copy finished JAR to the resources dir
echo "Copying $target_jar -> $dest_location"
cp "$target_jar" "$dest_location"
}
# Runs all the steps needed to build the driver.
build_driver() {
delete_old_drivers
install_metabase_core
build_metabase_uberjar
build_parents
build_driver_uberjar
strip_and_compress
save_checksum
delete_old_drivers &&
install_metabase_core &&
build_metabase_uberjar &&
build_parents &&
build_driver_uberjar &&
strip_and_compress &&
save_checksum &&
copy_target_to_dest
}
######################################## PUTTING IT ALL TOGETHER ########################################
mkdir -p resources/modules
clean_local_repo() {
echo "Deleting existing installed metabase-core and driver dependencies..."
rm -rf ~/.m2/repository/metabase-core
rm -rf ~/.m2/repository/metabase/*-driver
}
if [ ! "$(checksum_is_same)" ]; then
retry_clean_build() {
echo "Building without cleaning failed. Retrying clean build..."
clean_local_repo
build_driver
fi
}
# ok, finally, copy finished JAR to the resources dir
echo "Copying $target_jar -> $dest_location"
cp "$target_jar" "$dest_location"
mkdir -p resources/modules
# run only a specific step with ./bin/build-driver.sh <driver> <step>
if [ $# -eq 2 ]; then
$2
# Build driver if checksum has changed
elif [ ! "$(checksum_is_same)" ]; then
build_driver || retry_clean_build
# Either way, always copy the target uberjar to the dest location
else
copy_target_to_dest
fi
......@@ -74,7 +74,7 @@ describe("Question", () => {
const results1 = await question.apiGetResults({ ignoreCache: true });
expect(results1[0]).toBeDefined();
expect(results1[0].data.rows.length).toEqual(10000);
expect(results1[0].data.rows.length).toEqual(2000);
question._parameterValues = { [templateTagId]: "5" };
const results2 = await question.apiGetResults({ ignoreCache: true });
......
......@@ -6,6 +6,7 @@ driver:
- name: hive-like
lazy-load: true
abstract: true
parent: sql-jdbc
- name: sparksql
display-name: Spark SQL
lazy-load: true
......
......@@ -14,7 +14,7 @@
[table :refer [Table]]]
[metabase.util.honeysql-extensions :as hx]
[toucan.db :as db])
(:import java.util.Date))
(:import java.sql.PreparedStatement java.util.Date))
(driver/register! :hive-like, :parent :sql-jdbc, :abstract? true)
......@@ -110,12 +110,17 @@
(defn- run-query
"Run the query itself."
[{sql :query, params :params, remark :remark} connection]
(let [sql (str "-- " remark "\n" (hx/unescape-dots sql))
statement (into [sql] params)
[columns & rows] (jdbc/query connection statement {:identifiers identity, :as-arrays? true})]
{:rows (or rows [])
:columns (map u/keyword->qualified-name columns)}))
[{sql :query, :keys [params remark max-rows]} connection]
(let [sql (str "-- " remark "\n" (hx/unescape-dots sql))
options {:identifiers identity
:as-arrays? true
:max-rows max-rows}]
(with-open [connection (jdbc/get-connection connection)]
(with-open [^PreparedStatement statement (jdbc/prepare-statement connection sql options)]
(let [statement (into [statement] params)
[columns & rows] (jdbc/query connection statement options)]
{:rows (or rows [])
:columns (map u/keyword->qualified-name columns)})))))
(defn run-query-without-timezone
"Runs the given query without trying to set a timezone"
......
......@@ -16,6 +16,7 @@
[execute :as sql-jdbc.execute]
[sync :as sql-jdbc.sync]]
[metabase.driver.sql.query-processor :as sql.qp]
[metabase.mbql.util :as mbql.u]
[metabase.models.field :refer [Field]]
[metabase.query-processor
[store :as qp.store]
......@@ -114,15 +115,17 @@
;; bound variables are not supported in Spark SQL (maybe not Hive either, haven't checked)
(defmethod driver/execute-query :sparksql
[driver {:keys [database settings], query :native, :as outer-query}]
(let [query (-> (assoc query :remark (qputil/query->remark outer-query))
(assoc :query (if (seq (:params query))
(hive-like/unprepare (cons (:query query) (:params query)))
(:query query)))
(let [query (-> (assoc query
:remark (qputil/query->remark outer-query)
:query (if (seq (:params query))
(hive-like/unprepare (cons (:query query) (:params query)))
(:query query))
:max-rows (mbql.u/query->max-rows-limit outer-query))
(dissoc :params))]
(sql-jdbc.execute/do-with-try-catch
(fn []
(let [db-connection (sql-jdbc.conn/db->pooled-connection-spec database)]
(hive-like/run-query-without-timezone driver settings db-connection query))))))
(fn []
(let [db-connection (sql-jdbc.conn/db->pooled-connection-spec database)]
(hive-like/run-query-without-timezone driver settings db-connection query))))))
(defmethod driver/supports? [:sparksql :basic-aggregations] [_ _] true)
(defmethod driver/supports? [:sparksql :binning] [_ _] true)
......
......@@ -7,6 +7,7 @@
[driver :as driver]
[util :as u]]
[metabase.driver.sql-jdbc.connection :as sql-jdbc.conn]
[metabase.mbql.util :as mbql.u]
[metabase.query-processor
[store :as qp.store]
[util :as qputil]]
......@@ -178,15 +179,15 @@
(defn- run-query
"Run the query itself."
[driver {sql :query, params :params, remark :remark}, ^TimeZone timezone, connection]
[driver {sql :query, :keys [params remark max-rows]}, ^TimeZone timezone, connection]
(let [sql (str "-- " remark "\n" (hx/unescape-dots sql))
statement (into [sql] params)
[columns & rows] (cancelable-run-query
connection sql params
{:identifiers identity
:as-arrays? true
:read-columns (read-columns driver (some-> timezone Calendar/getInstance))
:set-parameters (set-parameters-with-timezone timezone)})]
:set-parameters (set-parameters-with-timezone timezone)
:max-rows max-rows})]
{:rows (or rows [])
:columns (map u/keyword->qualified-name columns)}))
......@@ -264,7 +265,9 @@
(defn execute-query
"Process and run a native (raw SQL) QUERY."
[driver {settings :settings, query :native, :as outer-query}]
(let [query (assoc query :remark (qputil/query->remark outer-query))]
(let [query (assoc query
:remark (qputil/query->remark outer-query)
:max-rows (mbql.u/query->max-rows-limit outer-query))]
(do-with-try-catch
(fn []
(let [db-connection (sql-jdbc.conn/db->pooled-connection-spec (qp.store/database))]
......
......@@ -558,7 +558,11 @@
(s/optional-key :filter) Filter
(s/optional-key :limit) su/IntGreaterThanZero
(s/optional-key :order-by) (distinct-non-empty [OrderBy])
(s/optional-key :page) {:page su/IntGreaterThanOrEqualToZero
;; page = page num, starting with 1. items = number of items per page.
;; e.g.
;; {:page 1, :items 10} = items 1-10
;; {:page 2, :items 10} = items 11-20
(s/optional-key :page) {:page su/IntGreaterThanZero
:items su/IntGreaterThanZero}
;; Various bits of middleware add additonal keys, such as `fields-is-implicit?`, to record bits of state or pass
;; info to other pieces of middleware. Everyone else can ignore them.
......@@ -597,13 +601,21 @@
"Additional constraints added to a query limiting the maximum number of rows that can be returned. Mostly useful
because native queries don't support the MBQL `:limit` clause. For MBQL queries, if `:limit` is set, it will
override these values."
{;; maximum number of results to allow for a query with aggregations
(s/optional-key :max-results) su/IntGreaterThanOrEqualToZero
;; maximum number of results to allow for a query with no aggregations
(s/optional-key :max-results-bare-rows) su/IntGreaterThanOrEqualToZero
;; other Constraints might be used somewhere, but I don't know about them. Add them if you come across them for
;; documentation purposes
s/Keyword s/Any})
(s/constrained
{ ;; maximum number of results to allow for a query with aggregations. If `max-results-bare-rows` is unset, this
;; applies to all queries
(s/optional-key :max-results) su/IntGreaterThanOrEqualToZero
;; maximum number of results to allow for a query with no aggregations.
;; If set, this should be LOWER than `:max-results`
(s/optional-key :max-results-bare-rows) su/IntGreaterThanOrEqualToZero
;; other Constraints might be used somewhere, but I don't know about them. Add them if you come across them for
;; documentation purposes
s/Keyword s/Any}
(fn [{:keys [max-results max-results-bare-rows]}]
(if-not (core/and max-results max-results-bare-rows)
true
(core/>= max-results max-results-bare-rows)))
"max-results-bare-rows must be less or equal to than max-results"))
(def ^:private MiddlewareOptions
"Additional options that can be used to toggle middleware on or off."
......@@ -644,6 +656,8 @@
:question
:xlsx-download))
;; TODO - this schema is somewhat misleading because if you use a function like `qp/process-query-and-save-with-max!`
;; some of these keys (e.g. `:context`) are in fact required
(def Info
"Schema for query `:info` dictionary, which is used for informational purposes to record information about how a query
was executed in QueryExecution and other places. It is considered bad form for middleware to change its behavior
......@@ -683,6 +697,7 @@
`Card.dataset_query`."
(s/constrained
;; TODO - move database/virtual-id into this namespace so we don't have to use the magic number here
;; Something like `metabase.mbql.constants`
{:database (s/cond-pre (s/eq -1337) su/IntGreaterThanZero)
;; Type of query. `:query` = MBQL; `:native` = native. TODO - consider normalizing `:query` to `:mbql`
:type (s/enum :query :native)
......
......@@ -476,3 +476,34 @@
[aggregation->name-fn :- (s/pred fn?), aggregations :- [mbql.s/Aggregation]]
(-> (pre-alias-aggregations aggregation->name-fn aggregations)
uniquify-named-aggregations))
(defn query->max-rows-limit
"Calculate the absolute maximum number of results that should be returned by this query (MBQL or native), useful for
doing the equivalent of
java.sql.Statement statement = ...;
statement.setMaxRows(<max-rows-limit>).
to ensure the DB cursor or equivalent doesn't fetch more rows than will be consumed.
This is calculated as follows:
* If query is `MBQL` and has a `:limit` or `:page` clause, returns appropriate number
* If query has `:constraints` with `:max-results-bare-rows` or `:max-results`, returns the appropriate number
* `:max-results-bare-rows` is returned if set and Query does not have any aggregations
* `:max-results` is returned otherwise
* If none of the above are set, returns `nil`. In this case, you should use something like the Metabase QP's
`max-rows-limit`"
[{{:keys [max-results max-results-bare-rows]} :constraints
{limit :limit, aggregations :aggregation, {:keys [items]} :page} :query
query-type :type}]
(let [safe-min (fn [& args]
(when-let [args (seq (filter some? args))]
(reduce min args)))
mbql-limit (when (= query-type :query)
(safe-min items limit))
constraints-limit (or
(when-not aggregations
max-results-bare-rows)
max-results)]
(safe-min mbql-limit constraints-limit)))
......@@ -366,11 +366,24 @@
{:max-results max-results
:max-results-bare-rows max-results-bare-rows})
(defn- add-default-constraints
"Add default values of `:max-results` and `:max-results-bare-rows` to `:constraints` map `m`."
[m]
(merge
default-query-constraints
;; `:max-results-bare-rows` must be less than or equal to `:max-results`, so if someone sets `:max-results` but not
;; `:max-results-bare-rows` use the same value for both. Otherwise the default bare rows value could end up being
;; higher than the custom `:max-rows` value, causing an error
(when-let [max-results (:max-results m)]
{:max-results-bare-rows max-results})
m))
(s/defn process-query-and-save-with-max!
"Same as `process-query-and-save-execution!` but will include the default max rows returned as a constraint"
"Same as `process-query-and-save-execution!` but will include the default max rows returned as a constraint. (This
function is ulitmately what powers most API endpoints that run queries, including `POST /api/dataset`.)"
{:style/indent 1}
[query, options :- mbql.s/Info]
(process-query-and-save-execution! (assoc query :constraints default-query-constraints) options))
(process-query-and-save-execution! (update query :constraints add-default-constraints) options))
(s/defn process-query-without-save!
"Invokes `process-query` with info needed for the included remark."
......
(ns metabase.query-processor.middleware.limit
"Middleware that handles limiting the maximum number of rows returned by a query."
(:require (metabase.query-processor [interface :as i]
[util :as qputil])))
(:require [metabase.mbql.util :as mbql.u]
[metabase.query-processor
[interface :as i]
[util :as qputil]]))
(defn limit
"Add an implicit `limit` clause to MBQL queries without any aggregations, and limit the maximum number of rows that
can be returned in post-processing."
[qp]
(fn [{{:keys [max-results max-results-bare-rows]} :constraints, query-type :type, :as query}]
(let [query (cond-> query
(and (= query-type :query)
(qputil/query-without-aggregations-or-limits? query))
(assoc-in [:query :limit] (or max-results-bare-rows
max-results
i/absolute-max-results)))
results (qp query)]
(update results :rows (partial take (or max-results
i/absolute-max-results))))))
(fn [{query-type :type, :as query}]
(let [max-rows (or (mbql.u/query->max-rows-limit query)
i/absolute-max-results)
query (cond-> query
(and (= query-type :query)
(qputil/query-without-aggregations-or-limits? query))
(assoc-in [:query :limit] max-rows))
results (qp query)]
(update results :rows (partial take max-rows)))))
(ns metabase.driver.sql-jdbc.execute-test
(:require [clojure.java.jdbc :as jdbc]
[metabase.driver.sql-jdbc-test :as sql-jdbc-test]
[metabase.query-processor :as qp]
[metabase.test.data :as data]
[metabase.test.data.datasets :as datasets])
(:import java.sql.PreparedStatement))
(defn- do-with-max-rows [f]
(let [orig-query jdbc/query
max-rows (atom nil)]
(with-redefs [jdbc/query (fn [conn sql-params & [opts]]
(when (sequential? sql-params)
(let [[statement] sql-params]
(when (instance? PreparedStatement statement)
(reset! max-rows (.getMaxRows ^PreparedStatement statement)))))
(orig-query conn sql-params opts))]
(let [result (f)]
(or (when @max-rows
{:max-rows @max-rows})
result)))))
(defmacro ^:private with-max-rows
"Runs query in `body`, and returns the max rows that was set for (via `PreparedStatement.setMaxRows()`) for that
query. This number is the number we've instructed JDBC to limit the results to."
[& body]
`(do-with-max-rows (fn [] ~@body)))
;; We should be setting statement max rows based on appropriate limits when running queries (Snowflake runs tests with
(datasets/expect-with-drivers @sql-jdbc-test/sql-jdbc-drivers
{:max-rows 10}
(with-max-rows
(qp/process-query
{:database (data/id)
:type :query
:query {:source-table (data/id :venues)
:limit 10}})))
(datasets/expect-with-drivers @sql-jdbc-test/sql-jdbc-drivers
{:max-rows 5}
(with-max-rows
(qp/process-query
{:database (data/id)
:type :query
:query {:source-table (data/id :venues)
:limit 10}
:constraints {:max-results 5}})))
(datasets/expect-with-drivers @sql-jdbc-test/sql-jdbc-drivers
{:max-rows 15}
(with-max-rows
(qp/process-query
{:database (data/id)
:type :native
:native (qp/query->native {:database (data/id)
:type :query
:query {:source-table (data/id :venues)}})
:constraints {:max-results 15}})))
......@@ -19,7 +19,8 @@
(def ^:private venues-table (delay (Table (id :venues))))
(def ^:private users-name-field (delay (Field (id :users :name))))
(defonce sql-jdbc-drivers
(defonce ^{:doc "Set of drivers descending from `:sql-jdbc`, for test purposes (i.e. `expect-with-drivers`)"}
sql-jdbc-drivers
(delay
(du/profile "resolve @metabase.driver.sql-jdbc-test/sql-jdbc-drivers"
(set
......
......@@ -610,3 +610,100 @@
[:avg [:field-id 1]]
[:named [:sum [:field-id 1]] "sum_2"]
[:min [:field-id 1]]]))
;;; --------------------------------------------- query->max-rows-limit ----------------------------------------------
;; should return `:limit` if set
(expect
10
(mbql.u/query->max-rows-limit
{:database 1, :type :query, :query {:source-table 1, :limit 10}}))
;; should return `:page` items if set
(expect
5
(mbql.u/query->max-rows-limit
{:database 1, :type :query, :query {:source-table 1, :page {:page 1, :items 5}}}))
;; if `:max-results` is set return that
(expect
15
(mbql.u/query->max-rows-limit
{:database 1, :type :query, :query {:source-table 1}, :constraints {:max-results 15}}))
;; if `:max-results-bare-rows` is set AND query has no aggregations, return that
(expect
10
(mbql.u/query->max-rows-limit
{:database 1, :type :query, :query {:source-table 1}, :constraints {:max-results 5, :max-results-bare-rows 10}}))
(expect
10
(mbql.u/query->max-rows-limit
{:database 1
:type :native
:native {:query "SELECT * FROM my_table"}
:constraints {:max-results 5, :max-results-bare-rows 10}}))
;; if `:max-results-bare-rows` is set but query has aggregations, return `:max-results` instead
(expect
5
(mbql.u/query->max-rows-limit
{:database 1
:type :query
:query {:source-table 1, :aggregation [[:count]]}
:constraints {:max-results 5, :max-results-bare-rows 10}}))
;; if both `:limit` and `:page` are set (not sure makes sense), return the smaller of the two
(expect
5
(mbql.u/query->max-rows-limit
{:database 1, :type :query, :query {:source-table 1, :limit 10, :page {:page 1, :items 5}}}))
(expect
5
(mbql.u/query->max-rows-limit
{:database 1, :type :query, :query {:source-table 1, :limit 5, :page {:page 1, :items 10}}}))
;; if both `:limit` and `:constraints` are set, prefer the smaller of the two
(expect
5
(mbql.u/query->max-rows-limit
{:database 1
:type :query
:query {:source-table 1, :limit 5}
:constraints {:max-results 10}}))
(expect
10
(mbql.u/query->max-rows-limit
{:database 1
:type :query
:query {:source-table 1, :limit 15}
:constraints {:max-results 10}}))
;; since this query doesn't have an aggregation we should be using `max-results-bare-rows`
(expect
5
(mbql.u/query->max-rows-limit
{:database 1
:type :query
:query {:source-table 1, :limit 10}
:constraints {:max-results 15, :max-results-bare-rows 5}}))
;; add an aggregation, and `:max-results` is used instead; since `:limit` is lower, return that
(expect
10
(mbql.u/query->max-rows-limit
{:database 1
:type :query
:query {:source-table 1, :limit 10, :aggregation [[:count]]}
:constraints {:max-results 15, :max-results-bare-rows 5}}))
;; if nothing is set return `nil`
(expect
nil
(mbql.u/query->max-rows-limit
{:database 1
:type :query
:query {:source-table 1}}))
(ns metabase.query-processor-test.constraints-test
"Test for MBQL `:constraints`"
(:require [metabase
[query-processor :as qp]
[query-processor-test :as qp.test]]
[metabase.test.data :as data]))
(defn- mbql-query []
{:database (data/id)
:type :query
:query {:source-table (data/id :venues)
:fields [[:field-id (data/id :venues :name)]]
:order-by [[:asc [:field-id (data/id :venues :id)]]]}})
(defn- native-query []
(qp/query->native (mbql-query)))
;; Do `:max-results` constraints affect the number of rows returned by native queries?
(qp.test/expect-with-non-timeseries-dbs
[["Red Medicine"]
["Stout Burgers & Beers"]
["The Apple Pan"]
["Wurstküche"]
["Brite Spot Family Restaurant"]]
(qp.test/rows
(qp/process-query
{:database (data/id)
:type :native
:native (native-query)
:constraints {:max-results 5}})))
;; does it also work when running via `process-query-and-save-with-max!`, the function that powers endpoints like
;; `POST /api/dataset`?
(qp.test/expect-with-non-timeseries-dbs
[["Red Medicine"]
["Stout Burgers & Beers"]
["The Apple Pan"]
["Wurstküche"]
["Brite Spot Family Restaurant"]]
(qp.test/rows
(qp/process-query-and-save-with-max!
{:database (data/id)
:type :native
:native (native-query)
:constraints {:max-results 5}}
{:context :question})))
;; constraints should override MBQL `:limit` if lower
(qp.test/expect-with-non-timeseries-dbs
[["Red Medicine"]
["Stout Burgers & Beers"]
["The Apple Pan"]]
(qp.test/rows
(qp/process-query
(-> (mbql-query)
(assoc-in [:query :limit] 10)
(assoc :constraints {:max-results 3})))))
;; However if `:limit` is lower than `:constraints` we should not return more than the `:limit`
(qp.test/expect-with-non-timeseries-dbs
[["Red Medicine"]
["Stout Burgers & Beers"]
["The Apple Pan"]
["Wurstküche"]
["Brite Spot Family Restaurant"]]
(qp.test/rows
(qp/process-query
(-> (mbql-query)
(assoc-in [:query :limit] 5)
(assoc :constraints {:max-results 10})))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment