Skip to content
Snippets Groups Projects
Unverified Commit a31a7d08 authored by Pawit Pornkitprasan's avatar Pawit Pornkitprasan Committed by GitHub
Browse files

Fix loading sample dataset in Redshift (#18089)

- `TEXT` in Redshift is equivalent to `VARCHAR(256)`, use
  `VARCHAR(1024)` instead
- Unskip all tests skipped on Redshift due to sample dataset issue
- Add explicit `:order-by` to `join-source-queries-with-joins-test`
  to make it pass in Redshift as Redshift does not preserve order
  after join (like a lot of other distributed query engines)
- Add `VACUUM` and `ANALYZE` call after INSERT to improve performance
parent a750e4a5
No related merge requests found
Showing
with 33 additions and 32 deletions
......@@ -915,8 +915,6 @@
(deftest pivot-query-test
(mt/test-drivers (disj
(mt/normal-drivers-with-feature :foreign-keys :nested-queries :left-join)
;; sample-dataset doesn't work on Redshift yet -- see #14784
:redshift
;; this test relies on a FK relation between $product_id->products.category, so skip for Presto
;; JDBC, because that driver doesn't support resolving FKs from the JDBC metadata
:presto-jdbc)
......
......@@ -4,6 +4,7 @@
[metabase.driver.sql-jdbc.sync :as sql-jdbc.sync]
[metabase.test.data.interface :as tx]
[metabase.test.data.sql :as sql.tx]
[metabase.test.data.sql-jdbc.load-data :as load-data]
[metabase.test.data.sql.ddl :as ddl]
[metabase.util :as u]))
......@@ -18,7 +19,11 @@
:type/Decimal "DECIMAL"
:type/Float "FLOAT8"
:type/Integer "INTEGER"
:type/Text "TEXT"}]
;; Use VARCHAR because TEXT in Redshift is VARCHAR(256)
;; https://docs.aws.amazon.com/redshift/latest/dg/r_Character_types.html#r_Character_types-varchar-or-character-varying
;; But don't use VARCHAR(MAX) either because of performance impact
;; https://docs.aws.amazon.com/redshift/latest/dg/c_best-practices-smallest-column-size.html
:type/Text "VARCHAR(1024)"}]
(defmethod sql.tx/field-base-type->sql-type [:redshift base-type] [_ _] database-type))
;; If someone tries to run Time column tests with Redshift give them a heads up that Redshift does not support it
......@@ -65,6 +70,15 @@
[& args]
(apply sql.tx/drop-table-if-exists-cascade-sql args))
(defmethod load-data/load-data! :redshift
[driver {:keys [database-name], :as dbdef} {:keys [table-name], :as tabledef}]
(load-data/load-data-all-at-once! driver dbdef tabledef)
(let [table-identifier (sql.tx/qualify-and-quote :redshift database-name table-name)
spec (sql-jdbc.conn/connection-details->spec :redshift @db-connection-details)]
;; VACUUM and ANALYZE after insert to improve performance (according to doc)
(jdbc/execute! spec (str "VACUUM " table-identifier) {:transaction? false})
(jdbc/execute! spec (str "ANALYZE " table-identifier) {:transaction? false})))
;;; Create + destroy the schema used for this test session
(defn execute! [format-string & args]
......
......@@ -319,7 +319,7 @@
;; this only works on a handful of databases -- most of them don't allow you to ask for a Field that isn't in
;; the GROUP BY expression
(when (#{:bigquery :mongo :presto :redshift :h2 :sqlite} metabase.driver/*driver*)
(when (#{:bigquery :mongo :presto :h2 :sqlite} metabase.driver/*driver*)
(testing "with an added expression"
;; the added expression is coming back in this query because it is explicitly included in `:fields` -- see
;; comments on `metabase.query-processor.pivot-test/pivots-should-not-return-expressions-test`.
......
(ns metabase.api.pivots
(:require [metabase.test :as mt]))
;; Redshift takes A LONG TIME to insert the sample-dataset, so do not
;; run these tests against Redshift (for now?)
;;TODO: refactor Redshift testing to support a bulk COPY or something
;; other than INSERT INTO statements
(defn applicable-drivers
"Drivers that these pivot table tests should run on"
[]
(disj (mt/normal-drivers-with-feature :expressions :left-join) :redshift))
(mt/normal-drivers-with-feature :expressions :left-join))
(defn pivot-query
"A basic pivot table query"
......
......@@ -86,8 +86,7 @@
(deftest two-case-functions-test
(testing "We should support expressions with two case statements (#15107)"
;; sample-dataset doesn't work on Redshift yet -- see #14784
(mt/test-drivers (disj (mt/normal-drivers-with-feature :expressions) :redshift)
(mt/test-drivers (mt/normal-drivers-with-feature :expressions)
(mt/dataset sample-dataset
(is (= [[1
"1018947080336"
......
......@@ -521,8 +521,7 @@
(deftest join-source-queries-with-joins-test
(testing "Should be able to join against source queries that themselves contain joins (#12928)"
;; sample-dataset doesn't work on Redshift yet -- see #14784
(mt/test-drivers (disj (mt/normal-drivers-with-feature :nested-queries :left-join :foreign-keys) :redshift)
(mt/test-drivers (mt/normal-drivers-with-feature :nested-queries :left-join :foreign-keys)
(mt/dataset sample-dataset
(testing "(#12928)"
(let [query (mt/mbql-query orders
......@@ -550,6 +549,8 @@
:alias "P2"}]
:aggregation [[:avg $reviews.rating]]
:breakout [&P2.products.category]}}]
:order-by [[:asc &P1.products.category]
[:asc [:field %people.source {:join-alias "People"}]]]
:limit 2})]
(is (= [["Doohickey" "Affiliate" 783 "Doohickey" 3]
["Doohickey" "Facebook" 816 "Doohickey" 3]]
......
......@@ -275,8 +275,7 @@
:expressions {"double-price" [:* $price 2]}})))))))
#_(deftest multiple-cumulative-sums-test
;; sample-dataset doesn't work on Redshift yet -- see #14784
(mt/test-drivers (disj (mt/normal-drivers-with-feature :expression-aggregations) :redshift)
(mt/test-drivers (mt/normal-drivers-with-feature :expression-aggregations)
(testing "The results of divide or multiply two CumulativeSum should be correct (#15118)"
(mt/dataset sample-dataset
(is (= [["2016-01-01T00:00:00Z" 3236 2458.0 5694.0 1]
......
......@@ -373,8 +373,7 @@
[:field "min" {:base-type :type/Number}]]}})))))))
(deftest fk-field-and-duplicate-names-test
;; Redshift hangs on sample-dataset -- See #14784
(mt/test-drivers (disj (mt/normal-drivers-with-feature :expressions :foreign-keys) :redshift)
(mt/test-drivers (mt/normal-drivers-with-feature :expressions :foreign-keys)
(testing "Expressions with `fk->` fields and duplicate names should work correctly (#14854)"
(mt/dataset sample-dataset
(let [results (mt/run-mbql-query orders
......
......@@ -139,8 +139,7 @@
:filter [:= $receiver_id->users.name "Rasta Toucan"]}))))))))
(deftest implicit-joins-with-expressions-test
;; Redshift excluded for now since the sample dataset seems to hang for Redshift -- see #14784
(mt/test-drivers (disj (mt/normal-drivers-with-feature :foreign-keys :expressions) :redshift)
(mt/test-drivers (mt/normal-drivers-with-feature :foreign-keys :expressions)
(testing "Should be able to run query with multiple implicit joins and breakouts"
(mt/dataset sample-dataset
(is (= [["Doohickey" "Facebook" "2019-01-01T00:00:00Z" 0 263]
......
......@@ -1051,8 +1051,7 @@
:value "Widget"}]})))))))
(deftest nested-queries-with-expressions-and-joins-test
;; sample-dataset doesn't work on Redshift yet -- see #14784
(mt/test-drivers (disj (mt/normal-drivers-with-feature :foreign-keys :nested-queries :left-join) :redshift)
(mt/test-drivers (mt/normal-drivers-with-feature :foreign-keys :nested-queries :left-join)
(mt/dataset sample-dataset
(testing "Do nested queries in combination with joins and expressions still work correctly? (#14969)"
;; not sure why Snowflake has slightly different results
......@@ -1104,7 +1103,7 @@
:fk-field-id %product_id}]}))))))))
(deftest multi-level-aggregations-with-post-aggregation-filtering-test
(mt/test-drivers (disj (mt/normal-drivers-with-feature :foreign-keys :nested-queries) :redshift) ; sample-dataset doesn't work on Redshift yet -- see #14784
(mt/test-drivers (mt/normal-drivers-with-feature :foreign-keys :nested-queries)
(testing "Multi-level aggregations with filter is the last section (#14872)"
(mt/dataset sample-dataset
;; not 100% sure why Snowflake has slightly different results
......@@ -1133,7 +1132,7 @@
:filter [:> *sum/Float 100]}))))))))
(deftest date-range-test
(mt/test-drivers (disj (mt/normal-drivers-with-feature :foreign-keys :nested-queries) :redshift) ; sample-dataset doesn't work on Redshift yet -- see #14784
(mt/test-drivers (mt/normal-drivers-with-feature :foreign-keys :nested-queries)
(testing "Date ranges should work the same in nested queries as is regular queries (#15352)"
(mt/dataset sample-dataset
(let [q1 (mt/mbql-query orders
......@@ -1181,13 +1180,11 @@
(deftest nested-query-with-expressions-test
(testing "Nested queries with expressions should work in top-level native queries (#12236)"
(mt/test-drivers (disj (mt/normal-drivers-with-feature
:nested-queries
:basic-aggregations
:expression-aggregations
:foreign-keys)
;; sample-dataset doesn't work on Redshift yet -- see #14784
:redshift)
(mt/test-drivers (mt/normal-drivers-with-feature
:nested-queries
:basic-aggregations
:expression-aggregations
:foreign-keys)
(mt/dataset sample-dataset
(mt/with-temp Card [card {:dataset_query (mt/mbql-query orders
{:filter [:between $total 30 60]
......
......@@ -202,8 +202,7 @@
))))
(deftest remappings-with-implicit-joins-test
;; Redshift excluded for now since the sample dataset seems to hang for Redshift.
(mt/test-drivers (disj (mt/normal-drivers-with-feature :foreign-keys :nested-queries) :redshift)
(mt/test-drivers (mt/normal-drivers-with-feature :foreign-keys :nested-queries)
(testing "Queries with implicit joins should still work when FK remaps are used (#13641)"
(mt/dataset sample-dataset
(mt/with-column-remappings [orders.product_id products.title]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment