Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
Metabase
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Engineering Digital Service
Metabase
Commits
365f2c3b
Unverified
Commit
365f2c3b
authored
2 years ago
by
Cam Saul
Committed by
GitHub
2 years ago
Browse files
Options
Downloads
Patches
Plain Diff
Make Athena test data loading more robust (#27538)
parent
aa68beea
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
modules/drivers/athena/src/metabase/driver/athena.clj
+15
-6
15 additions, 6 deletions
modules/drivers/athena/src/metabase/driver/athena.clj
modules/drivers/athena/test/metabase/test/data/athena.clj
+61
-7
61 additions, 7 deletions
modules/drivers/athena/test/metabase/test/data/athena.clj
with
76 additions
and
13 deletions
modules/drivers/athena/src/metabase/driver/athena.clj
+
15
−
6
View file @
365f2c3b
...
...
@@ -142,9 +142,14 @@
;;; ------------------------------------------------- date functions -------------------------------------------------
(
def
^
:dynamic
*loading-data*
"HACK! Whether we're loading data (e.g. in [[metabase.test.data.athena]]). We can't use `timestamp with time zone`
literals when loading data because Athena doesn't let you use a `timestamp with time zone` value for a `timestamp`
column, and you can only have `timestamp` columns when actually creating them."
false
)
(
defmethod
unprepare/unprepare-value
[
:athena
OffsetDateTime
]
[
_driver
t
]
#
_
(
format
"timestamp '%s %s %s'"
(
t/local-date
t
)
(
t/local-time
t
)
(
t/zone-offset
t
))
;; Timestamp literals do not support offsets, or at least they don't in `INSERT INTO ...` statements. I'm not 100%
;; sure what the correct thing to do here is then. The options are either:
;;
...
...
@@ -154,11 +159,14 @@
;;
;; For now I went with option (1) because it SEEMS like that's what Athena is doing. Not sure about this tho. We can
;; do something better when we figure out what's actually going on. -- Cam
(
let
[
t
(
u.date/with-time-zone-same-instant
t
(
t/zone-id
"UTC"
))]
(
format
"timestamp '%s %s'"
(
t/local-date
t
)
(
t/local-time
t
))))
(
if
*loading-data*
(
let
[
t
(
u.date/with-time-zone-same-instant
t
(
t/zone-id
"UTC"
))]
(
format
"timestamp '%s %s'"
(
t/local-date
t
)
(
t/local-time
t
)))
;; when not loading data we can actually use timestamp with offset info.
(
format
"timestamp '%s %s %s'"
(
t/local-date
t
)
(
t/local-time
t
)
(
t/zone-offset
t
))))
(
defmethod
unprepare/unprepare-value
[
:athena
ZonedDateTime
]
[
_
driver
t
]
[
driver
t
]
;; This format works completely fine for literals e.g.
;;
;; SELECT timestamp '2022-11-16 04:21:00 US/Pacific'
...
...
@@ -169,8 +177,9 @@
;; that have already been created for performance reasons. If you add a new dataset and it should work for
;; Athena (despite Athena only partially supporting TIMESTAMP WITH TIME ZONE) then you can use the commented out impl
;; to do it. That should work ok because it converts it to a UTC then to a LocalDateTime. -- Cam
#
_
(
unprepare/unprepare-value
driver
(
t/offset-date-time
t
))
(
format
"timestamp '%s %s %s'"
(
t/local-date
t
)
(
t/local-time
t
)
(
t/zone-id
t
)))
(
if
*loading-data*
(
unprepare/unprepare-value
driver
(
t/offset-date-time
t
))
(
format
"timestamp '%s %s %s'"
(
t/local-date
t
)
(
t/local-time
t
)
(
t/zone-id
t
))))
;;; for some evil reason Athena expects `OFFSET` *before* `LIMIT`, unlike every other database in the known universe; so
;;; we'll have to have a custom implementation of `:page` here and do our own version of `:offset` that comes before
...
...
This diff is collapsed.
Click to expand it.
modules/drivers/athena/test/metabase/test/data/athena.clj
+
61
−
7
View file @
365f2c3b
...
...
@@ -5,6 +5,7 @@
[
clojure.tools.logging
:as
log
]
[
metabase.config
:as
config
]
[
metabase.driver
:as
driver
]
[
metabase.driver.athena
:as
athena
]
[
metabase.driver.ddl.interface
:as
ddl.i
]
[
metabase.driver.sql-jdbc.connection
:as
sql-jdbc.conn
]
[
metabase.driver.sql-jdbc.sync
:as
sql-jdbc.sync
]
...
...
@@ -28,8 +29,8 @@
;; Athena doesn't support dashes in Table names... we'll just go ahead and convert them all to underscores, even for DB
;; names.
(
defmethod
ddl.i/format-name
:athena
[
driver
table-or-field-name
]
((
get-method
ddl.i/format-name
:sql-jdbc
)
driver
(
str/replace
table-or-field-name
#
"-"
"_"
)))
[
driver
database-or-
table-or-field-name
]
((
get-method
ddl.i/format-name
:sql-jdbc
)
driver
(
str/replace
database-or-
table-or-field-name
#
"-"
"_"
)))
(
defmethod
tx/dbdef->connection-details
:athena
[
driver
_context
{
:keys
[
database-name
]
,
:as
_dbdef
}]
...
...
@@ -58,11 +59,46 @@
([
_
db-name
table-name
]
[
db-name
table-name
])
([
_
db-name
table-name
field-name
]
[
db-name
table-name
field-name
]))
;;; INSTRUCTIONS FOR MANUALLY DROPPING AND RECREATING A DATABASE
;;;
;;; 1. Install the AWS CLI if you haven't done so already
;;;
;;; 2. Create a profile using the `MB_ATHENA_TEST_ACCESS_KEY`, `MB_ATHENA_TEST_SECRET_KEY`, and `MB_ATHENA_TEST_REGION`
;;; you're using to run tests
;;;
;;; ````
;;; aws configure --profile athena-ci
;;;
;;; 3. Delete the data from the `MB_ATHENA_TEST_S3_STAGING_DIR` S3 bucket. The data directory is the same as the dataset
;;; name you want to delete with hyphens replaced with underscores e.g. `sample-dataset` becomes `sample_dataset`
;;;
;;; ```
;;; aws s3 --profile athena-ci rm s3://metabase-ci-athena-results/sample_dataset --recursive
;;; ```
;;;
;;; 4. Delete the database from the Glue Console.
;;;
;;; ```
;;; aws glue --profile athena-ci delete-database --name sample_dataset
;;; ```
;;;
;;; 5. After this you can recreate the database normally using the test loading code. Note that you must
;;; enable [[*allow-database-creation*]] for this to work:
;;;
;;; ```
;;; (db/delete! 'Database :engine "athena", :name "sample-dataset")
;;; (binding [metabase.test.data.athena/*allow-database-creation* true]
;;; (metabase.driver/with-driver :athena
;;; (metabase.test/dataset sample-dataset
;;; (metabase.test/db))))
;;; ```
;;; Athena requires backtick-escaped database name for some queries
(
defmethod
sql.tx/drop-db-if-exists-sql
:athena
[
driver
{
:keys
[
database-name
]}]
(
log/warn
"Dropping an Athena database does not delete existing data. You may have to delete in manually from S3 and Glue Console."
)
(
format
"DROP DATABASE IF EXISTS `%s` CASCADE;"
(
ddl.i/format-name
driver
database-name
)))
[
_driver
_dbdef
]
(
log/warn
(
str
"You cannot delete a [non-Iceberg] Athena database using DDL statements. It has to be deleted "
"manually from S3 and the Glue Console. See documentation in [[metabase.test.data.athena]] for "
"instructions for doing this."
)))
(
defmethod
sql.tx/drop-table-if-exists-sql
:athena
[
driver
{
:keys
[
database-name
]}
{
:keys
[
table-name
]}]
...
...
@@ -160,7 +196,10 @@
;;; 200 is super slow, and the query ends up being too large around 500 rows... for some reason the same dataset
;;; orders table (about 17k rows) stalls out at row 10,000 when loading them 200 at a time. It works when you do 400
;;; at a time tho. This is just going to have to be ok for now.
(
binding
[
load-data/*chunk-size*
400
]
(
binding
[
load-data/*chunk-size*
400
;; This tells Athena to convert `timestamp with time zone` literals to `timestamp` because otherwise it gets
;; very fussy! See [[athena/*loading-data*]] for more info.
athena/*loading-data*
true
]
(
apply
load-data/load-data-add-ids-chunked!
args
)))
(
defn-
server-connection-details
[]
...
...
@@ -177,11 +216,26 @@
(
log/infof
"The following Athena databases have already been created: %s"
(
pr-str
(
sort
dbs
)))
dbs
)))
(
def
^
:private
^
:dynamic
*allow-database-creation*
"Whether to allow database creation. This is normally disabled to prevent people from accidentally loading duplicate
data into Athena or somehow stomping over existing databases and breaking CI. If you want to create a new dataset,
change this flag to true and run your code again so the data will be loaded normally. Set it back to false when
you're done."
false
)
(
defmethod
tx/create-db!
:athena
[
driver
{
:keys
[
database-name
]
,
:as
db-def
}
&
options
]
(
let
[
database-name
(
ddl.i/format-name
driver
database-name
)]
(
if
(
contains?
(
existing-databases
)
database-name
)
(
cond
(
contains?
(
existing-databases
)
database-name
)
(
log/infof
"Athena database %s already exists, skipping creation"
(
pr-str
database-name
))
(
not
*allow-database-creation*
)
(
log/fatalf
(
str
"Athena database creation is disabled: not creating database %s. Tests will likely fail.\n"
"See metabase.test.data.athena/*allow-database-creation* for more info."
)
(
pr-str
database-name
))
:else
(
do
(
log/infof
"Creating Athena database %s"
(
pr-str
database-name
))
;; call the default impl for SQL JDBC drivers
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment