Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
Metabase
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Engineering Digital Service
Metabase
Commits
a767fd31
Unverified
Commit
a767fd31
authored
1 year ago
by
Chris Truter
Committed by
GitHub
1 year ago
Browse files
Options
Downloads
Patches
Plain Diff
Support appending new CSV fields (#39951) (#40220)
parent
7a4918d0
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/metabase/driver/h2.clj
+8
-0
8 additions, 0 deletions
src/metabase/driver/h2.clj
src/metabase/upload.clj
+72
-53
72 additions, 53 deletions
src/metabase/upload.clj
test/metabase/upload_test.clj
+49
-22
49 additions, 22 deletions
test/metabase/upload_test.clj
with
129 additions
and
75 deletions
src/metabase/driver/h2.clj
+
8
−
0
View file @
a767fd31
...
...
@@ -580,3 +580,11 @@
[
_driver
]
;; http://www.h2database.com/html/advanced.html#limits_limitations
256
)
(
defmethod
driver/add-columns!
:h2
[
driver
db-id
table-name
column-definitions
&
{
:as
settings
}]
;; Workaround for the fact that H2 uses different syntax for adding multiple columns, which is difficult to
;; produce with HoneySQL. As a simpler workaround we instead break it up into single column statements.
(
let
[
f
(
get-method
driver/add-columns!
:sql-jdbc
)]
(
doseq
[[
k
v
]
column-definitions
]
(
f
driver
db-id
table-name
{
k
v
}
settings
))))
This diff is collapsed.
Click to expand it.
src/metabase/upload.clj
+
72
−
53
View file @
a767fd31
...
...
@@ -570,28 +570,33 @@
(
defn-
base-type->upload-type
"Returns the most specific upload type for the given base type."
[
base-type
]
(
condp
#
(
isa?
%2
%1
)
base-type
:type/Float
::float
:type/BigInteger
::int
:type/Integer
::int
:type/Boolean
::boolean
:type/DateTimeWithTZ
::offset-datetime
:type/DateTime
::datetime
:type/Date
::date
:type/Text
::text
))
(
when
base-type
(
condp
#
(
isa?
%2
%1
)
base-type
:type/Float
::float
:type/BigInteger
::int
:type/Integer
::int
:type/Boolean
::boolean
:type/DateTimeWithTZ
::offset-datetime
:type/DateTime
::datetime
:type/Date
::date
:type/Text
::text
)))
(
defn-
not-blank
[
s
]
(
when-not
(
str/blank?
s
)
s
))
(
defn-
extra-and-missing-error-markdown
[
extra
missing
]
(
->>
[[(
tru
"The CSV file contains extra columns that are not in the table:"
)
extra
]
[(
tru
"The CSV file is missing columns that are in the table:"
)
missing
]]
(
keep
(
fn
[[
header
columns
]]
(
when
(
seq
columns
)
(
str/join
"\n"
(
cons
header
(
map
#
(
str
"- "
%
)
columns
))))))
(
str/join
"\n\n"
)
(
not-blank
)))
(
when
(
seq
missing
)
(
->>
[[(
tru
"The CSV file is missing columns that are in the table:"
)
missing
]
;; Even though we allow new columns to be implicitly added by uploads, we mention then in the error messages
;; for missing fields as a common case will be the misspelling of names. Seeing the actual and expected
;; names together could help customers spot the root cause more easily.
[(
tru
"There are new columns in the CSV file that are not in the table:"
)
extra
]]
(
keep
(
fn
[[
header
columns
]]
(
when
(
seq
columns
)
(
str/join
"\n"
(
cons
header
(
map
#
(
str
"- "
%
)
columns
))))))
(
str/join
"\n\n"
)
(
not-blank
))))
(
def
^
:private
allowed-type-upgrades
"A mapping of which types a column can be implicitly relaxed to, based on the content of appended values."
...
...
@@ -607,35 +612,50 @@
;; Assumes table-cols are unique when normalized
(
let
[
normalized-field-names
(
keys
fields-by-normed-name
)
normalized-header
(
map
normalize-column-name
header
)
[
extra
missing
_both
]
(
data/diff
(
set
normalized-header
)
(
set
normalized-field-names
))]
[
extra
missing
_both
]
(
data/diff
(
set
normalized-header
)
(
set
normalized-field-names
))]
;; check for duplicates
(
when
(
some
#
(
<
1
%
)
(
vals
(
frequencies
normalized-header
)))
(
throw
(
ex-info
(
tru
"The CSV file contains duplicate column names."
)
{
:status-code
422
})))
(
when
(
or
extra
missing
)
(
let
[
error-message
(
extra-and-missing-error-markdown
extra
missing
)]
(
throw
(
ex-info
error-message
{
:status-code
422
}))))))
(
when-let
[
error-message
(
extra-and-missing-error-markdown
extra
missing
)]
(
throw
(
ex-info
error-message
{
:status-code
422
})))))
(
defn-
matching-or-upgradable?
[
current-type
relaxed-type
]
(
or
(
=
current-type
relaxed-type
)
(
or
(
nil?
current-type
)
(
=
current-type
relaxed-type
)
(
when-let
[
f
(
allowed-type-upgrades
current-type
)]
(
f
relaxed-type
))))
(
defn-
changed-field->new-type
"Given some fields and old and new types, filter out fields with unchanged types, then pair with the new types."
[
fields
old-types
new-types
]
(
let
[
new-if-changed
#
(
when
(
not=
%1
%2
)
%2
)]
(
->>
(
map
new-if-changed
old-types
new-types
)
(
map
vector
fields
)
(
filter
second
)
(
into
{}))))
(
defn-
alter-columns!
[
driver
database
table
field->new-type
]
(
driver/alter-columns!
driver
(
:id
database
)
(
table-identifier
table
)
(
m/map-kv
(
fn
[
field
column-type
]
[(
keyword
(
:name
field
))
(
driver/upload-type->database-type
driver
column-type
)])
field->new-type
)))
(
defn-
field-changes
"Given existing and newly inferred types for the given `field-names`, calculate which fields need to be added or updated, along with their new types."
[
field-names
existing-types
new-types
]
(
reduce
(
fn
[
m
[
f
e
n
]]
(
cond
(
nil?
e
)
(
assoc-in
m
[
:added
f
]
n
)
(
not=
e
n
)
(
assoc-in
m
[
:updated
f
]
n
)
:else
m
))
{
:added
{}
,
:updated
{}}
(
map
vector
field-names
existing-types
new-types
)))
(
defn-
field->db-type
[
driver
field->col-type
]
(
m/map-kv
(
fn
[
field-name
col-type
]
[(
keyword
field-name
)
(
driver/upload-type->database-type
driver
col-type
)])
field->col-type
))
(
defn-
add-columns!
[
driver
database
table
field->type
&
args
]
(
when
(
seq
field->type
)
(
apply
driver/add-columns!
driver
(
:id
database
)
(
table-identifier
table
)
(
field->db-type
driver
field->type
)
args
)))
(
defn-
alter-columns!
[
driver
database
table
field->new-type
&
args
]
(
when
(
seq
field->new-type
)
(
apply
driver/alter-columns!
driver
(
:id
database
)
(
table-identifier
table
)
(
field->db-type
driver
field->new-type
)
args
)))
(
defn-
append-csv!*
[
database
table
file
]
...
...
@@ -652,24 +672,25 @@
(
not
(
contains?
normed-name->field
auto-pk-column-name
)))
_
(
check-schema
(
dissoc
normed-name->field
auto-pk-column-name
)
header
)
settings
(
upload-parsing/get-settings
)
old-
column-types
(
map
(
comp
base-type->upload-type
:base_type
normed-name->field
)
normed-header
)
old-
types
(
map
(
comp
base-type->upload-type
:base_type
normed-name->field
)
normed-header
)
;; in the happy, and most common, case all the values will match the existing types
;; for now we just plan for the worst and perform a fairly expensive operation to detect any type changes
;; we can come back and optimize this to an optimistic-with-fallback approach later.
detected-types
(
column-types-from-rows
settings
old-column-types
rows
)
new-column-types
(
map
#
(
if
(
matching-or-upgradable?
%1
%2
)
%2
%1
)
old-column-types
detected-types
)
_
(
when
(
and
(
not=
old-column-types
new-column-types
)
;; if we cannot coerce all the columns, don't bother coercing any of them
;; we will instead throw an error when we try to parse as the old type
(
=
detected-types
new-column-types
))
(
let
[
fields
(
map
normed-name->field
normed-header
)]
(
->>
(
changed-field->new-type
fields
old-column-types
detected-types
)
(
alter-columns!
driver
database
table
))))
detected-types
(
column-types-from-rows
settings
old-types
rows
)
new-types
(
map
#
(
if
(
matching-or-upgradable?
%1
%2
)
%2
%1
)
old-types
detected-types
)
;; avoid any schema modification unless we are able to fully upgrade to supporting the given file
;; choosing to not upgrade means that we will defer failure until we hit the first value that cannot
;; be parsed as its previous type - there is scope to improve these error messages in the future.
modify-schema?
(
and
(
not=
old-types
new-types
)
(
=
detected-types
new-types
))
_
(
when
modify-schema?
(
let
[
changes
(
field-changes
normed-header
old-types
new-types
)]
(
add-columns!
driver
database
table
(
:added
changes
))
(
alter-columns!
driver
database
table
(
:updated
changes
))))
;; this will fail if any of our required relaxations were rejected.
parsed-rows
(
parse-rows
settings
new-
column-
types
rows
)
parsed-rows
(
parse-rows
settings
new-types
rows
)
row-count
(
count
parsed-rows
)
stats
{
:num-rows
row-count
:num-columns
(
count
new-
column-
types
)
:num-columns
(
count
new-types
)
:generated-columns
(
if
create-auto-pk?
1
0
)
:size-mb
(
file-size-mb
file
)
:upload-seconds
(
since-ms
timer
)}]
...
...
@@ -680,11 +701,9 @@
(
throw
(
ex-info
(
ex-message
e
)
{
:status-code
422
}))))
(
when
create-auto-pk?
(
driver/add-columns!
driver
(
:id
database
)
(
table-identifier
table
)
{
auto-pk-column-keyword
(
driver/upload-type->database-type
driver
::auto-incrementing-int-pk
)}
:primary-key
[
auto-pk-column-keyword
]))
(
add-columns!
driver
database
table
{
auto-pk-column-keyword
::auto-incrementing-int-pk
}
:primary-key
[
auto-pk-column-keyword
]))
(
scan-and-sync-table!
database
table
)
...
...
This diff is collapsed.
Click to expand it.
test/metabase/upload_test.clj
+
49
−
22
View file @
a767fd31
...
...
@@ -617,7 +617,7 @@
(
is
(
some?
table
))))))))
(
deftest
load-from-csv-offset-datetime-test
(
testing
"Upload a CSV file with a datetime column"
(
testing
"Upload a CSV file with a
n offset
datetime column"
(
mt/test-drivers
(
mt/normal-drivers-with-feature
:uploads
)
(
with-mysql-local-infile-on-and-off
(
mt/with-dynamic-redefs
[
driver/db-default-timezone
(
constantly
"Z"
)
...
...
@@ -1133,7 +1133,6 @@
:user-id
(
str
(
mt/user->id
:rasta
))}
(
last
(
snowplow-test/pop-event-data-and-user-id!
))))))))))
(
deftest
csv-upload-audit-log-test
;; Just test with h2 because these events are independent of the driver
(
mt/test-driver
:h2
...
...
@@ -1351,40 +1350,53 @@
(
deftest
append-column-mismatch-test
(
mt/test-drivers
(
mt/normal-drivers-with-feature
:uploads
)
(
with-uploads-allowed
(
testing
"Append should fail if there are
extra or
missing columns in the CSV file"
(
testing
"Append should fail
only
if there are missing columns in the CSV file"
(
doseq
[[
csv-rows
error-message
]
{[
"_mb_row_id,id,name,extra column one,EXTRA COLUMN TWO"
]
(
trim-lines
"The CSV file contains extra columns that are not in the table:
- extra_column_two
- extra_column_one"
)
[
""
]
{[
""
]
(
trim-lines
"The CSV file is missing columns that are in the table:
- id
- name"
)
;; Extra columns are fine, as long as none are missing.
[
"_mb_row_id,id,extra 1, extra 2,name"
]
nil
[
"_mb_row_id,extra 1, extra 2"
]
(
trim-lines
"The CSV file contains extra columns that are not in the table:
(
trim-lines
"The CSV file is missing columns that are in the table:
- id
- name
There are new columns in the CSV file that are not in the table:
- extra_2
- extra_1
- extra_1
"
)
The CSV file is missing columns that are in the table:
- id
- name"
)}]
[
"_mb_row_id,id, extra 2"
]
(
trim-lines
"The CSV file is missing columns that are in the table:
- name
There are new columns in the CSV file that are not in the table:
- extra_2"
)}]
(
with-upload-table!
[
table
(
create-upload-table!
{
:col->upload-type
(
ordered-map/ordered-map
:id
::upload/int
:name
::upload/varchar-255
)
:rows
[[
1
,
"some_text"
]]})]
(
let
[
file
(
csv-file-with
csv-rows
(
mt/random-name
))]
(
is
(
=
{
:message
error-message
:data
{
:status-code
422
}}
(
catch-ex-info
(
append-csv!
{
:file
file
:table-id
(
:id
table
)}))))
(
testing
"Check the data was not uploaded into the table"
(
is
(
=
[[
1
"some_text"
]]
(
rows-for-table
table
))))
(
let
[
file
(
csv-file-with
csv-rows
(
mt/random-name
))]
(
when
error-message
(
is
(
=
{
:message
error-message
:data
{
:status-code
422
}}
(
catch-ex-info
(
append-csv!
{
:file
file
:table-id
(
:id
table
)}))))
(
testing
"Check the data was not uploaded into the table"
(
is
(
=
[[
1
"some_text"
]]
(
rows-for-table
table
)))))
(
when-not
error-message
(
testing
"Check the data was uploaded into the table"
;; No exception is thrown - but there were also no rows in the table to check
(
append-csv!
{
:file
file
:table-id
(
:id
table
)})))
(
io/delete-file
file
))))))))
(
deftest
append-all-types-test
...
...
@@ -1639,6 +1651,21 @@
(
rows-for-table
table
))))
(
io/delete-file
file
))))))
(
deftest
append-new-column-test
(
mt/test-drivers
(
mt/normal-drivers-with-feature
:uploads
)
(
with-uploads-allowed
(
testing
"Append should handle new columns being added in the latest CSV"
(
with-upload-table!
[
table
(
create-upload-table!
)]
;; Reorder as well for good measure
(
let
[
csv-rows
[
"game,name"
"Witticisms,Fluke Skytalker"
]
file
(
csv-file-with
csv-rows
(
mt/random-name
))]
(
testing
"The new row is inserted with the values correctly reordered"
(
is
(
=
{
:row-count
1
}
(
append-csv!
{
:file
file,
:table-id
(
:id
table
)})))
(
is
(
=
[[
1
"Obi-Wan Kenobi"
nil
]
[
2
"Fluke Skytalker"
"Witticisms"
]]
(
rows-for-table
table
))))
(
io/delete-file
file
)))))))
(
deftest
append-type-mismatch-test
(
mt/test-drivers
(
mt/normal-drivers-with-feature
:uploads
)
(
with-mysql-local-infile-on-and-off
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment