Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
Metabase
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Engineering Digital Service
Metabase
Commits
4e9bc023
Unverified
Commit
4e9bc023
authored
1 year ago
by
Chris Truter
Committed by
GitHub
1 year ago
Browse files
Options
Downloads
Patches
Plain Diff
Clean up CSV type inference interface (#39822)
parent
db182076
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/metabase/upload.clj
+17
-20
17 additions, 20 deletions
src/metabase/upload.clj
test/metabase/upload_test.clj
+1
-1
1 addition, 1 deletion
test/metabase/upload_test.clj
with
18 additions
and
21 deletions
src/metabase/upload.clj
+
17
−
20
View file @
4e9bc023
...
...
@@ -222,18 +222,6 @@
(
filter
#
((
type->check
%
)
trimmed
))
first
)))))
(
defn-
type-relaxer
"Given a map of {value-type -> predicate}, return a reducing fn which updates our inferred schema using the next row."
[
type->check
]
(
fn
[
value-types
row
]
;; It's important to realize this lazy sequence, because otherwise we can build a huge stack and overflow.
(
vec
(
u/map-all
(
partial
relax-type
type->check
)
value-types
row
))))
(
defn-
relax-types
[
settings
current-types
rows
]
(
let
[
type->check
(
settings->type->check
settings
)]
(
->>
(
reduce
(
type-relaxer
type->check
)
current-types
rows
)
(
map
column-type
))))
(
defn-
normalize-column-name
[
raw-name
]
(
if
(
str/blank?
raw-name
)
...
...
@@ -253,10 +241,18 @@
(
=
(
normalize-column-name
(
:name
field
))
auto-pk-column-name
))
(
t2/select
:model/Field
:table_id
table-id
:active
true
))))
(
defn-
type-relaxer
"Given a map of {value-type -> predicate}, return a reducing fn which updates our inferred schema using the next row."
[
settings
]
(
let
[
relax
(
partial
relax-type
(
settings->type->check
settings
))]
(
fn
[
value-types
row
]
;; It's important to realize this lazy sequence, because otherwise we can build a huge stack and overflow.
(
vec
(
u/map-all
relax
value-types
row
)))))
(
mu/defn
column-types-from-rows
:-
[
:sequential
(
into
[
:enum
]
column-types
)]
"
Returns a sequence of types, given the unparsed rows in the CSV file
"
[
settings
column-count
rows
]
(
relax-types
settings
(
repeat
column-count
nil
)
rows
))
"
Given the types of the existing columns (if there are any), and rows to be added, infer the best supporting types.
"
[
settings
existing-types
rows
]
(
map
column-type
(
reduce
(
type-relaxer
settings
)
existing-types
rows
))
)
(
defn-
detect-schema
"Consumes the header and rows from a CSV file.
...
...
@@ -272,7 +268,8 @@
(
let
[
normalized-header
(
map
normalize-column-name
header
)
unique-header
(
map
keyword
(
mbql.u/uniquify-names
normalized-header
))
column-count
(
count
normalized-header
)
col-name+type-pairs
(
->>
(
column-types-from-rows
settings
column-count
rows
)
initial-types
(
repeat
column-count
nil
)
col-name+type-pairs
(
->>
(
column-types-from-rows
settings
initial-types
rows
)
(
map
vector
unique-header
))]
{
:extant-columns
(
ordered-map/ordered-map
col-name+type-pairs
)
:generated-columns
(
ordered-map/ordered-map
auto-pk-column-keyword
::auto-incrementing-int-pk
)}))
...
...
@@ -652,14 +649,14 @@
;; in the happy, and most common, case all the values will match the existing types
;; for now we just plan for the worst and perform a fairly expensive operation to detect any type changes
;; we can come back and optimize this to an optimistic-with-fallback approach later.
relax
ed-types
(
relax-type
s
settings
old-column-types
rows
)
new-column-types
(
map
#
(
if
(
matching-or-upgradable?
%1
%2
)
%2
%1
)
old-column-types
relax
ed-types
)
detect
ed-types
(
column-types-from-row
s
settings
old-column-types
rows
)
new-column-types
(
map
#
(
if
(
matching-or-upgradable?
%1
%2
)
%2
%1
)
old-column-types
detect
ed-types
)
_
(
when
(
and
(
not=
old-column-types
new-column-types
)
;; if we cannot coerce all the columns, don't bother coercing any of them
;; we will instead throw an error when we try to parse as the old type
(
=
relax
ed-types
new-column-types
))
(
=
detect
ed-types
new-column-types
))
(
let
[
fields
(
map
normed-name->field
normed-header
)]
(
->>
(
changed-field->new-type
fields
old-column-types
relax
ed-types
)
(
->>
(
changed-field->new-type
fields
old-column-types
detect
ed-types
)
(
alter-columns!
driver
database
table
))))
;; this will fail if any of our required relaxations were rejected.
parsed-rows
(
parse-rows
settings
new-column-types
rows
)
...
...
This diff is collapsed.
Click to expand it.
test/metabase/upload_test.clj
+
1
−
1
View file @
4e9bc023
...
...
@@ -231,7 +231,7 @@
type->check
(
#
'upload/settings->type->check
settings
)
value-type
(
#
'upload/value->type
type->check
string-value
)
;; get the type of the column, if it were filled with only that value
col-type
(
first
(
upload/column-types-from-rows
settings
1
[[
string-value
]]))
col-type
(
first
(
upload/column-types-from-rows
settings
nil
[[
string-value
]]))
parser
(
upload-parsing/upload-type->parser
col-type
settings
)]
(
testing
(
format
"\"%s\" is a %s"
string-value
type
)
(
is
(
=
expected-type
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment