Skip to content
Snippets Groups Projects
Unverified Commit a8fc67d6 authored by Tim Macdonald's avatar Tim Macdonald Committed by GitHub
Browse files

Parse currency appropriately in CSVs (#30417)


* Parse currency appropriately in CSVs

* Remove digit separator parsing for now

* Formatting

* Don't attempt to parse commas in floats

* Add back removing separators

---------

Co-authored-by: Callum Herries <hi@callumherries.com>
parent 8778569c
No related branches found
No related tags found
No related merge requests found
......@@ -67,6 +67,21 @@
(catch Exception _
false)))
(def ^:private currency-regex
  "Matches exactly one character that is either a supported currency symbol
  ($ € £ ¥ ₹ ₪ ₩ ₿ ¢) or a whitespace character."
  #"[$€£¥₹₪₩₿¢\s]")
(defn- with-currency
  "Wraps `number-regex` in a pattern that additionally accepts an optional minus sign and an
  optional `currency-regex` match before the sign, after the sign, and after the number."
  [number-regex]
  ;; A currency sign may appear in several positions: $2, -$2, $-2, 2€
  (let [maybe-currency (str currency-regex "?")]
    (re-pattern (str maybe-currency "\\s*-?"
                     maybe-currency
                     number-regex
                     "\\s*" maybe-currency))))
;; These are pulled out so that the regex is only compiled once, not for every invocation of value->type
(def ^:private int-regex
  "Matches a run of digits and/or comma separators, with an optional minus sign and optional
  currency symbols around it (added by `with-currency`).
  NOTE: the pattern does not require a digit, so a comma-only string such as `,,,` also matches."
  (with-currency #"[\d,]+"))
(def ^:private float-regex
  "Matches a decimal number: zero or more digits/comma separators, a `.`, then one or more
  digits, with an optional minus sign and optional currency symbols around it (added by
  `with-currency`)."
  (with-currency #"[\d,]*\.\d+"))
(defn value->type
"The most-specific possible type for a given value. Possibilities are:
- ::boolean
......@@ -83,10 +98,10 @@
(cond
(str/blank? value) nil
(re-matches #"(?i)true|t|yes|y|1|false|f|no|n|0" value) ::boolean
(re-matches #"-?[\d,]+" value) ::int
(re-matches #"-?[\d,]*\.\d+" value) ::float
(datetime-string? value) ::datetime
(date-string? value) ::date
(re-matches int-regex value) ::int
(re-matches float-regex value) ::float
(re-matches #".{1,255}" value) ::varchar_255
:else ::text))
......@@ -159,11 +174,19 @@
(date-string? s) (t/local-date-time (t/local-date s) (t/local-time "00:00:00"))
(datetime-string? s) (t/local-date-time s)))
(defn- remove-currency-signs
  "Returns `s` with every match of `currency-regex` deleted."
  [s]
  (-> s
      (str/replace currency-regex "")))
(defn- remove-separators
  "Returns `s` with every comma deleted."
  [s]
  (-> s
      (str/replace "," "")))
;; Maps each upload type keyword to a one-argument function that turns a raw string into a value
;; of that type. The string types pass the value through unchanged; the others trim it first.
;; NOTE(review): ::int and ::float each appear twice below. This looks like a diff view showing both
;; the removed line and the added line; a map literal cannot repeat a key, so only one entry per key
;; should exist in the real file -- presumably the currency-aware pair. Confirm against the repo.
(def ^:private upload-type->parser
{::varchar_255 identity
::text identity
::int #(Integer/parseInt (str/trim %))
::float #(parse-double (str/trim %))
;; Currency-aware variants: trim, drop commas, then drop currency symbols before parsing.
::int #(parse-long (remove-currency-signs (remove-separators (str/trim %))))
::float #(parse-double (remove-currency-signs (remove-separators (str/trim %))))
::boolean #(parse-bool (str/trim %))
::date #(parse-date (str/trim %))
::datetime #(parse-datetime (str/trim %))})
......
......@@ -25,37 +25,66 @@
(def datetime-type :metabase.upload/datetime)
(def text-type :metabase.upload/text)
;; Checks that upload/value->type returns the expected type for each sample string.
;; Each row is [input-string expected-type].
;; NOTE(review): type-detection-and-parse-test covers the same inputs plus parsing; this block
;; looks like the pre-change version shown by the diff -- confirm whether it still exists.
(deftest type-detection-test
(doseq [[value expected] [["0" bool-type]
["1" bool-type]
["t" bool-type]
["T" bool-type]
["tRuE" bool-type]
["f" bool-type]
["F" bool-type]
["FAlse" bool-type]
["Y" bool-type]
["n" bool-type]
["yes" bool-type]
["NO" bool-type]
["2" int-type]
["-86" int-type]
["9,986,000" int-type]
["3.14" float-type]
[".14" float-type]
["0.14" float-type]
["-9,986.567" float-type]
["9,986,000.0" float-type]
[(apply str (repeat 255 "x")) vchar-type]
[(apply str (repeat 256 "x")) text-type]
["86 is my favorite number" vchar-type]
["My favorite number is 86" vchar-type]
["2022-01-01" date-type]
["2022-01-01T01:00:00" datetime-type]
["2022-01-01T01:00:00.00" datetime-type]
["2022-01-01T01:00:00.000000000" datetime-type]]]
(testing (format "\"%s\" is a %s" value expected)
(is (= expected (upload/value->type value))))))
;; For each row [string-value expected-value expected-type]:
;;   1. detect the type of string-value with upload/value->type,
;;   2. look up the parser for the detected type in upload/upload-type->parser,
;;   3. assert both the detected type and the parsed value.
;; Rows whose expected-value equals the input string are types parsed with `identity`.
(deftest type-detection-and-parse-test
(doseq [[string-value expected-value expected-type]
[["0" false bool-type]
["1" true bool-type]
["t" true bool-type]
["T" true bool-type]
["tRuE" true bool-type]
["f" false bool-type]
["F" false bool-type]
["FAlse" false bool-type]
["Y" true bool-type]
["n" false bool-type]
["yes" true bool-type]
["NO" false bool-type]
;; Integers with a currency symbol before/after the number and before/after the minus sign
["$2" 2 int-type]
["$ 3" 3 int-type]
["-43€" -43 int-type]
["£1000" 1000 int-type]
["-¥9" -9 int-type]
["₹ -13" -13 int-type]
["₪13" 13 int-type]
["₩-13" -13 int-type]
["₿42" 42 int-type]
["-99¢" -99 int-type]
["2" 2 int-type]
["-86" -86 int-type]
["9,986,000" 9986000 int-type]
[",,," nil int-type] ;; TODO: this should be a vchar in the future
["9.986.000" "9.986.000" vchar-type] ;; TODO: this should be an integer in the future
["3.14" 3.14 float-type]
[".14" 0.14 float-type]
["0.14" 0.14 float-type]
["-9986.567" -9986.567 float-type]
;; Floats with currency symbols
["$2.0" 2.0 float-type]
["$ 3.50" 3.50 float-type]
["-4300.23€" -4300.23 float-type]
["£1,000.23" 1000.23 float-type]
["£1.000,23" "£1.000,23" vchar-type] ;; TODO: this should be a float in the future
["-¥9.99" -9.99 float-type]
["₹ -13.23" -13.23 float-type]
["₪13.01" 13.01 float-type]
["₩13.33" 13.33 float-type]
["₿42.243646" 42.243646 float-type]
["-99.99¢" -99.99 float-type]
[(apply str (repeat 255 "x")) (apply str (repeat 255 "x")) vchar-type]
[(apply str (repeat 256 "x")) (apply str (repeat 256 "x")) text-type]
["86 is my favorite number" "86 is my favorite number" vchar-type]
["My favorite number is 86" "My favorite number is 86" vchar-type]
["2022-01-01" #t "2022-01-01" date-type]
["2022-01-01T01:00:00" #t "2022-01-01T01:00" datetime-type]
["2022-01-01T01:00:00.00" #t "2022-01-01T01:00" datetime-type]
["2022-01-01T01:00:00.000000000" #t "2022-01-01T01:00" datetime-type]]]
;; NOTE: the local `type` shadows clojure.core/type inside this let.
;; The #' (var-quote) form is used to reach the parser map through its var.
(let [type (upload/value->type string-value)
parser (#'upload/upload-type->parser type)]
(testing (format "\"%s\" is a %s" string-value type)
(is (= expected-type
type)))
(testing (format "\"%s\" is parsed into %s" string-value expected-value)
(is (= expected-value
(parser string-value)))))))
(deftest type-coalescing-test
(doseq [[type-a type-b expected] [[bool-type bool-type bool-type]
......@@ -217,9 +246,9 @@
driver/*driver*
(mt/id)
"upload_test"
(csv-file-with ["id,nulls,string,bool,number,date,datetime"
"2\t ,,string,true ,1.1\t ,2022-01-01,2022-01-01T00:00:00"
" 3,,string,false, 1.1,2022-02-01,2022-02-01T00:00:00"]))
(csv-file-with ["id ,nulls,string ,bool ,number ,date ,datetime"
"2\t ,, a ,true ,1.1\t ,2022-01-01,2022-01-01T00:00:00"
"\" 3\",, b,false,\"$ 1,000.1\",2022-02-01,2022-02-01T00:00:00"]))
(testing "Table and Fields exist after sync"
(sync/sync-database! (mt/db))
(let [table (t2/select-one Table :db_id (mt/id))]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment