Skip to content
Snippets Groups Projects
Unverified Commit c70fba89 authored by Cal Herries's avatar Cal Herries Committed by GitHub
Browse files

Uploads: parse local number formats (#30593)


* Parse numbers according to number_separators setting

* Add more test cases

* Fix error

* Dedupe tests

* Make fn private

* Doc fixes etc.

---------

Co-authored-by: Tim Macdonald <tim@tsmacdonald.com>
parent e0dc8a86
No related branches found
No related tags found
No related merge requests found
......@@ -10,8 +10,12 @@
[java-time :as t]
[metabase.driver :as driver]
[metabase.mbql.util :as mbql.u]
[metabase.public-settings :as public-settings]
[metabase.search.util :as search-util]
[metabase.util :as u]))
[metabase.util :as u])
(:import
(java.text NumberFormat)
(java.util Locale)))
(set! *warn-on-reflection* true)
......@@ -67,7 +71,7 @@
(catch Exception _
false)))
(def ^:private currency-regex "Digits, perhaps with separators and at least one digit" #"[$€£¥₹₪₩₿¢\s]")
;; NOTE: besides the currency signs themselves, this character class also contains `\s`, so it matches
;; whitespace too. Any code that strips matches of this regex (e.g. `(str/replace s currency-regex "")`)
;; therefore removes whitespace as well as currency signs.
(def ^:private currency-regex "Supported currency signs" #"[$€£¥₹₪₩₿¢\s]")
(defn- with-currency
"Returns a regex that matches a positive or negative number, including currency symbols"
......@@ -78,9 +82,24 @@
number-regex
"\\s*" currency-regex "?")))
;; These are pulled out so that the regex is only compiled once, not for every invocation of value->type
(def ^:private int-regex "Digits, perhaps with separators and at least one digit" (with-currency #"[\d,]+"))
(def ^:private float-regex "Digits, perhaps with separators and at least one digit" (with-currency #"[\d,]*\.\d+"))
(defn- get-number-separators
  "Looks up the `:number_separators` value stored under `:type/Number` in the custom-formatting
  setting. Returns \".,\" when that path is absent."
  []
  (-> (public-settings/custom-formatting)
      (get-in [:type/Number :number_separators] ".,")))
(defn- int-regex
  "Builds the regex used to recognise integer strings for the given `number-separators` setting value.

  Each per-setting pattern is a leading digit followed by any mix of digits and that setting's grouping
  character(s); the pattern is then wrapped by `with-currency`. A `number-separators` value that is not
  listed below has no matching `case` clause and therefore throws."
  [number-separators]
  (let [digits-with-grouping (case number-separators
                               ("." ".,") #"\d[\d,]*"
                               ",."       #"\d[\d.]*"
                               ", "       #"\d[\d \u00A0]*"
                               ".’"       #"\d[\d’]*")]
    (with-currency digits-with-grouping)))
(defn- float-regex
  "Builds the regex used to recognise decimal-number strings for the given `number-separators` setting
  value.

  Each per-setting pattern is: a leading digit, any mix of digits and that setting's grouping
  character(s), exactly one decimal separator, then one or more digits. The pattern is then wrapped by
  `with-currency`. A `number-separators` value that is not listed below has no matching `case` clause
  and therefore throws.

  The fractional part deliberately accepts digits only. Allowing further periods there (as `[\\d.]+`
  would) lets strings such as \"1,2.3\" or \"9.986.000\" be classified as floats, and the lenient
  `NumberFormat` parse would then silently keep only the leading part of the value."
  [number-separators]
  (let [decimal-pattern (case number-separators
                          ("." ".,") #"\d[\d,]*\.\d+"
                          ",."       #"\d[\d.]*,\d+"
                          ", "       #"\d[\d \u00A0]*,\d+"
                          ".’"       #"\d[\d’]*\.\d+")]
    (with-currency decimal-pattern)))
(defn value->type
"The most-specific possible type for a given value. Possibilities are:
......@@ -92,18 +111,20 @@
- nil, in which case other functions are expected to replace it with ::text as the catch-all type
NB: There are currently the following gotchas:
1. ints/floats are assumed to have commas as separators and periods as decimal points
1. ints/floats are assumed to use the separators and decimal points corresponding to the locale defined in the
application settings
2. 0 and 1 are assumed to be booleans, not ints."
[value]
(cond
(str/blank? value) nil
(re-matches #"(?i)true|t|yes|y|1|false|f|no|n|0" value) ::boolean
(datetime-string? value) ::datetime
(date-string? value) ::date
(re-matches int-regex value) ::int
(re-matches float-regex value) ::float
(re-matches #".{1,255}" value) ::varchar_255
:else ::text))
(let [number-separators (get-number-separators)]
(cond
(str/blank? value) nil
(re-matches #"(?i)true|t|yes|y|1|false|f|no|n|0" value) ::boolean
(datetime-string? value) ::datetime
(date-string? value) ::date
(re-matches (int-regex number-separators) value) ::int
(re-matches (float-regex number-separators) value) ::float
(re-matches #".{1,255}" value) ::varchar_255
:else ::text)))
(defn- row->types
[row]
......@@ -178,18 +199,29 @@
[s]
(str/replace s currency-regex ""))
(defn- remove-separators
(defn- parse-plain-number
  "Parses the numeric string `s` (currency signs already removed) with the `NumberFormat` of a locale
  whose conventions correspond to the configured number separators:

    \".\" / \".,\"  -> en-US
    \",.\"         -> de-DE
    \", \"         -> fr-FR
    \".’\"         -> de-CH

  Returns whatever `NumberFormat.parse` returns (a `Long` when the value fits, otherwise a `Double`).
  `NumberFormat.parse` throws `java.text.ParseException` when the beginning of the string cannot be
  parsed, and it may stop before the end of the string, so callers should only pass strings already
  validated against the matching int/float regex. A separators value not listed above has no matching
  `case` clause and therefore throws."
  [s]
  (let [parse-with (fn [^Locale locale ^String text]
                     (let [^NumberFormat fmt (NumberFormat/getInstance locale)]
                       (.parse fmt text)))]
    (case (get-number-separators)
      ("." ".,") (parse-with (Locale. "en" "US") s)
      ",."       (parse-with (Locale. "de" "DE") s)
      ;; The French grouping separator depends on the JDK's locale data: U+00A0 (no-break space) on
      ;; older JDKs, U+202F (narrow no-break space) since the CLDR update shipped with JDK 13. Rather
      ;; than hard-coding one of them, ask the runtime which character it expects and normalise every
      ;; space variant to it; otherwise parsing stops at the first unrecognised space and
      ;; "1\u00A0000" silently becomes 1.
      ", "       (let [locale   (Locale. "fr" "FR")
                       grouping (.getGroupingSeparator (java.text.DecimalFormatSymbols/getInstance locale))]
                   (parse-with locale (str/replace s #"[ \u00A0\u202F]" (str grouping))))
      ".’"       (parse-with (Locale. "de" "CH") s))))
(defn- parse-number
[s]
(str/replace s "," ""))
(def ^:private upload-type->parser
{::varchar_255 identity
::text identity
::int #(parse-long (remove-currency-signs (remove-separators (str/trim %))))
::float #(parse-double (remove-currency-signs (remove-separators (str/trim %))))
::boolean #(parse-bool (str/trim %))
::date #(parse-date (str/trim %))
::datetime #(parse-datetime (str/trim %))})
(-> s
(str/trim)
(remove-currency-signs)
(parse-plain-number)))
(defn- upload-type->parser
  "Returns the one-argument function that converts a raw string cell into a value for a column of
  `upload-type`.

  Text-like types are passed through unchanged, both numeric types share `parse-number`, and the
  boolean/date/datetime parsers receive the trimmed string. An `upload-type` not listed below has no
  matching `case` clause and therefore throws."
  [upload-type]
  (case upload-type
    (::varchar_255 ::text) identity
    (::int ::float)        parse-number
    ::boolean              (fn [raw] (-> raw str/trim parse-bool))
    ::date                 (fn [raw] (-> raw str/trim parse-date))
    ::datetime             (fn [raw] (-> raw str/trim parse-datetime))))
(defn- parsed-rows
"Returns a vector of parsed rows from a `csv-file`.
......
......@@ -26,23 +26,21 @@
(def text-type :metabase.upload/text)
(deftest type-detection-and-parse-test
(doseq [[string-value expected-value expected-type]
[["0" false bool-type]
["1" true bool-type]
["t" true bool-type]
["T" true bool-type]
["tRuE" true bool-type]
["f" false bool-type]
["F" false bool-type]
["FAlse" false bool-type]
["Y" true bool-type]
["n" false bool-type]
["yes" true bool-type]
["NO" false bool-type]
(doseq [[string-value expected-value expected-type seps]
[["0.0" 0 float-type "."]
["0.0" 0 float-type ".,"]
["0,0" 0 float-type ",."]
["0,0" 0 float-type ", "]
["0.0" 0 float-type ".’"]
["$2" 2 int-type]
["$ 3" 3 int-type]
["-43€" -43 int-type]
["£1000" 1000 int-type]
["£1000" 1000 int-type "."]
["£1000" 1000 int-type ".,"]
["£1000" 1000 int-type ",."]
["£1000" 1000 int-type ", "]
["£1000" 1000 int-type ".’"]
["-¥9" -9 int-type]
["₹ -13" -13 int-type]
["₪13" 13 int-type]
......@@ -52,23 +50,36 @@
["2" 2 int-type]
["-86" -86 int-type]
["9,986,000" 9986000 int-type]
[",,," nil int-type] ;; TODO: this should be a vchar in the future
["9.986.000" "9.986.000" vchar-type] ;; TODO: this should be an integer in the future
["9,986,000" 9986000 int-type "."]
["9,986,000" 9986000 int-type ".,"]
["9.986.000" 9986000 int-type ",."]
["9’986’000" 9986000 int-type ".’"]
["9.986.000" "9.986.000" vchar-type ".,"]
["3.14" 3.14 float-type]
[".14" 0.14 float-type]
["0.14" 0.14 float-type]
["-9986.567" -9986.567 float-type]
["$2.0" 2.0 float-type]
["$ 3.50" 3.50 float-type]
["-4300.23€" -4300.23 float-type]
["3.14" 3.14 float-type "."]
["3.14" 3.14 float-type ".,"]
["3,14" 3.14 float-type ",."]
["3,14" 3.14 float-type ", "]
["3.14" 3.14 float-type ".’"]
[".14" ".14" vchar-type ".,"] ;; TODO: this should be a float type
["0.14" 0.14 float-type ".,"]
["-9986.567" -9986.567 float-type ".,"]
["$2.0" 2 float-type ".,"]
["$ 3.50" 3.50 float-type ".,"]
["-4300.23€" -4300.23 float-type ".,"]
["£1,000.23" 1000.23 float-type]
["£1.000,23" "£1.000,23" vchar-type] ;; TODO: this should be a float in the future
["-¥9.99" -9.99 float-type]
["₹ -13.23" -13.23 float-type]
["₪13.01" 13.01 float-type]
["₩13.33" 13.33 float-type]
["₿42.243646" 42.243646 float-type]
["-99.99¢" -99.99 float-type]
["£1,000.23" 1000.23 float-type "."]
["£1,000.23" 1000.23 float-type ".,"]
["£1.000,23" 1000.23 float-type ",."]
["£1 000,23" 1000.23 float-type ", "]
["£1’000.23" 1000.23 float-type ".’"]
["-¥9.99" -9.99 float-type ".,"]
["₹ -13.23" -13.23 float-type ".,"]
["₪13.01" 13.01 float-type ".,"]
["₩13.33" 13.33 float-type ".,"]
["₿42.243646" 42.243646 float-type ".,"]
["-99.99¢" -99.99 float-type ".,"]
["." "." vchar-type]
[(apply str (repeat 255 "x")) (apply str (repeat 255 "x")) vchar-type]
[(apply str (repeat 256 "x")) (apply str (repeat 256 "x")) text-type]
["86 is my favorite number" "86 is my favorite number" vchar-type]
......@@ -77,14 +88,15 @@
["2022-01-01T01:00:00" #t "2022-01-01T01:00" datetime-type]
["2022-01-01T01:00:00.00" #t "2022-01-01T01:00" datetime-type]
["2022-01-01T01:00:00.000000000" #t "2022-01-01T01:00" datetime-type]]]
(let [type (upload/value->type string-value)
parser (#'upload/upload-type->parser type)]
(testing (format "\"%s\" is a %s" string-value type)
(is (= expected-type
type)))
(testing (format "\"%s\" is parsed into %s" string-value expected-value)
(is (= expected-value
(parser string-value)))))))
(mt/with-temporary-setting-values [custom-formatting (when seps {:type/Number {:number_separators seps}})]
(let [type (upload/value->type string-value)
parser (#'upload/upload-type->parser type)]
(testing (format "\"%s\" is a %s" string-value type)
(is (= expected-type
type)))
(testing (format "\"%s\" is parsed into %s" string-value expected-value)
(is (= expected-value
(parser string-value))))))))
(deftest type-coalescing-test
(doseq [[type-a type-b expected] [[bool-type bool-type bool-type]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment