Skip to content
Snippets Groups Projects
Unverified Commit 862daf33 authored by Chris Truter's avatar Chris Truter Committed by GitHub
Browse files

Break out upload.types namespace (#40741)

parent 2d281d2c
No related branches found
No related tags found
No related merge requests found
(ns metabase.upload
(:refer-clojure :exclude [derive make-hierarchy parents])
(:require
[clj-bom.core :as bom]
[clojure.data :as data]
......@@ -29,265 +28,17 @@
[metabase.sync.sync-metadata.fields :as sync-fields]
[metabase.sync.sync-metadata.tables :as sync-tables]
[metabase.upload.parsing :as upload-parsing]
[metabase.upload.types :as upload-types]
[metabase.util :as u]
[metabase.util.i18n :refer [tru]]
[metabase.util.malli :as mu]
[metabase.util.malli.schema :as ms]
[metabase.util.ordered-hierarchy :as ordered-hierarchy :refer [make-hierarchy]]
[toucan2.core :as t2])
(:import
(java.io File)))
(set! *warn-on-reflection* true)
;;;; +------------------+
;;;; | Schema detection |
;;;; +------------------+
;; Upload value-types form a DAG (directed acyclic graph) where each type can be relaxed into any of its ancestors.
;; We parse each value in the CSV file to the most-specific possible type for each column.
;; The most-specific possible type for a column is the closest common ancestor of the types for each value in the
;; column, found by walking through the graph in topological order, following edges from left to right.
;; Note that this type is not guaranteed to be one of the least common ancestors!
;;
;; See [[metabase.util.ordered-hierarchy/first-common-ancestor]] for more details.
;;
;; <pre><code>
;;
;; text
;; |
;; |
;; varchar-255──────
;; / / \ \
;; / / \ \
;; boolean float datetime offset-datetime
;; | │ │
;; | │ │
;; │ *float-or-int* │
;; │ │ │
;; │ │ │
;; | int date
;; | / \
;; | / \
;; *boolean-int* auto-incrementing-int-pk
;;
;; </code></pre>
;;
;; We have a number of special "abstract" nodes in this graph:
;;
;; - `*boolean-int*` is an ambiguous node, that could either be parsed as a boolean or as an integer.
;; - `*float-or-int*` is any integer, whether it has an explicit decimal point or not.
;;
;; While a `*boolean-int*` is a genuinely ambiguous value, `*float-or-int*` exist to power our desired value-type
;; coercion and column-type promotion behaviour.
;;
;; - If we encounter a `*float-or-int*` inside an `int` column, then we can safely coerce it down to an integer.
;; - If we encounter a `float` (i.e. a non-zero fraction component), then we need to promote the column to a `float.`
;;
;; Columns can not have an abstract type, which has no meaning outside of inference and reconciliation.
;; If we are left with an abstract type after having processed all the values, we first check whether we can coerce
;; the type to the existing column type, and otherwise traverse further up the graph until we reach a concrete type.
;;
;; For ease of reference and explicitness these corresponding values are given in the `abstract->concrete` map.
;; One can figure out these mappings by simply looking up through the ancestors. For now, we require that it is always
;; a direct ancestor, and lay out or graph so that it is the left-most one.
(def ^:private h
"This hierarchy defines a relationship between value types and their specializations.
We use an [[metabase.util.ordered-hierarchy]] for its topological sorting, which simplify writing efficient and
consistent implementations for of our type inference, parsing, and relaxation."
(make-hierarchy
[::text
[::varchar-255
[::boolean ::*boolean-int*]
[::float
;; A number value with a decimal separator, but a zero fractional component.
[::*float-or-int*
[::int
;; A value that could be legally parsed as either a boolean OR an integer
::*boolean-int*
::auto-incrementing-int-pk]]]
[::datetime ::date]
::offset-datetime]]))
(def ^:private abstract->concrete
"Not all value types correspond to column types. We refer to these as \"abstract\" types, and give them *ear-muffs*.
This maps implicitly defines the abstract types, by mapping them each to a default concretion."
{::*boolean-int* ::boolean
::*float-or-int* ::float})
(def ^:private allowed-promotions
"A mapping of which types a column can be implicitly relaxed to, based on the content of appended values.
If we require a relaxation which is not allow-listed here, we will reject the corresponding file."
{::int #{::float}})
(def ^:private column-type->coercible-value-types
"A mapping of which value types should be coerced to the given existing type, rather than triggering promotion."
{::int #{::*float-or-int*}})
(defn- coerce?
"Can values of the given type be coerced to the given existing column type, in a lossless fashion?"
[column-type value-type]
(contains? (column-type->coercible-value-types column-type) value-type))
(def ^:private value-types
"All type tags which values can be inferred as. An ordered set from most to least specialized."
(ordered-hierarchy/sorted-tags h))
(def ^:private column-types
"All type tags that correspond to concrete column types."
(into #{} (remove abstract->concrete) value-types))
(defn- column-type?
[value-type]
(contains? column-types value-type))
(defn ^:private concretize
"Determine the desired column-type given the existing column-type (nil if it's new) and the value-type of the data.
If there's a valid coercion to the existing type, we will preserve it, but otherwise we will relax abstract types
further to a concrete type."
[existing-type value-type]
(cond
;; If the type is concrete, there is nothing to do.
(column-type? value-type) value-type
;; If we know nothing about the value type, treat it as an arbitrary string.
(nil? value-type) ::text
;; If configured, coerce the value to the existing type
(coerce? existing-type value-type) existing-type
;; Otherwise, project it to its canonical concretion.
:else (abstract->concrete value-type)))
;;;;;;;;;;;;;;;;;;;;;;;;;;
;; [[value->type]] helpers
(defn- with-parens
"Returns a regex that matches the argument, with or without surrounding parentheses."
[number-regex]
(re-pattern (str "(" number-regex ")|(\\(" number-regex "\\))")))
(defn- with-currency
"Returns a regex that matches a positive or negative number, including currency symbols"
[number-regex]
;; currency signs can be all over: $2, -$2, $-2, 2€
(re-pattern (str upload-parsing/currency-regex "?\\s*-?"
upload-parsing/currency-regex "?"
number-regex
"\\s*" upload-parsing/currency-regex "?")))
(defn- int-regex
"Matches numbers which do not have a decimal separator."
[number-separators]
(with-parens
(with-currency
(case number-separators
("." ".,") #"\d[\d,]*"
",." #"\d[\d.]*"
", " #"\d[\d \u00A0]*"
".’" #"\d[\d’]*"))))
(defn- float-or-int-regex
"Matches integral numbers, even if they have a decimal separator - e.g. 2 or 2.0"
[number-separators]
(with-parens
(with-currency
(case number-separators
("." ".,") #"\d[\d,]*(\.0+)?"
",." #"\d[\d.]*(\,[0]+)?"
", " #"\d[\d \u00A0]*(\,[0.]+)?"
".’" #"\d[\d’]*(\.[0.]+)?"))))
(defn- float-regex
"Matches numbers, regardless of whether they have a decimal separator - e.g. 2, 2.0, or 2.2"
[number-separators]
(with-parens
(with-currency
(case number-separators
("." ".,") #"\d[\d,]*(\.\d+)?"
",." #"\d[\d.]*(\,[\d]+)?"
", " #"\d[\d \u00A0]*(\,[\d.]+)?"
".’" #"\d[\d’]*(\.[\d.]+)?"))))
(defmacro does-not-throw?
"Returns true if the given body does not throw an exception."
[body]
`(try
~body
true
(catch Throwable e#
false)))
(defn- date-string? [s]
(does-not-throw? (upload-parsing/parse-local-date s)))
(defn- datetime-string? [s]
(does-not-throw? (upload-parsing/parse-local-datetime s)))
(defn- offset-datetime-string? [s]
(does-not-throw? (upload-parsing/parse-offset-datetime s)))
(defn- boolean-string? [s]
(boolean (re-matches #"(?i)true|t|yes|y|1|false|f|no|n|0" s)))
(defn- boolean-int-string? [s]
(contains? #{"0" "1"} s))
(defn- varchar-255? [s]
(<= (count s) 255))
(defn- regex-matcher [regex]
(fn [s]
(boolean (re-matches regex s))))
;; end [[value->type]] helpers
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(def ^:private non-inferable-types
#{::auto-incrementing-int-pk})
(def ^:private type->check-schema
"Every inferable value-type needs to have a detection function registered."
(into [:map] (map #(vector % [:=> [:cat :string] :boolean])
(remove non-inferable-types value-types))))
(mu/defn ^:private settings->type->check :- type->check-schema
[{:keys [number-separators] :as _settings}]
(let [int-string? (regex-matcher (int-regex number-separators))
float-or-int? (regex-matcher (float-or-int-regex number-separators))
float-string? (regex-matcher (float-regex number-separators))]
{::*boolean-int* boolean-int-string?
::boolean boolean-string?
::offset-datetime offset-datetime-string?
::date date-string?
::datetime datetime-string?
::int int-string?
::*float-or-int* float-or-int?
::float float-string?
::varchar-255 varchar-255?
::text (constantly true)}))
(defn- value->type
"Determine the most specific type that is compatible with the given value.
Numbers are assumed to use separators corresponding to the locale defined in the application settings"
[type->check value]
(when-not (str/blank? value)
(let [trimmed (str/trim value)]
(->> (remove non-inferable-types value-types)
(filter #((type->check %) trimmed))
first))))
(defn- relax-type
"Given an existing column type, and a new value, relax the type until it includes the value."
[type->check current-type value]
(cond (nil? value) current-type
(nil? current-type) (value->type type->check value)
:else (let [trimmed (str/trim value)]
(if (str/blank? trimmed)
current-type
(->> (cons current-type (ancestors h current-type))
(filter #((type->check %) trimmed))
first)))))
(defn- normalize-column-name
[raw-name]
(if (str/blank? raw-name)
......@@ -307,20 +58,6 @@
(= (normalize-column-name (:name field)) auto-pk-column-name))
(t2/select :model/Field :table_id table-id :active true))))
(defn- type-relaxer
"Given a map of {value-type -> predicate}, return a reducing fn which updates our inferred schema using the next row."
[settings]
(let [relax (partial relax-type (settings->type->check settings))]
(fn [value-types row]
;; It's important to realize this lazy sequence, because otherwise we can build a huge stack and overflow.
(vec (u/map-all relax value-types row)))))
(mu/defn column-types-from-rows :- [:sequential (into [:enum] column-types)]
"Given the types of the existing columns (if there are any), and rows to be added, infer the best supporting types."
[settings existing-types rows]
(->> (reduce (type-relaxer settings) existing-types rows)
(u/map-all concretize existing-types)))
(defn- detect-schema
"Consumes the header and rows from a CSV file.
......@@ -336,11 +73,10 @@
unique-header (map keyword (mbql.u/uniquify-names normalized-header))
column-count (count normalized-header)
initial-types (repeat column-count nil)
col-name+type-pairs (->> (column-types-from-rows settings initial-types rows)
col-name+type-pairs (->> (upload-types/column-types-from-rows settings initial-types rows)
(map vector unique-header))]
{:extant-columns (ordered-map/ordered-map col-name+type-pairs)
:generated-columns (ordered-map/ordered-map auto-pk-column-keyword ::auto-incrementing-int-pk)}))
:generated-columns (ordered-map/ordered-map auto-pk-column-keyword ::upload-types/auto-incrementing-int-pk)}))
;;;; +------------------+
;;;; | Parsing values |
......@@ -372,10 +108,16 @@
(str truncated-name-without-time
(t/format time-format (strictly-monotonic-now)))))
(mu/defn ^:private database-type
[driver
column-type :- (into [:enum] upload-types/column-types)]
(let [external-type (keyword "metabase.upload" (name column-type))]
(driver/upload-type->database-type driver external-type)))
(defn- column-definitions
"Returns a map of column-name -> column-definition from a map of column-name -> upload-type."
[driver col->upload-type]
(update-vals col->upload-type (partial driver/upload-type->database-type driver)))
(update-vals col->upload-type (partial database-type driver)))
(defn current-database
"The database being used for uploads (as per the `uploads-database-id` setting)."
......@@ -685,20 +427,6 @@
;;; | appending to uploaded table
;;; +-----------------------------
(defn- base-type->upload-type
"Returns the most specific upload type for the given base type."
[base-type]
(when base-type
(condp #(isa? %2 %1) base-type
:type/Float ::float
:type/BigInteger ::int
:type/Integer ::int
:type/Boolean ::boolean
:type/DateTimeWithTZ ::offset-datetime
:type/DateTime ::datetime
:type/Date ::date
:type/Text ::text)))
(defn- not-blank [s]
(when-not (str/blank? s)
s))
......@@ -734,12 +462,6 @@
(when-let [error-message (extra-and-missing-error-markdown extra missing)]
(throw (ex-info error-message {:status-code 422})))))
(defn- matching-or-promotable? [current-type relaxed-type]
(or (nil? current-type)
(= current-type relaxed-type)
(when-let [f (allowed-promotions current-type)]
(f relaxed-type))))
(defn- field-changes
"Given existing and newly inferred types for the given `field-names`, calculate which fields need to be added or updated, along with their new types."
[field-names existing-types new-types]
......@@ -756,7 +478,7 @@
(m/map-kv
(fn [field-name col-type]
[(keyword field-name)
(driver/upload-type->database-type driver col-type)])
(database-type driver col-type)])
field->col-type))
(defn- add-columns! [driver database table field->type & args]
......@@ -786,12 +508,12 @@
(not (contains? normed-name->field auto-pk-column-name)))
_ (check-schema (dissoc normed-name->field auto-pk-column-name) header)
settings (upload-parsing/get-settings)
old-types (map (comp base-type->upload-type :base_type normed-name->field) normed-header)
old-types (map (comp upload-types/base-type->upload-type :base_type normed-name->field) normed-header)
;; in the happy, and most common, case all the values will match the existing types
;; for now we just plan for the worst and perform a fairly expensive operation to detect any type changes
;; we can come back and optimize this to an optimistic-with-fallback approach later.
detected-types (column-types-from-rows settings old-types rows)
new-types (map #(if (matching-or-promotable? %1 %2) %2 %1) old-types detected-types)
detected-types (upload-types/column-types-from-rows settings old-types rows)
new-types (map upload-types/new-type old-types detected-types)
;; avoid any schema modification unless all the promotions required by the file are supported,
;; choosing to not promote means that we will defer failure until we hit the first value that cannot
;; be parsed as its existing type - there is scope to improve these error messages in the future.
......@@ -818,7 +540,7 @@
(when create-auto-pk?
(add-columns! driver database table
{auto-pk-column-keyword ::auto-incrementing-int-pk}
{auto-pk-column-keyword ::upload-types/auto-incrementing-int-pk}
:primary-key [auto-pk-column-keyword]))
(scan-and-sync-table! database table)
......
......@@ -183,45 +183,45 @@
(fn [upload-type _]
upload-type))
(defmethod upload-type->parser :metabase.upload/varchar-255
(defmethod upload-type->parser :metabase.upload.types/varchar-255
[_ _]
identity)
(defmethod upload-type->parser :metabase.upload/text
(defmethod upload-type->parser :metabase.upload.types/text
[_ _]
identity)
(defmethod upload-type->parser :metabase.upload/int
(defmethod upload-type->parser :metabase.upload.types/int
[_ {:keys [number-separators]}]
(partial parse-as-biginteger number-separators))
(defmethod upload-type->parser :metabase.upload/float
(defmethod upload-type->parser :metabase.upload.types/float
[_ {:keys [number-separators]}]
(partial parse-number number-separators))
(defmethod upload-type->parser :metabase.upload/auto-incrementing-int-pk
(defmethod upload-type->parser :metabase.upload.types/auto-incrementing-int-pk
[_ {:keys [number-separators]}]
(partial parse-as-biginteger number-separators))
(defmethod upload-type->parser :metabase.upload/boolean
(defmethod upload-type->parser :metabase.upload.types/boolean
[_ _]
(comp
parse-bool
str/trim))
(defmethod upload-type->parser :metabase.upload/date
(defmethod upload-type->parser :metabase.upload.types/date
[_ _]
(comp
parse-local-date
str/trim))
(defmethod upload-type->parser :metabase.upload/datetime
(defmethod upload-type->parser :metabase.upload.types/datetime
[_ _]
(comp
parse-as-datetime
str/trim))
(defmethod upload-type->parser :metabase.upload/offset-datetime
(defmethod upload-type->parser :metabase.upload.types/offset-datetime
[_ _]
(comp
parse-offset-datetime
......
(ns metabase.upload.types
(:refer-clojure :exclude [make-hierarchy])
(:require
[clojure.string :as str]
[metabase.upload.parsing :as upload-parsing]
[metabase.util :as u]
[metabase.util.malli :as mu]
[metabase.util.ordered-hierarchy :as ordered-hierarchy :refer [make-hierarchy]]))
;; Upload value-types form a directed acyclic graph where each type can be relaxed into any of its ancestors.
;; We parse each value in the CSV file to the most-specific possible type for each column.
;; The most-specific possible type for a column is the closest common ancestor of the types for each value in the
;; column, found by walking through the graph in topological order, following edges from left to right.
;; Note that this type is not guaranteed to be one of the least common ancestors!
;;
;; See [[metabase.util.ordered-hierarchy/first-common-ancestor]] for more details.
;;
;; <pre><code>
;;
;; text
;; |
;; |
;; varchar-255──────
;; / / \ \
;; / / \ \
;; boolean float datetime offset-datetime
;; | │ │
;; | │ │
;; │ *float-or-int* │
;; │ │ │
;; │ │ │
;; | int date
;; | / \
;; | / \
;; *boolean-int* auto-incrementing-int-pk
;;
;; </code></pre>
;;
;; We have a number of special "abstract" nodes in this graph:
;;
;; - `*boolean-int*` is an ambiguous node, that could either be parsed as a boolean or as an integer.
;; - `*float-or-int*` is any integer, whether it has an explicit decimal point or not.
;;
;; While a `*boolean-int*` is a genuinely ambiguous value, `*float-or-int*` exist to power our desired value-type
;; coercion and column-type promotion behaviour.
;;
;; - If we encounter a `*float-or-int*` inside an `int` column, then we can safely coerce it down to an integer.
;; - If we encounter a `float` (i.e. a non-zero fraction component), then we need to promote the column to a `float.`
;;
;; Columns can not have an abstract type, which has no meaning outside of inference and reconciliation.
;; If we are left with an abstract type after having processed all the values, we first check whether we can coerce
;; the type to the existing column type, and otherwise traverse further up the graph until we reach a concrete type.
;;
;; For ease of reference and explicitness these corresponding values are given in the `abstract->concrete` map.
;; One can figure out these mappings by simply looking up through the ancestors. For now, we require that it is always
;; a direct ancestor, and lay out or graph so that it is the left-most one.
(def h
"This hierarchy defines a relationship between value types and their specializations.
We use an [[metabase.util.ordered-hierarchy]] for its topological sorting, which simplify writing efficient and
consistent implementations for of our type inference, parsing, and relaxation."
(make-hierarchy
[::text
[::varchar-255
[::boolean ::*boolean-int*]
[::float
;; A number value with a decimal separator, but a zero fractional component.
[::*float-or-int*
[::int
;; A value that could be legally parsed as either a boolean OR an integer
::*boolean-int*
::auto-incrementing-int-pk]]]
[::datetime ::date]
::offset-datetime]]))
(def ^:private abstract->concrete
"Not all value types correspond to column types. We refer to these as \"abstract\" types, and give them *ear-muffs*.
This maps implicitly defines the abstract types, by mapping them each to a default concretion."
{::*boolean-int* ::boolean
::*float-or-int* ::float})
(def ^:private allowed-promotions
"A mapping of which types a column can be implicitly relaxed to, based on the content of appended values.
If we require a relaxation which is not allow-listed here, we will reject the corresponding file."
{::int #{::float}})
(def ^:private column-type->coercible-value-types
"A mapping of which value types should be coerced to the given existing type, rather than triggering promotion."
{::int #{::*float-or-int*}})
(defn- coerce?
"Can values of the given type be coerced to the given existing column type, in a lossless fashion?"
[column-type value-type]
(contains? (column-type->coercible-value-types column-type) value-type))
(def value-types
"All type tags which values can be inferred as. An ordered set from most to least specialized."
(ordered-hierarchy/sorted-tags h))
(def column-types
"All type tags that correspond to concrete column types."
(into #{} (remove abstract->concrete) value-types))
(defn- column-type?
[value-type]
(contains? column-types value-type))
(defn concretize
"Determine the desired column-type given the existing column-type (nil if it's new) and the value-type of the data.
If there's a valid coercion to the existing type, we will preserve it, but otherwise we will relax abstract types
further to a concrete type."
[existing-type value-type]
(cond
;; If the type is concrete, there is nothing to do.
(column-type? value-type) value-type
;; If we know nothing about the value type, treat it as an arbitrary string.
(nil? value-type) ::text
;; If configured, coerce the value to the existing type
(coerce? existing-type value-type) existing-type
;; Otherwise, project it to its canonical concretion.
:else (abstract->concrete value-type)))
;;;;;;;;;;;;;;;;;;;;;;;;;;
;; [[value->type]] helpers
(defn- with-parens
"Returns a regex that matches the argument, with or without surrounding parentheses."
[number-regex]
(re-pattern (str "(" number-regex ")|(\\(" number-regex "\\))")))
(defn- with-currency
"Returns a regex that matches a positive or negative number, including currency symbols"
[number-regex]
;; currency signs can be all over: $2, -$2, $-2, 2€
(re-pattern (str upload-parsing/currency-regex "?\\s*-?"
upload-parsing/currency-regex "?"
number-regex
"\\s*" upload-parsing/currency-regex "?")))
(defn- int-regex
"Matches numbers which do not have a decimal separator."
[number-separators]
(with-parens
(with-currency
(case number-separators
("." ".,") #"\d[\d,]*"
",." #"\d[\d.]*"
", " #"\d[\d \u00A0]*"
".’" #"\d[\d’]*"))))
(defn- float-or-int-regex
"Matches integral numbers, even if they have a decimal separator - e.g. 2 or 2.0"
[number-separators]
(with-parens
(with-currency
(case number-separators
("." ".,") #"\d[\d,]*(\.0+)?"
",." #"\d[\d.]*(\,[0]+)?"
", " #"\d[\d \u00A0]*(\,[0.]+)?"
".’" #"\d[\d’]*(\.[0.]+)?"))))
(defn- float-regex
"Matches numbers, regardless of whether they have a decimal separator - e.g. 2, 2.0, or 2.2"
[number-separators]
(with-parens
(with-currency
(case number-separators
("." ".,") #"\d[\d,]*(\.\d+)?"
",." #"\d[\d.]*(\,[\d]+)?"
", " #"\d[\d \u00A0]*(\,[\d.]+)?"
".’" #"\d[\d’]*(\.[\d.]+)?"))))
(defmacro does-not-throw?
"Returns true if the given body does not throw an exception."
[body]
`(try
~body
true
(catch Throwable _e#
false)))
(defn- date-string? [s]
(does-not-throw? (upload-parsing/parse-local-date s)))
(defn- datetime-string? [s]
(does-not-throw? (upload-parsing/parse-local-datetime s)))
(defn- offset-datetime-string? [s]
(does-not-throw? (upload-parsing/parse-offset-datetime s)))
(defn- boolean-string? [s]
(boolean (re-matches #"(?i)true|t|yes|y|1|false|f|no|n|0" s)))
(defn- boolean-int-string? [s]
(contains? #{"0" "1"} s))
(defn- varchar-255? [s]
(<= (count s) 255))
(defn- regex-matcher [regex]
(fn [s]
(boolean (re-matches regex s))))
;; end [[value->type]] helpers
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(def ^:private non-inferable-types
#{::auto-incrementing-int-pk})
(def ^:private type->check-schema
"Every inferable value-type needs to have a detection function registered."
(into [:map] (map #(vector % [:=> [:cat :string] :boolean])
(remove non-inferable-types value-types))))
(mu/defn ^:private settings->type->check :- type->check-schema
[{:keys [number-separators] :as _settings}]
(let [int-string? (regex-matcher (int-regex number-separators))
float-or-int? (regex-matcher (float-or-int-regex number-separators))
float-string? (regex-matcher (float-regex number-separators))]
{::*boolean-int* boolean-int-string?
::boolean boolean-string?
::offset-datetime offset-datetime-string?
::date date-string?
::datetime datetime-string?
::int int-string?
::*float-or-int* float-or-int?
::float float-string?
::varchar-255 varchar-255?
::text (constantly true)}))
(defn- value->type
"Determine the most specific type that is compatible with the given value.
Numbers are assumed to use separators corresponding to the locale defined in the application settings"
[type->check value]
(when-not (str/blank? value)
(let [trimmed (str/trim value)]
(->> (remove non-inferable-types value-types)
(filter #((type->check %) trimmed))
first))))
(defn- relax-type
"Given an existing column type, and a new value, relax the type until it includes the value."
[type->check current-type value]
(cond (nil? value) current-type
(nil? current-type) (value->type type->check value)
:else (let [trimmed (str/trim value)]
(if (str/blank? trimmed)
current-type
(->> (cons current-type (ancestors h current-type))
(filter #((type->check %) trimmed))
first)))))
(defn type-relaxer
"Given a map of {value-type -> predicate}, return a reducing fn which updates our inferred schema using the next row."
[settings]
(let [relax (partial relax-type (settings->type->check settings))]
(fn [value-types row]
;; It's important to realize this lazy sequence, because otherwise we can build a huge stack and overflow.
(vec (u/map-all relax value-types row)))))
(mu/defn column-types-from-rows :- [:sequential (into [:enum] column-types)]
"Given the types of the existing columns (if there are any), and rows to be added, infer the best supporting types."
[settings existing-types rows]
(->> (reduce (type-relaxer settings) existing-types rows)
(u/map-all concretize existing-types)))
(defn base-type->upload-type
"Returns the most specific upload type for the given base type."
[base-type]
(when base-type
(condp #(isa? %2 %1) base-type
:type/Float ::float
:type/BigInteger ::int
:type/Integer ::int
:type/Boolean ::boolean
:type/DateTimeWithTZ ::offset-datetime
:type/DateTime ::datetime
:type/Date ::date
:type/Text ::text)))
(defn- promotable?
"Are we allowed to promote a column's schema from `current-type` to `inferred-type`?"
[current-type inferred-type]
(when-let [allowed? (allowed-promotions current-type)]
(allowed? inferred-type)))
(defn new-type
"Given the `current-type` of a column, and an `inferred-type` for new values to be added, return its new type.
This assumes we have already coerced the new values down to the existing type, if possible."
[current-type inferred-type]
(cond
;; No restriction on new columns
(nil? current-type) inferred-type
;; No changes required if inferred type matches
(= current-type inferred-type) current-type
:else
;; Keep the existing type unless a promotion is allowed.
(if (promotable? current-type inferred-type)
inferred-type
current-type)))
(ns ^:mb/once metabase.upload.types-test
(:require
[clojure.test :refer [deftest is testing]]
[clojure.walk :as walk]
[java-time.api :as t]
[metabase.upload.parsing :as upload-parsing]
[metabase.upload.types :as upload-types]
[metabase.util.ordered-hierarchy :as ordered-hierarchy]))
(def ^:private bool-type ::upload-types/boolean)
(def ^:private int-type ::upload-types/int)
(def ^:private bool-int-type ::upload-types/*boolean-int*)
(def ^:private float-type ::upload-types/float)
(def ^:private float-or-int-type ::upload-types/*float-or-int*)
(def ^:private vchar-type ::upload-types/varchar-255)
(def ^:private date-type ::upload-types/date)
(def ^:private datetime-type ::upload-types/datetime)
(def ^:private offset-dt-type ::upload-types/offset-datetime)
(def ^:private text-type ::upload-types/text)
(deftest ^:parallel type-detection-and-parse-test
(doseq [[string-value expected-value expected-type separators]
;; Number-related
[["0.0" 0 float-or-int-type "."]
["0.0" 0 float-or-int-type ".,"]
["0,0" 0 float-or-int-type ",."]
["0,0" 0 float-or-int-type ", "]
["0.0" 0 float-or-int-type ".’"]
["-0.0" -0.0 float-or-int-type "."]
["-0.0" -0.0 float-or-int-type ".,"]
["-0,0" -0.0 float-or-int-type ",."]
["-0,0" -0.0 float-or-int-type ", "]
["-0.0" -0.0 float-or-int-type ".’"]
["(0.0)" -0.0 float-or-int-type "."]
["(0.0)" -0.0 float-or-int-type ".,"]
["(0,0)" -0.0 float-or-int-type ",."]
["(0,0)" -0.0 float-or-int-type ", "]
["(0.0)" -0.0 float-or-int-type ".’"]
["-4300.00€" -4300 float-or-int-type ".,"]
["£1,000.00" 1000 float-or-int-type]
["£1,000.00" 1000 float-or-int-type "."]
["£1,000.00" 1000 float-or-int-type ".,"]
["£1.000,00" 1000 float-or-int-type ",."]
["£1 000,00" 1000 float-or-int-type ", "]
["£1’000.00" 1000 float-or-int-type ".’"]
["$2" 2 int-type]
["$ 3" 3 int-type]
["-43€" -43 int-type]
["(86)" -86 int-type]
["($86)" -86 int-type]
["£1000" 1000 int-type]
["£1000" 1000 int-type "."]
["£1000" 1000 int-type ".,"]
["£1000" 1000 int-type ",."]
["£1000" 1000 int-type ", "]
["£1000" 1000 int-type ".’"]
["-¥9" -9 int-type]
["₹ -13" -13 int-type]
["₪13" 13 int-type]
["₩-13" -13 int-type]
["₿42" 42 int-type]
["-99¢" -99 int-type]
["2" 2 int-type]
["-86" -86 int-type]
["9,986,000" 9986000 int-type]
["9,986,000" 9986000 int-type "."]
["9,986,000" 9986000 int-type ".,"]
["9.986.000" 9986000 int-type ",."]
["9’986’000" 9986000 int-type ".’"]
["$0" 0 int-type]
["-1" -1 int-type]
["0" false bool-int-type]
["1" true bool-int-type]
["9.986.000" "9.986.000" vchar-type ".,"]
["3.14" 3.14 float-type]
["3.14" 3.14 float-type "."]
["3.14" 3.14 float-type ".,"]
["3,14" 3.14 float-type ",."]
["3,14" 3.14 float-type ", "]
["(3.14)" -3.14 float-type]
["3.14" 3.14 float-type ".’"]
[".14" ".14" vchar-type ".,"] ;; TODO: this should be a float type
["0.14" 0.14 float-type ".,"]
["-9986.567" -9986.567 float-type ".,"]
["$2.0" 2 float-or-int-type ".,"]
["$ 3.50" 3.50 float-type ".,"]
["-4300.23€" -4300.23 float-type ".,"]
["£1,000.23" 1000.23 float-type]
["£1,000.23" 1000.23 float-type "."]
["£1,000.23" 1000.23 float-type ".,"]
["£1.000,23" 1000.23 float-type ",."]
["£1 000,23" 1000.23 float-type ", "]
["£1’000.23" 1000.23 float-type ".’"]
["-¥9.99" -9.99 float-type ".,"]
["₹ -13.23" -13.23 float-type ".,"]
["₪13.01" 13.01 float-type ".,"]
["₩13.33" 13.33 float-type ".,"]
["₿42.243646" 42.243646 float-type ".,"]
["-99.99¢" -99.99 float-type ".,"]
["." "." vchar-type]
;; String-related
[(apply str (repeat 255 "x")) (apply str (repeat 255 "x")) vchar-type]
[(apply str (repeat 256 "x")) (apply str (repeat 256 "x")) text-type]
["86 is my favorite number" "86 is my favorite number" vchar-type]
["My favorite number is 86" "My favorite number is 86" vchar-type]
;; Date-related
[" 2022-01-01 " #t "2022-01-01" date-type]
[" 2022-02-30 " " 2022-02-30 " vchar-type]
[" -2022-01-01 " #t "-2022-01-01" date-type]
[" Jan 1 2018" #t "2018-01-01" date-type]
[" Jan 02 2018" #t "2018-01-02" date-type]
[" Jan 30 -2018" #t "-2018-01-30" date-type]
[" Jan 1, 2018" #t "2018-01-01" date-type]
[" Jan 02, 2018" #t "2018-01-02" date-type]
[" Feb 30, 2018" " Feb 30, 2018" vchar-type]
[" 1 Jan 2018" #t "2018-01-01" date-type]
[" 02 Jan 2018" #t "2018-01-02" date-type]
[" 1 Jan, 2018" #t "2018-01-01" date-type]
[" 02 Jan, 2018" #t "2018-01-02" date-type]
[" January 1 2018" #t "2018-01-01" date-type]
[" January 02 2018" #t "2018-01-02" date-type]
[" January 1, 2018" #t "2018-01-01" date-type]
[" January 02, 2018" #t "2018-01-02" date-type]
[" 1 January 2018" #t "2018-01-01" date-type]
[" 02 January 2018" #t "2018-01-02" date-type]
[" 1 January, 2018" #t "2018-01-01" date-type]
[" 02 January, 2018" #t "2018-01-02" date-type]
[" Saturday, January 1 2000" #t "2000-01-01" date-type]
[" Sunday, January 02 2000" #t "2000-01-02" date-type]
[" Saturday, January 1, 2000" #t "2000-01-01" date-type]
[" Sunday, January 02, 2000" #t "2000-01-02" date-type]
[" 2022-01-01T01:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01t01:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01 01:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01T01:00:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01t01:00:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01 01:00:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01T01:00:00.00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01t01:00:00.00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01 01:00:00.00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01T01:00:00.000000000 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01t01:00:00.000000000 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01 01:00:00.000000000 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01T01:00:00.00-07 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01t01:00:00.00-07 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01 01:00:00.00-07 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01T01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01t01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01 01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01T01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01t01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01 01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01T01:00:00.00Z " (t/offset-date-time "2022-01-01T01:00+00:00") offset-dt-type]
[" 2022-01-01t01:00:00.00Z " (t/offset-date-time "2022-01-01T01:00+00:00") offset-dt-type]
[" 2022-01-01 01:00:00.00Z " (t/offset-date-time "2022-01-01T01:00+00:00") offset-dt-type]]]
(let [settings {:number-separators (or separators ".,")}
type->check (#'upload-types/settings->type->check settings)
value-type (#'upload-types/value->type type->check string-value)
;; get the type of the column, if we created it based on only that value
col-type (first (upload-types/column-types-from-rows settings nil [[string-value]]))
parser (upload-parsing/upload-type->parser col-type settings)]
(testing (format "\"%s\" is a %s" string-value value-type)
(is (= expected-type
value-type)))
(testing (format "\"%s\" is parsed into %s" string-value expected-value)
(is (= expected-value
(parser string-value)))))))
(deftest ^:parallel type-coalescing-test
(doseq [[type-a type-b expected]
[[bool-type bool-type bool-type]
[bool-type int-type vchar-type]
[bool-type bool-int-type bool-type]
[bool-type date-type vchar-type]
[bool-type datetime-type vchar-type]
[bool-type vchar-type vchar-type]
[bool-type text-type text-type]
[int-type bool-type vchar-type]
[int-type float-type float-type]
[int-type date-type vchar-type]
[int-type datetime-type vchar-type]
[int-type vchar-type vchar-type]
[int-type text-type text-type]
[int-type bool-int-type int-type]
[bool-int-type bool-int-type bool-int-type]
[float-type vchar-type vchar-type]
[float-type text-type text-type]
[float-type date-type vchar-type]
[float-type datetime-type vchar-type]
[float-type text-type text-type]
[float-type date-type vchar-type]
[float-type datetime-type vchar-type]
[date-type datetime-type datetime-type]
[date-type vchar-type vchar-type]
[date-type text-type text-type]
[datetime-type vchar-type vchar-type]
[offset-dt-type vchar-type vchar-type]
[datetime-type text-type text-type]
[offset-dt-type text-type text-type]
[vchar-type text-type text-type]]]
(is (= expected (ordered-hierarchy/first-common-ancestor upload-types/h type-a type-b))
(format "%s + %s = %s" (name type-a) (name type-b) (name expected)))))
(deftest ^:parallel coercion-soundness-test
(testing "Every coercion maps to a stricter type that is a direct descendant"
(is (empty?
;; Build a set of all type-pairs that violate this constraint
(into (sorted-set)
(comp (mapcat (fn [[column-type value-types]]
(for [value-type value-types]
[column-type value-type])))
(remove (fn [[column-type value-type]]
;; Strictly speaking we only require that there is a route which only traverses
;; through abstract nodes, but it's much simpler to enforce this stronger condition:
;; that `column-type` is a child of `value-type`
(contains? (ordered-hierarchy/children upload-types/h value-type) column-type))))
@#'upload-types/column-type->coercible-value-types)))))
(deftest ^:parallel initial-column-type-test
(let [column-type (partial upload-types/concretize nil)]
(testing "Unknown value types are treated as text"
(is (= ::upload-types/text (column-type nil))))
(testing "Non-abstract value types resolve to themselves"
(let [ts upload-types/column-types]
(is (= (zipmap ts ts)
(zipmap (map column-type ts) ts)))))
(testing "Abstract values are resolved using an explicit map, which is consistent with the hierarchy"
(doseq [[value-type expected-column-type] @#'upload-types/abstract->concrete]
(is (= expected-column-type (column-type value-type)))
;; Strictly speaking we only require that there is a route from each abstract node to its column type which only
;; traverses through abstract nodes, but it's much simpler to enforce this stronger condition.
(is (contains? (parents upload-types/h value-type) expected-column-type))))))
(deftest ^:parallel append-column-type-test
(doseq [existing-type upload-types/value-types
value-type upload-types/value-types]
(case [existing-type value-type]
[::upload-types/int ::upload-types/*float-or-int*]
(testing "We coerce floats with fractional part to plan integers when appending into an existing integer column"
(is (= ::upload-types/int (upload-types/concretize existing-type value-type))))
;; This is unsatisfying, would be good if this interface also covered promoting columns and rejecting values.
(testing (format "We append %s values to %s columns as if we were inserting them into new columns" existing-type value-type)
(is (= (upload-types/concretize nil value-type)
(upload-types/concretize existing-type value-type)))))))
(defn- re-namespace
"Replace the namespace in any namespaced keyword with this test namespace, return all other values unchanged."
[x]
(if (and (keyword? x) (namespace x))
(keyword (name (ns-name *ns*)) (name x))
x))
;; Translate the hierarchy to talk about this namespace, so that we can omit an explicit namespace for brevity.
(def ^:private h (walk/postwalk re-namespace upload-types/h))
(deftest ^:parallel parents-test
(testing "Parents are listed according to the order that this tag was derived from each of them"
(is (nil? (parents h ::text)))
(is (= [::text] (vec (parents h ::varchar-255))))
(is (= [::*float-or-int*] (vec (parents h ::int))))
(is (= [::boolean ::int] (vec (parents h ::*boolean-int*))))))
(deftest ^:parallel children-test
(testing "Children are listed in reverse order to when they were each derived from this tag"
(is (nil? (ordered-hierarchy/children h ::*boolean-int*)))
(is (= [::varchar-255] (vec (ordered-hierarchy/children h ::text))))
(is (= [::*float-or-int*] (vec (ordered-hierarchy/children h ::float))))
(is (= [::auto-incrementing-int-pk ::*boolean-int*] (vec (ordered-hierarchy/children h ::int))))))
(deftest ^:parallel ancestors-test
(testing "Linear ancestors are listed in order"
(is (nil? (ancestors h ::text)))
(is (= [::text] (vec (ancestors h ::varchar-255))))
(is (= [::varchar-255 ::text] (vec (ancestors h ::boolean))))
(is (= [::*float-or-int* ::float ::varchar-255 ::text] (vec (ancestors h ::int)))))
(testing "Non-linear ancestors are listed in topological order, following edges in the order they were defined."
(is (= [::boolean
::int
::*float-or-int*
::float
::varchar-255
::text]
(vec (ancestors h ::*boolean-int*))))))
(deftest ^:parallel descendants-test
(testing "Linear descendants are listed in order"
(is (nil? (descendants h ::*boolean-int*)))
(is (nil? (descendants h ::date)))
(is (= [::date] (vec (descendants h ::datetime))))
(is (= [::*boolean-int*] (vec (descendants h ::boolean)))))
(testing "Non-linear descendants are listed in reverse topological order, following edges in reserve order."
(is (= [::*float-or-int*
::int
::auto-incrementing-int-pk
::*boolean-int*]
(vec (descendants h ::float))))
(is (= [::varchar-255
::offset-datetime
::datetime
::date
::float
::*float-or-int*
::int
::auto-incrementing-int-pk
::boolean
::*boolean-int*]
(vec (descendants h ::text))))))
(deftest ^:parallel sorted-tags-test
(testing "Tags are returned in a topological ordering that also preserves insertion order of the edges."
(is (= [::*boolean-int*
::boolean
::auto-incrementing-int-pk
::int
::*float-or-int*
::float
::date
::datetime
::offset-datetime
::varchar-255
::text]
(vec (ordered-hierarchy/sorted-tags h))))))
(deftest ^:parallel first-common-ancestor-test
(testing "The first-common-ancestor is the first tag in the lineage of tag-a that is also in the lineage of tag-b"
(is (= ::*boolean-int* (ordered-hierarchy/first-common-ancestor h ::*boolean-int* nil)))
(is (= ::*boolean-int* (ordered-hierarchy/first-common-ancestor h ::*boolean-int* ::*boolean-int*)))
(is (= ::boolean (ordered-hierarchy/first-common-ancestor h ::*boolean-int* ::boolean)))
(is (= ::varchar-255 (ordered-hierarchy/first-common-ancestor h ::boolean ::int)))))
......@@ -7,7 +7,6 @@
[clojure.string :as str]
[clojure.test :refer :all]
[flatland.ordered.map :as ordered-map]
[java-time.api :as t]
[metabase.analytics.snowplow-test :as snowplow-test]
[metabase.driver :as driver]
[metabase.driver.ddl.interface :as ddl.i]
......@@ -23,25 +22,23 @@
[metabase.test.data.sql :as sql.tx]
[metabase.upload :as upload]
[metabase.upload.parsing :as upload-parsing]
[metabase.upload.types :as upload-types]
[metabase.util :as u]
[metabase.util.ordered-hierarchy :as ordered-hierarchy]
[toucan2.core :as t2])
(:import
(java.io File)))
(set! *warn-on-reflection* true)
(def ^:private bool-type ::upload/boolean)
(def ^:private int-type ::upload/int)
(def ^:private bool-int-type ::upload/*boolean-int*)
(def ^:private float-type ::upload/float)
(def ^:private float-or-int-type ::upload/*float-or-int*)
(def ^:private vchar-type ::upload/varchar-255)
(def ^:private date-type ::upload/date)
(def ^:private datetime-type ::upload/datetime)
(def ^:private offset-dt-type ::upload/offset-datetime)
(def ^:private text-type ::upload/text)
(def ^:private auto-pk-type ::upload/auto-incrementing-int-pk)
(def ^:private bool-type ::upload-types/boolean)
(def ^:private int-type ::upload-types/int)
(def ^:private float-type ::upload-types/float)
(def ^:private vchar-type ::upload-types/varchar-255)
(def ^:private date-type ::upload-types/date)
(def ^:private datetime-type ::upload-types/datetime)
(def ^:private offset-dt-type ::upload-types/offset-datetime)
(def ^:private text-type ::upload-types/text)
(def ^:private auto-pk-type ::upload-types/auto-incrementing-int-pk)
(defn- local-infile-on? []
(= "ON" (-> (sql-jdbc.conn/db->pooled-connection-spec (mt/db))
......@@ -115,189 +112,6 @@
(#'upload/scan-and-sync-table! database table))
(t2/select-one :model/Table (:id table))))
(deftest type-detection-and-parse-test
(doseq [[string-value expected-value expected-type seps]
;; Number-related
[["0.0" 0 float-or-int-type "."]
["0.0" 0 float-or-int-type ".,"]
["0,0" 0 float-or-int-type ",."]
["0,0" 0 float-or-int-type ", "]
["0.0" 0 float-or-int-type ".’"]
["-0.0" -0.0 float-or-int-type "."]
["-0.0" -0.0 float-or-int-type ".,"]
["-0,0" -0.0 float-or-int-type ",."]
["-0,0" -0.0 float-or-int-type ", "]
["-0.0" -0.0 float-or-int-type ".’"]
["(0.0)" -0.0 float-or-int-type "."]
["(0.0)" -0.0 float-or-int-type ".,"]
["(0,0)" -0.0 float-or-int-type ",."]
["(0,0)" -0.0 float-or-int-type ", "]
["(0.0)" -0.0 float-or-int-type ".’"]
["-4300.00€" -4300 float-or-int-type ".,"]
["£1,000.00" 1000 float-or-int-type]
["£1,000.00" 1000 float-or-int-type "."]
["£1,000.00" 1000 float-or-int-type ".,"]
["£1.000,00" 1000 float-or-int-type ",."]
["£1 000,00" 1000 float-or-int-type ", "]
["£1’000.00" 1000 float-or-int-type ".’"]
["$2" 2 int-type]
["$ 3" 3 int-type]
["-43€" -43 int-type]
["(86)" -86 int-type]
["($86)" -86 int-type]
["£1000" 1000 int-type]
["£1000" 1000 int-type "."]
["£1000" 1000 int-type ".,"]
["£1000" 1000 int-type ",."]
["£1000" 1000 int-type ", "]
["£1000" 1000 int-type ".’"]
["-¥9" -9 int-type]
["₹ -13" -13 int-type]
["₪13" 13 int-type]
["₩-13" -13 int-type]
["₿42" 42 int-type]
["-99¢" -99 int-type]
["2" 2 int-type]
["-86" -86 int-type]
["9,986,000" 9986000 int-type]
["9,986,000" 9986000 int-type "."]
["9,986,000" 9986000 int-type ".,"]
["9.986.000" 9986000 int-type ",."]
["9’986’000" 9986000 int-type ".’"]
["$0" 0 int-type]
["-1" -1 int-type]
["0" false bool-int-type]
["1" true bool-int-type]
["9.986.000" "9.986.000" vchar-type ".,"]
["3.14" 3.14 float-type]
["3.14" 3.14 float-type "."]
["3.14" 3.14 float-type ".,"]
["3,14" 3.14 float-type ",."]
["3,14" 3.14 float-type ", "]
["(3.14)" -3.14 float-type]
["3.14" 3.14 float-type ".’"]
[".14" ".14" vchar-type ".,"] ;; TODO: this should be a float type
["0.14" 0.14 float-type ".,"]
["-9986.567" -9986.567 float-type ".,"]
["$2.0" 2 float-or-int-type ".,"]
["$ 3.50" 3.50 float-type ".,"]
["-4300.23€" -4300.23 float-type ".,"]
["£1,000.23" 1000.23 float-type]
["£1,000.23" 1000.23 float-type "."]
["£1,000.23" 1000.23 float-type ".,"]
["£1.000,23" 1000.23 float-type ",."]
["£1 000,23" 1000.23 float-type ", "]
["£1’000.23" 1000.23 float-type ".’"]
["-¥9.99" -9.99 float-type ".,"]
["₹ -13.23" -13.23 float-type ".,"]
["₪13.01" 13.01 float-type ".,"]
["₩13.33" 13.33 float-type ".,"]
["₿42.243646" 42.243646 float-type ".,"]
["-99.99¢" -99.99 float-type ".,"]
["." "." vchar-type]
;; String-related
[(apply str (repeat 255 "x")) (apply str (repeat 255 "x")) vchar-type]
[(apply str (repeat 256 "x")) (apply str (repeat 256 "x")) text-type]
["86 is my favorite number" "86 is my favorite number" vchar-type]
["My favorite number is 86" "My favorite number is 86" vchar-type]
;; Date-related
[" 2022-01-01 " #t "2022-01-01" date-type]
[" 2022-02-30 " " 2022-02-30 " vchar-type]
[" -2022-01-01 " #t "-2022-01-01" date-type]
[" Jan 1 2018" #t "2018-01-01" date-type]
[" Jan 02 2018" #t "2018-01-02" date-type]
[" Jan 30 -2018" #t "-2018-01-30" date-type]
[" Jan 1, 2018" #t "2018-01-01" date-type]
[" Jan 02, 2018" #t "2018-01-02" date-type]
[" Feb 30, 2018" " Feb 30, 2018" vchar-type]
[" 1 Jan 2018" #t "2018-01-01" date-type]
[" 02 Jan 2018" #t "2018-01-02" date-type]
[" 1 Jan, 2018" #t "2018-01-01" date-type]
[" 02 Jan, 2018" #t "2018-01-02" date-type]
[" January 1 2018" #t "2018-01-01" date-type]
[" January 02 2018" #t "2018-01-02" date-type]
[" January 1, 2018" #t "2018-01-01" date-type]
[" January 02, 2018" #t "2018-01-02" date-type]
[" 1 January 2018" #t "2018-01-01" date-type]
[" 02 January 2018" #t "2018-01-02" date-type]
[" 1 January, 2018" #t "2018-01-01" date-type]
[" 02 January, 2018" #t "2018-01-02" date-type]
[" Saturday, January 1 2000" #t "2000-01-01" date-type]
[" Sunday, January 02 2000" #t "2000-01-02" date-type]
[" Saturday, January 1, 2000" #t "2000-01-01" date-type]
[" Sunday, January 02, 2000" #t "2000-01-02" date-type]
[" 2022-01-01T01:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01t01:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01 01:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01T01:00:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01t01:00:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01 01:00:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01T01:00:00.00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01t01:00:00.00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01 01:00:00.00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01T01:00:00.000000000 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01t01:00:00.000000000 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01 01:00:00.000000000 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01T01:00:00.00-07 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01t01:00:00.00-07 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01 01:00:00.00-07 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01T01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01t01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01 01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01T01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01t01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01 01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01T01:00:00.00Z " (t/offset-date-time "2022-01-01T01:00+00:00") offset-dt-type]
[" 2022-01-01t01:00:00.00Z " (t/offset-date-time "2022-01-01T01:00+00:00") offset-dt-type]
[" 2022-01-01 01:00:00.00Z " (t/offset-date-time "2022-01-01T01:00+00:00") offset-dt-type]]]
(let [settings {:number-separators (or seps ".,")}
type->check (#'upload/settings->type->check settings)
value-type (#'upload/value->type type->check string-value)
;; get the type of the column, if we created it based on only that value
col-type (first (upload/column-types-from-rows settings nil [[string-value]]))
parser (upload-parsing/upload-type->parser col-type settings)]
(testing (format "\"%s\" is a %s" string-value value-type)
(is (= expected-type
value-type)))
(testing (format "\"%s\" is parsed into %s" string-value expected-value)
(is (= expected-value
(parser string-value)))))))
(deftest ^:parallel type-coalescing-test
(doseq [[type-a type-b expected]
[[bool-type bool-type bool-type]
[bool-type int-type vchar-type]
[bool-type bool-int-type bool-type]
[bool-type date-type vchar-type]
[bool-type datetime-type vchar-type]
[bool-type vchar-type vchar-type]
[bool-type text-type text-type]
[int-type bool-type vchar-type]
[int-type float-type float-type]
[int-type date-type vchar-type]
[int-type datetime-type vchar-type]
[int-type vchar-type vchar-type]
[int-type text-type text-type]
[int-type bool-int-type int-type]
[bool-int-type bool-int-type bool-int-type]
[float-type vchar-type vchar-type]
[float-type text-type text-type]
[float-type date-type vchar-type]
[float-type datetime-type vchar-type]
[float-type text-type text-type]
[float-type date-type vchar-type]
[float-type datetime-type vchar-type]
[date-type datetime-type datetime-type]
[date-type vchar-type vchar-type]
[date-type text-type text-type]
[datetime-type vchar-type vchar-type]
[offset-dt-type vchar-type vchar-type]
[datetime-type text-type text-type]
[offset-dt-type text-type text-type]
[vchar-type text-type text-type]]]
(is (= expected (#'ordered-hierarchy/first-common-ancestor @#'upload/h type-a type-b))
(format "%s + %s = %s" (name type-a) (name type-b) (name expected)))))
(defn csv-file-with
"Create a temp csv file with the given content and return the file"
(^File [rows]
......@@ -1279,8 +1093,8 @@
:or {table-name (mt/random-name)
schema-name (sql.tx/session-schema driver/*driver*)
col->upload-type (ordered-map/ordered-map
upload/auto-pk-column-keyword ::upload/auto-incrementing-int-pk
:name ::upload/varchar-255)
upload/auto-pk-column-keyword auto-pk-type
:name vchar-type)
rows [["Obi-Wan Kenobi"]]}}]
(let [driver driver/*driver*
db-id (mt/id)
......@@ -1384,9 +1198,9 @@
["name,id" "Luke Skywalker,20" "Darth Vader,30"]]] ;; different order
(with-upload-table!
[table (create-upload-table! {:col->upload-type (ordered-map/ordered-map
:_mb_row_id ::upload/auto-incrementing-int-pk
:id ::upload/int
:name ::upload/varchar-255)
:_mb_row_id auto-pk-type
:id int-type
:name vchar-type)
:rows [[10 "Obi-Wan Kenobi"]]})]
(let [file (csv-file-with csv-rows (mt/random-name))]
(is (some? (append-csv! {:file file
......@@ -1435,8 +1249,8 @@
(with-upload-table!
[table (create-upload-table!
{:col->upload-type (ordered-map/ordered-map
:id ::upload/int
:name ::upload/varchar-255)
:id int-type
:name vchar-type)
:rows [[1,"some_text"]]})]
(let [file (csv-file-with csv-rows (mt/random-name))]
......@@ -1465,14 +1279,14 @@
(with-upload-table!
[table (create-upload-table!
{:col->upload-type (ordered-map/ordered-map
:_mb_row_id ::upload/auto-incrementing-int-pk
:biginteger ::upload/int
:float ::upload/float
:text ::upload/varchar-255
:boolean ::upload/boolean
:date ::upload/date
:datetime ::upload/datetime
:offset_datetime ::upload/offset-datetime)
:_mb_row_id auto-pk-type
:biginteger int-type
:float float-type
:text vchar-type
:boolean bool-type
:date date-type
:datetime datetime-type
:offset_datetime offset-dt-type)
:rows [[1000000,1.0,"some_text",false,#t "2020-01-01",#t "2020-01-01T00:00:00",#t "2020-01-01T00:00:00"]]})]
(let [csv-rows ["biginteger,float,text,boolean,date,datetime,offset_datetime"
"2000000,2.0,some_text,true,2020-02-02,2020-02-02T02:02:02,2020-02-02T02:02:02+02:00"]
......@@ -1506,7 +1320,7 @@
(testing "If the table doesn't have _mb_row_id but the CSV does, ignore the CSV _mb_row_id but create the column anyway"
(with-upload-table!
[table (create-upload-table! {:col->upload-type (ordered-map/ordered-map
:name ::upload/varchar-255)
:name vchar-type)
:rows [["Obi-Wan Kenobi"]]})]
(let [csv-rows ["_MB-row ID,name" "1000,Luke Skywalker"]
file (csv-file-with csv-rows (mt/random-name))]
......@@ -1546,7 +1360,7 @@
(testing "If the table doesn't have _mb_row_id and a failure occurs, we shouldn't create a _mb_row_id column"
(with-upload-table!
[table (create-upload-table! {:col->upload-type (ordered-map/ordered-map
:bool_column ::upload/boolean)
:bool_column bool-type)
:rows [[true]]})]
(let [csv-rows ["bool_column" "not a bool"]
file (csv-file-with csv-rows (mt/random-name))
......@@ -1691,9 +1505,9 @@
(testing "Append should handle the columns in the CSV file being reordered"
(with-upload-table! [table (create-upload-table!
:col->upload-type (ordered-map/ordered-map
upload/auto-pk-column-keyword ::upload/auto-incrementing-int-pk
:name ::upload/varchar-255
:shame ::upload/varchar-255)
upload/auto-pk-column-keyword auto-pk-type
:name vchar-type
:shame vchar-type)
:rows [["Obi-Wan Kenobi" "No one really knows me"]])]
(let [csv-rows ["shame,name" "Nothing - you can't prove it,Puke Nightstalker"]
......@@ -1731,27 +1545,27 @@
(doseq [auto-pk-column? [true false]]
(testing (str "\nFor a table that has " (if auto-pk-column? "an" " no") " automatically generated PK already")
(doseq [{:keys [upload-type valid invalid msg]}
[{:upload-type ::upload/int
[{:upload-type int-type
:valid 1
:invalid "not an int"
:msg "'not an int' is not a recognizable number"}
{:upload-type ::upload/float
{:upload-type float-type
:valid 1.1
:invalid "not a float"
:msg "'not a float' is not a recognizable number"}
{:upload-type ::upload/boolean
{:upload-type bool-type
:valid true
:invalid "correct"
:msg "'correct' is not a recognizable boolean"}
{:upload-type ::upload/date
{:upload-type date-type
:valid #t "2000-01-01"
:invalid "2023-01-01T00:00:00"
:msg "'2023-01-01T00:00:00' is not a recognizable date"}
{:upload-type ::upload/datetime
{:upload-type datetime-type
:valid #t "2000-01-01T00:00:00"
:invalid "2023-01-01T00:00:00+01"
:msg "'2023-01-01T00:00:00+01' is not a recognizable datetime"}
{:upload-type ::upload/offset-datetime
{:upload-type offset-dt-type
:valid #t "2000-01-01T00:00:00+01"
:invalid "2023-01-01T00:00:00[Europe/Helsinki]"
:msg "'2023-01-01T00:00:00[Europe/Helsinki]' is not a recognizable zoned datetime"}]]
......@@ -1760,9 +1574,9 @@
[table (create-upload-table!
{:col->upload-type (cond-> (ordered-map/ordered-map
:test_column upload-type
:name ::upload/varchar-255)
:name vchar-type)
auto-pk-column?
(assoc upload/auto-pk-column-keyword ::upload/auto-incrementing-int-pk))
(assoc upload/auto-pk-column-keyword auto-pk-type))
:rows [[valid "Obi-Wan Kenobi"]]})]
(let [;; The CSV contains 50 valid rows and 1 invalid row
csv-rows `["test_column,name" ~@(repeat 50 (str valid ",Darth Vadar")) ~(str invalid ",Luke Skywalker")]
......@@ -1785,7 +1599,9 @@
(mt/test-drivers (filter (fn [driver]
;; use of varchar(255) is not universal for all drivers, so only test drivers that
;; have different database types for varchar(255) and text
(apply not= (map (partial driver/upload-type->database-type driver) [::upload/varchar-255 ::upload/text])))
(apply not= (->> [vchar-type text-type]
(map #(keyword "metabase.upload" (name %)))
(map (partial driver/upload-type->database-type driver)))))
(mt/normal-drivers-with-feature :uploads))
(with-mysql-local-infile-off
(testing "Append fails if the CSV file contains string values that are too long for the column"
......@@ -1794,8 +1610,8 @@
(binding [driver/*insert-chunk-rows* 1]
(with-upload-table!
[table (create-upload-table! {:col->upload-type (ordered-map/ordered-map
upload/auto-pk-column-keyword ::upload/auto-incrementing-int-pk
:test_column ::upload/varchar-255)
upload/auto-pk-column-keyword auto-pk-type
:test_column vchar-type)
:rows [["valid"]]})]
(let [csv-rows `["test_column" ~@(repeat 50 "valid too") ~(apply str (repeat 256 "x"))]
file (csv-file-with csv-rows (mt/random-name))]
......@@ -1816,14 +1632,14 @@
;; for drivers that insert rows in chunks, we change the chunk size to 1 so that we can test that the
;; inserted rows are rolled back
(binding [driver/*insert-chunk-rows* 1]
(let [upload-type ::upload/varchar-255,
(let [upload-type vchar-type,
uncoerced (apply str (repeat 256 "x"))
coerced (apply str (repeat 255 "x"))]
(testing (format "\nUploading %s into a column of type %s should be coerced to %s"
uncoerced (name upload-type) coerced)
(with-upload-table!
[table (create-upload-table! {:col->upload-type (ordered-map/ordered-map
upload/auto-pk-column-keyword ::upload/auto-incrementing-int-pk
upload/auto-pk-column-keyword auto-pk-type
:test_column upload-type)
:rows []})]
(let [csv-rows ["test_column" uncoerced]
......@@ -1845,16 +1661,16 @@
;; inserted rows are rolled back
(binding [driver/*insert-chunk-rows* 1]
(doseq [{:keys [upload-type uncoerced coerced fail-msg] :as args}
[{:upload-type ::upload/int, :uncoerced "2.0", :coerced 2} ;; value is coerced to int
{:upload-type ::upload/int, :uncoerced "2.1", :coerced 2.1} ;; column is promoted to float
{:upload-type ::upload/float, :uncoerced "2", :coerced 2.0}
{:upload-type ::upload/boolean, :uncoerced "0", :coerced false}
{:upload-type ::upload/boolean, :uncoerced "1.0", :fail-msg "'1.0' is not a recognizable boolean"}
{:upload-type ::upload/boolean, :uncoerced "0.0", :fail-msg "'0.0' is not a recognizable boolean"}
{:upload-type ::upload/int, :uncoerced "01/01/2012", :fail-msg "'01/01/2012' is not a recognizable number"}]]
[{:upload-type int-type, :uncoerced "2.0", :coerced 2} ;; value is coerced to int
{:upload-type int-type, :uncoerced "2.1", :coerced 2.1} ;; column is promoted to float
{:upload-type float-type, :uncoerced "2", :coerced 2.0}
{:upload-type bool-type, :uncoerced "0", :coerced false}
{:upload-type bool-type, :uncoerced "1.0", :fail-msg "'1.0' is not a recognizable boolean"}
{:upload-type bool-type, :uncoerced "0.0", :fail-msg "'0.0' is not a recognizable boolean"}
{:upload-type int-type, :uncoerced "01/01/2012", :fail-msg "'01/01/2012' is not a recognizable number"}]]
(with-upload-table!
[table (create-upload-table! {:col->upload-type (ordered-map/ordered-map
upload/auto-pk-column-keyword ::upload/auto-incrementing-int-pk
upload/auto-pk-column-keyword auto-pk-type
:test_column upload-type)
:rows []})]
(let [csv-rows ["test_column" uncoerced]
......@@ -1878,36 +1694,6 @@
(append!)))))
(io/delete-file file)))))))))
(def ^:private concretize @#'upload/concretize)
(deftest initial-column-type-test
(let [column-type (partial concretize nil)]
(testing "Unknown value types are treated as text"
(is (= ::upload/text (column-type nil))))
(testing "Non-abstract value types resolve to themselves"
(let [ts @#'upload/column-types]
(is (= (zipmap ts ts)
(zipmap (map column-type ts) ts)))))
(testing "Abstract values are resolved using an explicit map, which is consistent with the hierarchy"
(doseq [[value-type expected-column-type] @#'upload/abstract->concrete]
(is (= expected-column-type (column-type value-type)))
;; Strictly speaking we only require that there is a route from each abstract node to its column type which only
;; traverses through abstract nodes, but it's much simpler to enforce this stronger condition.
(is (contains? (parents @#'upload/h value-type) expected-column-type))))))
(deftest append-column-type-test
(doseq [existing-type @#'upload/value-types
value-type @#'upload/value-types]
(case [existing-type value-type]
[::upload/int ::upload/*float-or-int*]
(testing "We coerce floats with fractional part to plan integers when appending into an existing integer column"
(is (= ::upload/int (concretize existing-type value-type))))
;; This is unsatisfying, would be good if this interface also covered promoting columns and rejecting values.
(testing (format "We append %s values to %s columns as if we were inserting them into new columns" existing-type value-type)
(is (= (concretize nil value-type)
(concretize existing-type value-type)))))))
(deftest create-from-csv-int-and-float-test
(testing "Creation should handle a mix of int and float-or-int values in any order"
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
......@@ -1951,9 +1737,9 @@
(testing "Append should handle a mix of int and float-or-int values being appended to an int column"
(with-upload-table! [table (create-upload-table!
:col->upload-type (ordered-map/ordered-map
:_mb_row_id ::upload/auto-incrementing-int-pk
:number_1 ::upload/int
:number_2 ::upload/int)
:_mb_row_id auto-pk-type
:number_1 int-type
:number_2 int-type)
:rows [[1, 1]])]
(let [csv-rows ["number-1, number-2"
......@@ -1973,9 +1759,9 @@
(testing "Append should handle a mix of int and float-or-int values being appended to an int column"
(with-upload-table! [table (create-upload-table!
:col->upload-type (ordered-map/ordered-map
:_mb_row_id ::upload/auto-incrementing-int-pk
:number_1 ::upload/int
:number_2 ::upload/int)
:_mb_row_id auto-pk-type
:number_1 int-type
:number_2 int-type)
:rows [[1, 1]])]
(let [csv-rows ["number-1, number-2"
......@@ -1991,18 +1777,3 @@
table-rows)))
(io/delete-file file))))))
(deftest coercion-soundness-test
(testing "Every coercion maps to a stricter type that is a direct descendant"
(is (empty?
;; Build a set of all type-pairs that violate this constraint
(into (sorted-set)
(comp (mapcat (fn [[column-type value-types]]
(for [value-type value-types]
[column-type value-type])))
(remove (fn [[column-type value-type]]
;; Strictly speaking we only require that there is a route which only traverses
;; through abstract nodes, but it's much simpler to enforce this stronger condition:
;; that `column-type` is a child of `value-type`
(contains? (ordered-hierarchy/children @#'upload/h value-type) column-type))))
@#'upload/column-type->coercible-value-types)))))
(ns metabase.util.ordered-hierarchy-test
(:require
[clojure.test :refer [deftest is testing]]
[clojure.walk :as walk]
[metabase.upload :as upload]
[metabase.util.ordered-hierarchy :as ordered-hierarchy]))
;;; It would be nice to have property tests, to expose any subtle edge cases.
;;; For now, we use a translation of some real world usage in the app.
(def ^:private h
(walk/postwalk
(fn [x]
(if (and (keyword? x) (namespace x))
(keyword (name (ns-name *ns*)) (name x))
x))
@#'upload/h))
(deftest ^:parallel parents-test
(testing "Parents are listed according to the order that this tag was derived from each of them"
(is (nil? (parents h ::text)))
(is (= [::text] (vec (parents h ::varchar-255))))
(is (= [::*float-or-int*] (vec (parents h ::int))))
(is (= [::boolean ::int] (vec (parents h ::*boolean-int*))))))
(deftest ^:parallel children-test
(testing "Children are listed in reverse order to when they were each derived from this tag"
(is (nil? (ordered-hierarchy/children h ::*boolean-int*)))
(is (= [::varchar-255] (vec (ordered-hierarchy/children h ::text))))
(is (= [::*float-or-int*] (vec (ordered-hierarchy/children h ::float))))
(is (= [::auto-incrementing-int-pk ::*boolean-int*] (vec (ordered-hierarchy/children h ::int))))))
(deftest ^:parallel ancestors-test
(testing "Linear ancestors are listed in order"
(is (nil? (ancestors h ::text)))
(is (= [::text] (vec (ancestors h ::varchar-255))))
(is (= [::varchar-255 ::text] (vec (ancestors h ::boolean))))
(is (= [::*float-or-int* ::float ::varchar-255 ::text] (vec (ancestors h ::int)))))
(testing "Non-linear ancestors are listed in topological order, following edges in the order they were defined."
(is (= [::boolean
::int
::*float-or-int*
::float
::varchar-255
::text]
(vec (ancestors h ::*boolean-int*))))))
(deftest ^:parallel descendants-test
(testing "Linear descendants are listed in order"
(is (nil? (descendants h ::*boolean-int*)))
(is (nil? (descendants h ::date)))
(is (= [::date] (vec (descendants h ::datetime))))
(is (= [::*boolean-int*] (vec (descendants h ::boolean)))))
(testing "Non-linear descendants are listed in reverse topological order, following edges in reserve order."
(is (= [::*float-or-int*
::int
::auto-incrementing-int-pk
::*boolean-int*]
(vec (descendants h ::float))))
(is (= [::varchar-255
::offset-datetime
::datetime
::date
::float
::*float-or-int*
::int
::auto-incrementing-int-pk
::boolean
::*boolean-int*]
(vec (descendants h ::text))))))
(deftest ^:parallel sorted-tags-test
(testing "Tags are returned in a topological ordering that also preserves insertion order of the edges."
(is (= [::*boolean-int*
::boolean
::auto-incrementing-int-pk
::int
::*float-or-int*
::float
::date
::datetime
::offset-datetime
::varchar-255
::text]
(vec (ordered-hierarchy/sorted-tags h))))))
(deftest ^:parallel first-common-ancestor-test
(testing "The first-common-ancestor is the first tag in the lineage of tag-a that is also in the lineage of tag-b"
(is (= ::*boolean-int* (ordered-hierarchy/first-common-ancestor h ::*boolean-int* nil)))
(is (= ::*boolean-int* (ordered-hierarchy/first-common-ancestor h ::*boolean-int* ::*boolean-int*)))
(is (= ::boolean (ordered-hierarchy/first-common-ancestor h ::*boolean-int* ::boolean)))
(is (= ::varchar-255 (ordered-hierarchy/first-common-ancestor h ::boolean ::int)))))
(def ^:private polygons
(ordered-hierarchy/make-hierarchy
[:quadrilateral
[:trapezoid :isosceles-trapezoid :right-trapezoid]
[:kite [:rhombus :square]]
[:parallelogram
:rhombus
[:rectangle :square]]]
[:triangle
:scalene-triangle
[:isosceles-triangle :equilateral-triangle]
[:acute-triangle :equilateral-triangle]
:right-angled-triangle
:obtuse-triangle]))
(deftest make-hierarchy-test
(testing "Hiccup structures have the expected topological order"
(is (= [:isosceles-trapezoid
:right-trapezoid
:trapezoid
:square
:rhombus
:kite
:rectangle
:parallelogram
:quadrilateral
;; it's unfortunate that we would exhaustively test all the quadrilateral types, before checking
;; if it's a triangle (if "hypothetically" we were using the topological order to test a value
;; ... this is a case where a root-to-leaf traversal would make more sense.
:scalene-triangle
:equilateral-triangle
:isosceles-triangle
:acute-triangle
:right-angled-triangle
:obtuse-triangle
:triangle]
(vec (ordered-hierarchy/sorted-tags polygons)))))
(testing "Hiccup structures are translated into the expected graph structure"
(is (= {:trapezoid [:quadrilateral]
:isosceles-trapezoid [:trapezoid]
:right-trapezoid [:trapezoid]
:kite [:quadrilateral]
:rhombus [:kite :parallelogram]
:square [:rhombus :rectangle]
:parallelogram [:quadrilateral]
:rectangle [:parallelogram]
:scalene-triangle [:triangle]
:isosceles-triangle [:triangle]
:equilateral-triangle [:isosceles-triangle :acute-triangle]
:acute-triangle [:triangle]
:right-angled-triangle [:triangle]
:obtuse-triangle [:triangle]}
(update-vals (:parents polygons) vec)))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment