Skip to content
Snippets Groups Projects
Unverified Commit 862daf33 authored by Chris Truter's avatar Chris Truter Committed by GitHub
Browse files

Break out upload.types namespace (#40741)

parent 2d281d2c
No related branches found
No related tags found
No related merge requests found
(ns metabase.upload (ns metabase.upload
(:refer-clojure :exclude [derive make-hierarchy parents])
(:require (:require
[clj-bom.core :as bom] [clj-bom.core :as bom]
[clojure.data :as data] [clojure.data :as data]
...@@ -29,265 +28,17 @@ ...@@ -29,265 +28,17 @@
[metabase.sync.sync-metadata.fields :as sync-fields] [metabase.sync.sync-metadata.fields :as sync-fields]
[metabase.sync.sync-metadata.tables :as sync-tables] [metabase.sync.sync-metadata.tables :as sync-tables]
[metabase.upload.parsing :as upload-parsing] [metabase.upload.parsing :as upload-parsing]
[metabase.upload.types :as upload-types]
[metabase.util :as u] [metabase.util :as u]
[metabase.util.i18n :refer [tru]] [metabase.util.i18n :refer [tru]]
[metabase.util.malli :as mu] [metabase.util.malli :as mu]
[metabase.util.malli.schema :as ms] [metabase.util.malli.schema :as ms]
[metabase.util.ordered-hierarchy :as ordered-hierarchy :refer [make-hierarchy]]
[toucan2.core :as t2]) [toucan2.core :as t2])
(:import (:import
(java.io File))) (java.io File)))
(set! *warn-on-reflection* true) (set! *warn-on-reflection* true)
;;;; +------------------+
;;;; | Schema detection |
;;;; +------------------+
;; Upload value-types form a DAG (directed acyclic graph) where each type can be relaxed into any of its ancestors.
;; We parse each value in the CSV file to the most-specific possible type for each column.
;; The most-specific possible type for a column is the closest common ancestor of the types for each value in the
;; column, found by walking through the graph in topological order, following edges from left to right.
;; Note that this type is not guaranteed to be one of the least common ancestors!
;;
;; See [[metabase.util.ordered-hierarchy/first-common-ancestor]] for more details.
;;
;; <pre><code>
;;
;; text
;; |
;; |
;; varchar-255──────
;; / / \ \
;; / / \ \
;; boolean float datetime offset-datetime
;; | │ │
;; | │ │
;; │ *float-or-int* │
;; │ │ │
;; │ │ │
;; | int date
;; | / \
;; | / \
;; *boolean-int* auto-incrementing-int-pk
;;
;; </code></pre>
;;
;; We have a number of special "abstract" nodes in this graph:
;;
;; - `*boolean-int*` is an ambiguous node, that could either be parsed as a boolean or as an integer.
;; - `*float-or-int*` is any integer, whether it has an explicit decimal point or not.
;;
;; While a `*boolean-int*` is a genuinely ambiguous value, `*float-or-int*` exist to power our desired value-type
;; coercion and column-type promotion behaviour.
;;
;; - If we encounter a `*float-or-int*` inside an `int` column, then we can safely coerce it down to an integer.
;; - If we encounter a `float` (i.e. a non-zero fraction component), then we need to promote the column to a `float.`
;;
;; Columns can not have an abstract type, which has no meaning outside of inference and reconciliation.
;; If we are left with an abstract type after having processed all the values, we first check whether we can coerce
;; the type to the existing column type, and otherwise traverse further up the graph until we reach a concrete type.
;;
;; For ease of reference and explicitness these corresponding values are given in the `abstract->concrete` map.
;; One can figure out these mappings by simply looking up through the ancestors. For now, we require that it is always
;; a direct ancestor, and lay out or graph so that it is the left-most one.
(def ^:private h
"This hierarchy defines a relationship between value types and their specializations.
We use an [[metabase.util.ordered-hierarchy]] for its topological sorting, which simplify writing efficient and
consistent implementations for of our type inference, parsing, and relaxation."
(make-hierarchy
[::text
[::varchar-255
[::boolean ::*boolean-int*]
[::float
;; A number value with a decimal separator, but a zero fractional component.
[::*float-or-int*
[::int
;; A value that could be legally parsed as either a boolean OR an integer
::*boolean-int*
::auto-incrementing-int-pk]]]
[::datetime ::date]
::offset-datetime]]))
(def ^:private abstract->concrete
"Not all value types correspond to column types. We refer to these as \"abstract\" types, and give them *ear-muffs*.
This maps implicitly defines the abstract types, by mapping them each to a default concretion."
{::*boolean-int* ::boolean
::*float-or-int* ::float})
(def ^:private allowed-promotions
"A mapping of which types a column can be implicitly relaxed to, based on the content of appended values.
If we require a relaxation which is not allow-listed here, we will reject the corresponding file."
{::int #{::float}})
(def ^:private column-type->coercible-value-types
"A mapping of which value types should be coerced to the given existing type, rather than triggering promotion."
{::int #{::*float-or-int*}})
(defn- coerce?
"Can values of the given type be coerced to the given existing column type, in a lossless fashion?"
[column-type value-type]
(contains? (column-type->coercible-value-types column-type) value-type))
(def ^:private value-types
"All type tags which values can be inferred as. An ordered set from most to least specialized."
(ordered-hierarchy/sorted-tags h))
(def ^:private column-types
"All type tags that correspond to concrete column types."
(into #{} (remove abstract->concrete) value-types))
(defn- column-type?
[value-type]
(contains? column-types value-type))
(defn ^:private concretize
"Determine the desired column-type given the existing column-type (nil if it's new) and the value-type of the data.
If there's a valid coercion to the existing type, we will preserve it, but otherwise we will relax abstract types
further to a concrete type."
[existing-type value-type]
(cond
;; If the type is concrete, there is nothing to do.
(column-type? value-type) value-type
;; If we know nothing about the value type, treat it as an arbitrary string.
(nil? value-type) ::text
;; If configured, coerce the value to the existing type
(coerce? existing-type value-type) existing-type
;; Otherwise, project it to its canonical concretion.
:else (abstract->concrete value-type)))
;;;;;;;;;;;;;;;;;;;;;;;;;;
;; [[value->type]] helpers
(defn- with-parens
"Returns a regex that matches the argument, with or without surrounding parentheses."
[number-regex]
(re-pattern (str "(" number-regex ")|(\\(" number-regex "\\))")))
(defn- with-currency
"Returns a regex that matches a positive or negative number, including currency symbols"
[number-regex]
;; currency signs can be all over: $2, -$2, $-2, 2€
(re-pattern (str upload-parsing/currency-regex "?\\s*-?"
upload-parsing/currency-regex "?"
number-regex
"\\s*" upload-parsing/currency-regex "?")))
(defn- int-regex
"Matches numbers which do not have a decimal separator."
[number-separators]
(with-parens
(with-currency
(case number-separators
("." ".,") #"\d[\d,]*"
",." #"\d[\d.]*"
", " #"\d[\d \u00A0]*"
".’" #"\d[\d’]*"))))
(defn- float-or-int-regex
"Matches integral numbers, even if they have a decimal separator - e.g. 2 or 2.0"
[number-separators]
(with-parens
(with-currency
(case number-separators
("." ".,") #"\d[\d,]*(\.0+)?"
",." #"\d[\d.]*(\,[0]+)?"
", " #"\d[\d \u00A0]*(\,[0.]+)?"
".’" #"\d[\d’]*(\.[0.]+)?"))))
(defn- float-regex
"Matches numbers, regardless of whether they have a decimal separator - e.g. 2, 2.0, or 2.2"
[number-separators]
(with-parens
(with-currency
(case number-separators
("." ".,") #"\d[\d,]*(\.\d+)?"
",." #"\d[\d.]*(\,[\d]+)?"
", " #"\d[\d \u00A0]*(\,[\d.]+)?"
".’" #"\d[\d’]*(\.[\d.]+)?"))))
(defmacro does-not-throw?
"Returns true if the given body does not throw an exception."
[body]
`(try
~body
true
(catch Throwable e#
false)))
(defn- date-string? [s]
(does-not-throw? (upload-parsing/parse-local-date s)))
(defn- datetime-string? [s]
(does-not-throw? (upload-parsing/parse-local-datetime s)))
(defn- offset-datetime-string? [s]
(does-not-throw? (upload-parsing/parse-offset-datetime s)))
(defn- boolean-string? [s]
(boolean (re-matches #"(?i)true|t|yes|y|1|false|f|no|n|0" s)))
(defn- boolean-int-string? [s]
(contains? #{"0" "1"} s))
(defn- varchar-255? [s]
(<= (count s) 255))
(defn- regex-matcher [regex]
(fn [s]
(boolean (re-matches regex s))))
;; end [[value->type]] helpers
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(def ^:private non-inferable-types
#{::auto-incrementing-int-pk})
(def ^:private type->check-schema
"Every inferable value-type needs to have a detection function registered."
(into [:map] (map #(vector % [:=> [:cat :string] :boolean])
(remove non-inferable-types value-types))))
(mu/defn ^:private settings->type->check :- type->check-schema
[{:keys [number-separators] :as _settings}]
(let [int-string? (regex-matcher (int-regex number-separators))
float-or-int? (regex-matcher (float-or-int-regex number-separators))
float-string? (regex-matcher (float-regex number-separators))]
{::*boolean-int* boolean-int-string?
::boolean boolean-string?
::offset-datetime offset-datetime-string?
::date date-string?
::datetime datetime-string?
::int int-string?
::*float-or-int* float-or-int?
::float float-string?
::varchar-255 varchar-255?
::text (constantly true)}))
(defn- value->type
"Determine the most specific type that is compatible with the given value.
Numbers are assumed to use separators corresponding to the locale defined in the application settings"
[type->check value]
(when-not (str/blank? value)
(let [trimmed (str/trim value)]
(->> (remove non-inferable-types value-types)
(filter #((type->check %) trimmed))
first))))
(defn- relax-type
"Given an existing column type, and a new value, relax the type until it includes the value."
[type->check current-type value]
(cond (nil? value) current-type
(nil? current-type) (value->type type->check value)
:else (let [trimmed (str/trim value)]
(if (str/blank? trimmed)
current-type
(->> (cons current-type (ancestors h current-type))
(filter #((type->check %) trimmed))
first)))))
(defn- normalize-column-name (defn- normalize-column-name
[raw-name] [raw-name]
(if (str/blank? raw-name) (if (str/blank? raw-name)
...@@ -307,20 +58,6 @@ ...@@ -307,20 +58,6 @@
(= (normalize-column-name (:name field)) auto-pk-column-name)) (= (normalize-column-name (:name field)) auto-pk-column-name))
(t2/select :model/Field :table_id table-id :active true)))) (t2/select :model/Field :table_id table-id :active true))))
(defn- type-relaxer
"Given a map of {value-type -> predicate}, return a reducing fn which updates our inferred schema using the next row."
[settings]
(let [relax (partial relax-type (settings->type->check settings))]
(fn [value-types row]
;; It's important to realize this lazy sequence, because otherwise we can build a huge stack and overflow.
(vec (u/map-all relax value-types row)))))
(mu/defn column-types-from-rows :- [:sequential (into [:enum] column-types)]
"Given the types of the existing columns (if there are any), and rows to be added, infer the best supporting types."
[settings existing-types rows]
(->> (reduce (type-relaxer settings) existing-types rows)
(u/map-all concretize existing-types)))
(defn- detect-schema (defn- detect-schema
"Consumes the header and rows from a CSV file. "Consumes the header and rows from a CSV file.
...@@ -336,11 +73,10 @@ ...@@ -336,11 +73,10 @@
unique-header (map keyword (mbql.u/uniquify-names normalized-header)) unique-header (map keyword (mbql.u/uniquify-names normalized-header))
column-count (count normalized-header) column-count (count normalized-header)
initial-types (repeat column-count nil) initial-types (repeat column-count nil)
col-name+type-pairs (->> (column-types-from-rows settings initial-types rows) col-name+type-pairs (->> (upload-types/column-types-from-rows settings initial-types rows)
(map vector unique-header))] (map vector unique-header))]
{:extant-columns (ordered-map/ordered-map col-name+type-pairs) {:extant-columns (ordered-map/ordered-map col-name+type-pairs)
:generated-columns (ordered-map/ordered-map auto-pk-column-keyword ::auto-incrementing-int-pk)})) :generated-columns (ordered-map/ordered-map auto-pk-column-keyword ::upload-types/auto-incrementing-int-pk)}))
;;;; +------------------+ ;;;; +------------------+
;;;; | Parsing values | ;;;; | Parsing values |
...@@ -372,10 +108,16 @@ ...@@ -372,10 +108,16 @@
(str truncated-name-without-time (str truncated-name-without-time
(t/format time-format (strictly-monotonic-now))))) (t/format time-format (strictly-monotonic-now)))))
(mu/defn ^:private database-type
[driver
column-type :- (into [:enum] upload-types/column-types)]
(let [external-type (keyword "metabase.upload" (name column-type))]
(driver/upload-type->database-type driver external-type)))
(defn- column-definitions (defn- column-definitions
"Returns a map of column-name -> column-definition from a map of column-name -> upload-type." "Returns a map of column-name -> column-definition from a map of column-name -> upload-type."
[driver col->upload-type] [driver col->upload-type]
(update-vals col->upload-type (partial driver/upload-type->database-type driver))) (update-vals col->upload-type (partial database-type driver)))
(defn current-database (defn current-database
"The database being used for uploads (as per the `uploads-database-id` setting)." "The database being used for uploads (as per the `uploads-database-id` setting)."
...@@ -685,20 +427,6 @@ ...@@ -685,20 +427,6 @@
;;; | appending to uploaded table ;;; | appending to uploaded table
;;; +----------------------------- ;;; +-----------------------------
(defn- base-type->upload-type
"Returns the most specific upload type for the given base type."
[base-type]
(when base-type
(condp #(isa? %2 %1) base-type
:type/Float ::float
:type/BigInteger ::int
:type/Integer ::int
:type/Boolean ::boolean
:type/DateTimeWithTZ ::offset-datetime
:type/DateTime ::datetime
:type/Date ::date
:type/Text ::text)))
(defn- not-blank [s] (defn- not-blank [s]
(when-not (str/blank? s) (when-not (str/blank? s)
s)) s))
...@@ -734,12 +462,6 @@ ...@@ -734,12 +462,6 @@
(when-let [error-message (extra-and-missing-error-markdown extra missing)] (when-let [error-message (extra-and-missing-error-markdown extra missing)]
(throw (ex-info error-message {:status-code 422}))))) (throw (ex-info error-message {:status-code 422})))))
(defn- matching-or-promotable? [current-type relaxed-type]
(or (nil? current-type)
(= current-type relaxed-type)
(when-let [f (allowed-promotions current-type)]
(f relaxed-type))))
(defn- field-changes (defn- field-changes
"Given existing and newly inferred types for the given `field-names`, calculate which fields need to be added or updated, along with their new types." "Given existing and newly inferred types for the given `field-names`, calculate which fields need to be added or updated, along with their new types."
[field-names existing-types new-types] [field-names existing-types new-types]
...@@ -756,7 +478,7 @@ ...@@ -756,7 +478,7 @@
(m/map-kv (m/map-kv
(fn [field-name col-type] (fn [field-name col-type]
[(keyword field-name) [(keyword field-name)
(driver/upload-type->database-type driver col-type)]) (database-type driver col-type)])
field->col-type)) field->col-type))
(defn- add-columns! [driver database table field->type & args] (defn- add-columns! [driver database table field->type & args]
...@@ -786,12 +508,12 @@ ...@@ -786,12 +508,12 @@
(not (contains? normed-name->field auto-pk-column-name))) (not (contains? normed-name->field auto-pk-column-name)))
_ (check-schema (dissoc normed-name->field auto-pk-column-name) header) _ (check-schema (dissoc normed-name->field auto-pk-column-name) header)
settings (upload-parsing/get-settings) settings (upload-parsing/get-settings)
old-types (map (comp base-type->upload-type :base_type normed-name->field) normed-header) old-types (map (comp upload-types/base-type->upload-type :base_type normed-name->field) normed-header)
;; in the happy, and most common, case all the values will match the existing types ;; in the happy, and most common, case all the values will match the existing types
;; for now we just plan for the worst and perform a fairly expensive operation to detect any type changes ;; for now we just plan for the worst and perform a fairly expensive operation to detect any type changes
;; we can come back and optimize this to an optimistic-with-fallback approach later. ;; we can come back and optimize this to an optimistic-with-fallback approach later.
detected-types (column-types-from-rows settings old-types rows) detected-types (upload-types/column-types-from-rows settings old-types rows)
new-types (map #(if (matching-or-promotable? %1 %2) %2 %1) old-types detected-types) new-types (map upload-types/new-type old-types detected-types)
;; avoid any schema modification unless all the promotions required by the file are supported, ;; avoid any schema modification unless all the promotions required by the file are supported,
;; choosing to not promote means that we will defer failure until we hit the first value that cannot ;; choosing to not promote means that we will defer failure until we hit the first value that cannot
;; be parsed as its existing type - there is scope to improve these error messages in the future. ;; be parsed as its existing type - there is scope to improve these error messages in the future.
...@@ -818,7 +540,7 @@ ...@@ -818,7 +540,7 @@
(when create-auto-pk? (when create-auto-pk?
(add-columns! driver database table (add-columns! driver database table
{auto-pk-column-keyword ::auto-incrementing-int-pk} {auto-pk-column-keyword ::upload-types/auto-incrementing-int-pk}
:primary-key [auto-pk-column-keyword])) :primary-key [auto-pk-column-keyword]))
(scan-and-sync-table! database table) (scan-and-sync-table! database table)
......
...@@ -183,45 +183,45 @@ ...@@ -183,45 +183,45 @@
(fn [upload-type _] (fn [upload-type _]
upload-type)) upload-type))
(defmethod upload-type->parser :metabase.upload/varchar-255 (defmethod upload-type->parser :metabase.upload.types/varchar-255
[_ _] [_ _]
identity) identity)
(defmethod upload-type->parser :metabase.upload/text (defmethod upload-type->parser :metabase.upload.types/text
[_ _] [_ _]
identity) identity)
(defmethod upload-type->parser :metabase.upload/int (defmethod upload-type->parser :metabase.upload.types/int
[_ {:keys [number-separators]}] [_ {:keys [number-separators]}]
(partial parse-as-biginteger number-separators)) (partial parse-as-biginteger number-separators))
(defmethod upload-type->parser :metabase.upload/float (defmethod upload-type->parser :metabase.upload.types/float
[_ {:keys [number-separators]}] [_ {:keys [number-separators]}]
(partial parse-number number-separators)) (partial parse-number number-separators))
(defmethod upload-type->parser :metabase.upload/auto-incrementing-int-pk (defmethod upload-type->parser :metabase.upload.types/auto-incrementing-int-pk
[_ {:keys [number-separators]}] [_ {:keys [number-separators]}]
(partial parse-as-biginteger number-separators)) (partial parse-as-biginteger number-separators))
(defmethod upload-type->parser :metabase.upload/boolean (defmethod upload-type->parser :metabase.upload.types/boolean
[_ _] [_ _]
(comp (comp
parse-bool parse-bool
str/trim)) str/trim))
(defmethod upload-type->parser :metabase.upload/date (defmethod upload-type->parser :metabase.upload.types/date
[_ _] [_ _]
(comp (comp
parse-local-date parse-local-date
str/trim)) str/trim))
(defmethod upload-type->parser :metabase.upload/datetime (defmethod upload-type->parser :metabase.upload.types/datetime
[_ _] [_ _]
(comp (comp
parse-as-datetime parse-as-datetime
str/trim)) str/trim))
(defmethod upload-type->parser :metabase.upload/offset-datetime (defmethod upload-type->parser :metabase.upload.types/offset-datetime
[_ _] [_ _]
(comp (comp
parse-offset-datetime parse-offset-datetime
......
(ns metabase.upload.types
(:refer-clojure :exclude [make-hierarchy])
(:require
[clojure.string :as str]
[metabase.upload.parsing :as upload-parsing]
[metabase.util :as u]
[metabase.util.malli :as mu]
[metabase.util.ordered-hierarchy :as ordered-hierarchy :refer [make-hierarchy]]))
;; Upload value-types form a directed acyclic graph where each type can be relaxed into any of its ancestors.
;; We parse each value in the CSV file to the most-specific possible type for each column.
;; The most-specific possible type for a column is the closest common ancestor of the types for each value in the
;; column, found by walking through the graph in topological order, following edges from left to right.
;; Note that this type is not guaranteed to be one of the least common ancestors!
;;
;; See [[metabase.util.ordered-hierarchy/first-common-ancestor]] for more details.
;;
;; <pre><code>
;;
;; text
;; |
;; |
;; varchar-255──────
;; / / \ \
;; / / \ \
;; boolean float datetime offset-datetime
;; | │ │
;; | │ │
;; │ *float-or-int* │
;; │ │ │
;; │ │ │
;; | int date
;; | / \
;; | / \
;; *boolean-int* auto-incrementing-int-pk
;;
;; </code></pre>
;;
;; We have a number of special "abstract" nodes in this graph:
;;
;; - `*boolean-int*` is an ambiguous node, that could either be parsed as a boolean or as an integer.
;; - `*float-or-int*` is any integer, whether it has an explicit decimal point or not.
;;
;; While a `*boolean-int*` is a genuinely ambiguous value, `*float-or-int*` exist to power our desired value-type
;; coercion and column-type promotion behaviour.
;;
;; - If we encounter a `*float-or-int*` inside an `int` column, then we can safely coerce it down to an integer.
;; - If we encounter a `float` (i.e. a non-zero fraction component), then we need to promote the column to a `float.`
;;
;; Columns can not have an abstract type, which has no meaning outside of inference and reconciliation.
;; If we are left with an abstract type after having processed all the values, we first check whether we can coerce
;; the type to the existing column type, and otherwise traverse further up the graph until we reach a concrete type.
;;
;; For ease of reference and explicitness these corresponding values are given in the `abstract->concrete` map.
;; One can figure out these mappings by simply looking up through the ancestors. For now, we require that it is always
;; a direct ancestor, and lay out or graph so that it is the left-most one.
(def h
"This hierarchy defines a relationship between value types and their specializations.
We use an [[metabase.util.ordered-hierarchy]] for its topological sorting, which simplify writing efficient and
consistent implementations for of our type inference, parsing, and relaxation."
(make-hierarchy
[::text
[::varchar-255
[::boolean ::*boolean-int*]
[::float
;; A number value with a decimal separator, but a zero fractional component.
[::*float-or-int*
[::int
;; A value that could be legally parsed as either a boolean OR an integer
::*boolean-int*
::auto-incrementing-int-pk]]]
[::datetime ::date]
::offset-datetime]]))
(def ^:private abstract->concrete
"Not all value types correspond to column types. We refer to these as \"abstract\" types, and give them *ear-muffs*.
This maps implicitly defines the abstract types, by mapping them each to a default concretion."
{::*boolean-int* ::boolean
::*float-or-int* ::float})
(def ^:private allowed-promotions
"A mapping of which types a column can be implicitly relaxed to, based on the content of appended values.
If we require a relaxation which is not allow-listed here, we will reject the corresponding file."
{::int #{::float}})
(def ^:private column-type->coercible-value-types
"A mapping of which value types should be coerced to the given existing type, rather than triggering promotion."
{::int #{::*float-or-int*}})
(defn- coerce?
"Can values of the given type be coerced to the given existing column type, in a lossless fashion?"
[column-type value-type]
(contains? (column-type->coercible-value-types column-type) value-type))
(def value-types
"All type tags which values can be inferred as. An ordered set from most to least specialized."
(ordered-hierarchy/sorted-tags h))
(def column-types
"All type tags that correspond to concrete column types."
(into #{} (remove abstract->concrete) value-types))
(defn- column-type?
[value-type]
(contains? column-types value-type))
(defn concretize
"Determine the desired column-type given the existing column-type (nil if it's new) and the value-type of the data.
If there's a valid coercion to the existing type, we will preserve it, but otherwise we will relax abstract types
further to a concrete type."
[existing-type value-type]
(cond
;; If the type is concrete, there is nothing to do.
(column-type? value-type) value-type
;; If we know nothing about the value type, treat it as an arbitrary string.
(nil? value-type) ::text
;; If configured, coerce the value to the existing type
(coerce? existing-type value-type) existing-type
;; Otherwise, project it to its canonical concretion.
:else (abstract->concrete value-type)))
;;;;;;;;;;;;;;;;;;;;;;;;;;
;; [[value->type]] helpers
(defn- with-parens
"Returns a regex that matches the argument, with or without surrounding parentheses."
[number-regex]
(re-pattern (str "(" number-regex ")|(\\(" number-regex "\\))")))
(defn- with-currency
"Returns a regex that matches a positive or negative number, including currency symbols"
[number-regex]
;; currency signs can be all over: $2, -$2, $-2, 2€
(re-pattern (str upload-parsing/currency-regex "?\\s*-?"
upload-parsing/currency-regex "?"
number-regex
"\\s*" upload-parsing/currency-regex "?")))
(defn- int-regex
"Matches numbers which do not have a decimal separator."
[number-separators]
(with-parens
(with-currency
(case number-separators
("." ".,") #"\d[\d,]*"
",." #"\d[\d.]*"
", " #"\d[\d \u00A0]*"
".’" #"\d[\d’]*"))))
(defn- float-or-int-regex
"Matches integral numbers, even if they have a decimal separator - e.g. 2 or 2.0"
[number-separators]
(with-parens
(with-currency
(case number-separators
("." ".,") #"\d[\d,]*(\.0+)?"
",." #"\d[\d.]*(\,[0]+)?"
", " #"\d[\d \u00A0]*(\,[0.]+)?"
".’" #"\d[\d’]*(\.[0.]+)?"))))
(defn- float-regex
"Matches numbers, regardless of whether they have a decimal separator - e.g. 2, 2.0, or 2.2"
[number-separators]
(with-parens
(with-currency
(case number-separators
("." ".,") #"\d[\d,]*(\.\d+)?"
",." #"\d[\d.]*(\,[\d]+)?"
", " #"\d[\d \u00A0]*(\,[\d.]+)?"
".’" #"\d[\d’]*(\.[\d.]+)?"))))
(defmacro does-not-throw?
"Returns true if the given body does not throw an exception."
[body]
`(try
~body
true
(catch Throwable _e#
false)))
(defn- date-string? [s]
(does-not-throw? (upload-parsing/parse-local-date s)))
(defn- datetime-string? [s]
(does-not-throw? (upload-parsing/parse-local-datetime s)))
(defn- offset-datetime-string? [s]
(does-not-throw? (upload-parsing/parse-offset-datetime s)))
(defn- boolean-string? [s]
(boolean (re-matches #"(?i)true|t|yes|y|1|false|f|no|n|0" s)))
(defn- boolean-int-string? [s]
(contains? #{"0" "1"} s))
(defn- varchar-255? [s]
(<= (count s) 255))
(defn- regex-matcher [regex]
(fn [s]
(boolean (re-matches regex s))))
;; end [[value->type]] helpers
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(def ^:private non-inferable-types
#{::auto-incrementing-int-pk})
(def ^:private type->check-schema
"Every inferable value-type needs to have a detection function registered."
(into [:map] (map #(vector % [:=> [:cat :string] :boolean])
(remove non-inferable-types value-types))))
(mu/defn ^:private settings->type->check :- type->check-schema
[{:keys [number-separators] :as _settings}]
(let [int-string? (regex-matcher (int-regex number-separators))
float-or-int? (regex-matcher (float-or-int-regex number-separators))
float-string? (regex-matcher (float-regex number-separators))]
{::*boolean-int* boolean-int-string?
::boolean boolean-string?
::offset-datetime offset-datetime-string?
::date date-string?
::datetime datetime-string?
::int int-string?
::*float-or-int* float-or-int?
::float float-string?
::varchar-255 varchar-255?
::text (constantly true)}))
(defn- value->type
"Determine the most specific type that is compatible with the given value.
Numbers are assumed to use separators corresponding to the locale defined in the application settings"
[type->check value]
(when-not (str/blank? value)
(let [trimmed (str/trim value)]
(->> (remove non-inferable-types value-types)
(filter #((type->check %) trimmed))
first))))
(defn- relax-type
"Given an existing column type, and a new value, relax the type until it includes the value."
[type->check current-type value]
(cond (nil? value) current-type
(nil? current-type) (value->type type->check value)
:else (let [trimmed (str/trim value)]
(if (str/blank? trimmed)
current-type
(->> (cons current-type (ancestors h current-type))
(filter #((type->check %) trimmed))
first)))))
(defn type-relaxer
"Given a map of {value-type -> predicate}, return a reducing fn which updates our inferred schema using the next row."
[settings]
(let [relax (partial relax-type (settings->type->check settings))]
(fn [value-types row]
;; It's important to realize this lazy sequence, because otherwise we can build a huge stack and overflow.
(vec (u/map-all relax value-types row)))))
(mu/defn column-types-from-rows :- [:sequential (into [:enum] column-types)]
"Given the types of the existing columns (if there are any), and rows to be added, infer the best supporting types."
[settings existing-types rows]
(->> (reduce (type-relaxer settings) existing-types rows)
(u/map-all concretize existing-types)))
(defn base-type->upload-type
"Returns the most specific upload type for the given base type."
[base-type]
(when base-type
(condp #(isa? %2 %1) base-type
:type/Float ::float
:type/BigInteger ::int
:type/Integer ::int
:type/Boolean ::boolean
:type/DateTimeWithTZ ::offset-datetime
:type/DateTime ::datetime
:type/Date ::date
:type/Text ::text)))
(defn- promotable?
"Are we allowed to promote a column's schema from `current-type` to `inferred-type`?"
[current-type inferred-type]
(when-let [allowed? (allowed-promotions current-type)]
(allowed? inferred-type)))
(defn new-type
"Given the `current-type` of a column, and an `inferred-type` for new values to be added, return its new type.
This assumes we have already coerced the new values down to the existing type, if possible."
[current-type inferred-type]
(cond
;; No restriction on new columns
(nil? current-type) inferred-type
;; No changes required if inferred type matches
(= current-type inferred-type) current-type
:else
;; Keep the existing type unless a promotion is allowed.
(if (promotable? current-type inferred-type)
inferred-type
current-type)))
(ns ^:mb/once metabase.upload.types-test
(:require
[clojure.test :refer [deftest is testing]]
[clojure.walk :as walk]
[java-time.api :as t]
[metabase.upload.parsing :as upload-parsing]
[metabase.upload.types :as upload-types]
[metabase.util.ordered-hierarchy :as ordered-hierarchy]))
(def ^:private bool-type ::upload-types/boolean)
(def ^:private int-type ::upload-types/int)
(def ^:private bool-int-type ::upload-types/*boolean-int*)
(def ^:private float-type ::upload-types/float)
(def ^:private float-or-int-type ::upload-types/*float-or-int*)
(def ^:private vchar-type ::upload-types/varchar-255)
(def ^:private date-type ::upload-types/date)
(def ^:private datetime-type ::upload-types/datetime)
(def ^:private offset-dt-type ::upload-types/offset-datetime)
(def ^:private text-type ::upload-types/text)
(deftest ^:parallel type-detection-and-parse-test
(doseq [[string-value expected-value expected-type separators]
;; Number-related
[["0.0" 0 float-or-int-type "."]
["0.0" 0 float-or-int-type ".,"]
["0,0" 0 float-or-int-type ",."]
["0,0" 0 float-or-int-type ", "]
["0.0" 0 float-or-int-type ".’"]
["-0.0" -0.0 float-or-int-type "."]
["-0.0" -0.0 float-or-int-type ".,"]
["-0,0" -0.0 float-or-int-type ",."]
["-0,0" -0.0 float-or-int-type ", "]
["-0.0" -0.0 float-or-int-type ".’"]
["(0.0)" -0.0 float-or-int-type "."]
["(0.0)" -0.0 float-or-int-type ".,"]
["(0,0)" -0.0 float-or-int-type ",."]
["(0,0)" -0.0 float-or-int-type ", "]
["(0.0)" -0.0 float-or-int-type ".’"]
["-4300.00€" -4300 float-or-int-type ".,"]
["£1,000.00" 1000 float-or-int-type]
["£1,000.00" 1000 float-or-int-type "."]
["£1,000.00" 1000 float-or-int-type ".,"]
["£1.000,00" 1000 float-or-int-type ",."]
["£1 000,00" 1000 float-or-int-type ", "]
["£1’000.00" 1000 float-or-int-type ".’"]
["$2" 2 int-type]
["$ 3" 3 int-type]
["-43€" -43 int-type]
["(86)" -86 int-type]
["($86)" -86 int-type]
["£1000" 1000 int-type]
["£1000" 1000 int-type "."]
["£1000" 1000 int-type ".,"]
["£1000" 1000 int-type ",."]
["£1000" 1000 int-type ", "]
["£1000" 1000 int-type ".’"]
["-¥9" -9 int-type]
["₹ -13" -13 int-type]
["₪13" 13 int-type]
["₩-13" -13 int-type]
["₿42" 42 int-type]
["-99¢" -99 int-type]
["2" 2 int-type]
["-86" -86 int-type]
["9,986,000" 9986000 int-type]
["9,986,000" 9986000 int-type "."]
["9,986,000" 9986000 int-type ".,"]
["9.986.000" 9986000 int-type ",."]
["9’986’000" 9986000 int-type ".’"]
["$0" 0 int-type]
["-1" -1 int-type]
["0" false bool-int-type]
["1" true bool-int-type]
["9.986.000" "9.986.000" vchar-type ".,"]
["3.14" 3.14 float-type]
["3.14" 3.14 float-type "."]
["3.14" 3.14 float-type ".,"]
["3,14" 3.14 float-type ",."]
["3,14" 3.14 float-type ", "]
["(3.14)" -3.14 float-type]
["3.14" 3.14 float-type ".’"]
[".14" ".14" vchar-type ".,"] ;; TODO: this should be a float type
["0.14" 0.14 float-type ".,"]
["-9986.567" -9986.567 float-type ".,"]
["$2.0" 2 float-or-int-type ".,"]
["$ 3.50" 3.50 float-type ".,"]
["-4300.23€" -4300.23 float-type ".,"]
["£1,000.23" 1000.23 float-type]
["£1,000.23" 1000.23 float-type "."]
["£1,000.23" 1000.23 float-type ".,"]
["£1.000,23" 1000.23 float-type ",."]
["£1 000,23" 1000.23 float-type ", "]
["£1’000.23" 1000.23 float-type ".’"]
["-¥9.99" -9.99 float-type ".,"]
["₹ -13.23" -13.23 float-type ".,"]
["₪13.01" 13.01 float-type ".,"]
["₩13.33" 13.33 float-type ".,"]
["₿42.243646" 42.243646 float-type ".,"]
["-99.99¢" -99.99 float-type ".,"]
["." "." vchar-type]
;; String-related
[(apply str (repeat 255 "x")) (apply str (repeat 255 "x")) vchar-type]
[(apply str (repeat 256 "x")) (apply str (repeat 256 "x")) text-type]
["86 is my favorite number" "86 is my favorite number" vchar-type]
["My favorite number is 86" "My favorite number is 86" vchar-type]
;; Date-related
[" 2022-01-01 " #t "2022-01-01" date-type]
[" 2022-02-30 " " 2022-02-30 " vchar-type]
[" -2022-01-01 " #t "-2022-01-01" date-type]
[" Jan 1 2018" #t "2018-01-01" date-type]
[" Jan 02 2018" #t "2018-01-02" date-type]
[" Jan 30 -2018" #t "-2018-01-30" date-type]
[" Jan 1, 2018" #t "2018-01-01" date-type]
[" Jan 02, 2018" #t "2018-01-02" date-type]
[" Feb 30, 2018" " Feb 30, 2018" vchar-type]
[" 1 Jan 2018" #t "2018-01-01" date-type]
[" 02 Jan 2018" #t "2018-01-02" date-type]
[" 1 Jan, 2018" #t "2018-01-01" date-type]
[" 02 Jan, 2018" #t "2018-01-02" date-type]
[" January 1 2018" #t "2018-01-01" date-type]
[" January 02 2018" #t "2018-01-02" date-type]
[" January 1, 2018" #t "2018-01-01" date-type]
[" January 02, 2018" #t "2018-01-02" date-type]
[" 1 January 2018" #t "2018-01-01" date-type]
[" 02 January 2018" #t "2018-01-02" date-type]
[" 1 January, 2018" #t "2018-01-01" date-type]
[" 02 January, 2018" #t "2018-01-02" date-type]
[" Saturday, January 1 2000" #t "2000-01-01" date-type]
[" Sunday, January 02 2000" #t "2000-01-02" date-type]
[" Saturday, January 1, 2000" #t "2000-01-01" date-type]
[" Sunday, January 02, 2000" #t "2000-01-02" date-type]
[" 2022-01-01T01:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01t01:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01 01:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01T01:00:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01t01:00:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01 01:00:00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01T01:00:00.00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01t01:00:00.00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01 01:00:00.00 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01T01:00:00.000000000 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01t01:00:00.000000000 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01 01:00:00.000000000 " #t "2022-01-01T01:00" datetime-type]
[" 2022-01-01T01:00:00.00-07 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01t01:00:00.00-07 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01 01:00:00.00-07 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01T01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01t01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01 01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01T01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01t01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01 01:00:00.00-07:00 " #t "2022-01-01T01:00-07:00" offset-dt-type]
[" 2022-01-01T01:00:00.00Z " (t/offset-date-time "2022-01-01T01:00+00:00") offset-dt-type]
[" 2022-01-01t01:00:00.00Z " (t/offset-date-time "2022-01-01T01:00+00:00") offset-dt-type]
[" 2022-01-01 01:00:00.00Z " (t/offset-date-time "2022-01-01T01:00+00:00") offset-dt-type]]]
(let [settings {:number-separators (or separators ".,")}
type->check (#'upload-types/settings->type->check settings)
value-type (#'upload-types/value->type type->check string-value)
;; get the type of the column, if we created it based on only that value
col-type (first (upload-types/column-types-from-rows settings nil [[string-value]]))
parser (upload-parsing/upload-type->parser col-type settings)]
(testing (format "\"%s\" is a %s" string-value value-type)
(is (= expected-type
value-type)))
(testing (format "\"%s\" is parsed into %s" string-value expected-value)
(is (= expected-value
(parser string-value)))))))
(deftest ^:parallel type-coalescing-test
(doseq [[type-a type-b expected]
[[bool-type bool-type bool-type]
[bool-type int-type vchar-type]
[bool-type bool-int-type bool-type]
[bool-type date-type vchar-type]
[bool-type datetime-type vchar-type]
[bool-type vchar-type vchar-type]
[bool-type text-type text-type]
[int-type bool-type vchar-type]
[int-type float-type float-type]
[int-type date-type vchar-type]
[int-type datetime-type vchar-type]
[int-type vchar-type vchar-type]
[int-type text-type text-type]
[int-type bool-int-type int-type]
[bool-int-type bool-int-type bool-int-type]
[float-type vchar-type vchar-type]
[float-type text-type text-type]
[float-type date-type vchar-type]
[float-type datetime-type vchar-type]
[float-type text-type text-type]
[float-type date-type vchar-type]
[float-type datetime-type vchar-type]
[date-type datetime-type datetime-type]
[date-type vchar-type vchar-type]
[date-type text-type text-type]
[datetime-type vchar-type vchar-type]
[offset-dt-type vchar-type vchar-type]
[datetime-type text-type text-type]
[offset-dt-type text-type text-type]
[vchar-type text-type text-type]]]
(is (= expected (ordered-hierarchy/first-common-ancestor upload-types/h type-a type-b))
(format "%s + %s = %s" (name type-a) (name type-b) (name expected)))))
(deftest ^:parallel coercion-soundness-test
(testing "Every coercion maps to a stricter type that is a direct descendant"
(is (empty?
;; Build a set of all type-pairs that violate this constraint
(into (sorted-set)
(comp (mapcat (fn [[column-type value-types]]
(for [value-type value-types]
[column-type value-type])))
(remove (fn [[column-type value-type]]
;; Strictly speaking we only require that there is a route which only traverses
;; through abstract nodes, but it's much simpler to enforce this stronger condition:
;; that `column-type` is a child of `value-type`
(contains? (ordered-hierarchy/children upload-types/h value-type) column-type))))
@#'upload-types/column-type->coercible-value-types)))))
(deftest ^:parallel initial-column-type-test
(let [column-type (partial upload-types/concretize nil)]
(testing "Unknown value types are treated as text"
(is (= ::upload-types/text (column-type nil))))
(testing "Non-abstract value types resolve to themselves"
(let [ts upload-types/column-types]
(is (= (zipmap ts ts)
(zipmap (map column-type ts) ts)))))
(testing "Abstract values are resolved using an explicit map, which is consistent with the hierarchy"
(doseq [[value-type expected-column-type] @#'upload-types/abstract->concrete]
(is (= expected-column-type (column-type value-type)))
;; Strictly speaking we only require that there is a route from each abstract node to its column type which only
;; traverses through abstract nodes, but it's much simpler to enforce this stronger condition.
(is (contains? (parents upload-types/h value-type) expected-column-type))))))
(deftest ^:parallel append-column-type-test
(doseq [existing-type upload-types/value-types
value-type upload-types/value-types]
(case [existing-type value-type]
[::upload-types/int ::upload-types/*float-or-int*]
(testing "We coerce floats with fractional part to plan integers when appending into an existing integer column"
(is (= ::upload-types/int (upload-types/concretize existing-type value-type))))
;; This is unsatisfying, would be good if this interface also covered promoting columns and rejecting values.
(testing (format "We append %s values to %s columns as if we were inserting them into new columns" existing-type value-type)
(is (= (upload-types/concretize nil value-type)
(upload-types/concretize existing-type value-type)))))))
(defn- re-namespace
"Replace the namespace in any namespaced keyword with this test namespace, return all other values unchanged."
[x]
(if (and (keyword? x) (namespace x))
(keyword (name (ns-name *ns*)) (name x))
x))
;; Translate the hierarchy to talk about this namespace, so that we can omit an explicit namespace for brevity.
(def ^:private h (walk/postwalk re-namespace upload-types/h))
(deftest ^:parallel parents-test
(testing "Parents are listed according to the order that this tag was derived from each of them"
(is (nil? (parents h ::text)))
(is (= [::text] (vec (parents h ::varchar-255))))
(is (= [::*float-or-int*] (vec (parents h ::int))))
(is (= [::boolean ::int] (vec (parents h ::*boolean-int*))))))
(deftest ^:parallel children-test
(testing "Children are listed in reverse order to when they were each derived from this tag"
(is (nil? (ordered-hierarchy/children h ::*boolean-int*)))
(is (= [::varchar-255] (vec (ordered-hierarchy/children h ::text))))
(is (= [::*float-or-int*] (vec (ordered-hierarchy/children h ::float))))
(is (= [::auto-incrementing-int-pk ::*boolean-int*] (vec (ordered-hierarchy/children h ::int))))))
(deftest ^:parallel ancestors-test
(testing "Linear ancestors are listed in order"
(is (nil? (ancestors h ::text)))
(is (= [::text] (vec (ancestors h ::varchar-255))))
(is (= [::varchar-255 ::text] (vec (ancestors h ::boolean))))
(is (= [::*float-or-int* ::float ::varchar-255 ::text] (vec (ancestors h ::int)))))
(testing "Non-linear ancestors are listed in topological order, following edges in the order they were defined."
(is (= [::boolean
::int
::*float-or-int*
::float
::varchar-255
::text]
(vec (ancestors h ::*boolean-int*))))))
(deftest ^:parallel descendants-test
(testing "Linear descendants are listed in order"
(is (nil? (descendants h ::*boolean-int*)))
(is (nil? (descendants h ::date)))
(is (= [::date] (vec (descendants h ::datetime))))
(is (= [::*boolean-int*] (vec (descendants h ::boolean)))))
(testing "Non-linear descendants are listed in reverse topological order, following edges in reserve order."
(is (= [::*float-or-int*
::int
::auto-incrementing-int-pk
::*boolean-int*]
(vec (descendants h ::float))))
(is (= [::varchar-255
::offset-datetime
::datetime
::date
::float
::*float-or-int*
::int
::auto-incrementing-int-pk
::boolean
::*boolean-int*]
(vec (descendants h ::text))))))
(deftest ^:parallel sorted-tags-test
(testing "Tags are returned in a topological ordering that also preserves insertion order of the edges."
(is (= [::*boolean-int*
::boolean
::auto-incrementing-int-pk
::int
::*float-or-int*
::float
::date
::datetime
::offset-datetime
::varchar-255
::text]
(vec (ordered-hierarchy/sorted-tags h))))))
(deftest ^:parallel first-common-ancestor-test
(testing "The first-common-ancestor is the first tag in the lineage of tag-a that is also in the lineage of tag-b"
(is (= ::*boolean-int* (ordered-hierarchy/first-common-ancestor h ::*boolean-int* nil)))
(is (= ::*boolean-int* (ordered-hierarchy/first-common-ancestor h ::*boolean-int* ::*boolean-int*)))
(is (= ::boolean (ordered-hierarchy/first-common-ancestor h ::*boolean-int* ::boolean)))
(is (= ::varchar-255 (ordered-hierarchy/first-common-ancestor h ::boolean ::int)))))
This diff is collapsed.
(ns metabase.util.ordered-hierarchy-test (ns metabase.util.ordered-hierarchy-test
(:require (:require
[clojure.test :refer [deftest is testing]] [clojure.test :refer [deftest is testing]]
[clojure.walk :as walk]
[metabase.upload :as upload]
[metabase.util.ordered-hierarchy :as ordered-hierarchy])) [metabase.util.ordered-hierarchy :as ordered-hierarchy]))
;;; It would be nice to have property tests, to expose any subtle edge cases. (def ^:private polygons
;;; For now, we use a translation of some real world usage in the app. (ordered-hierarchy/make-hierarchy
(def ^:private h [:quadrilateral
(walk/postwalk [:trapezoid :isosceles-trapezoid :right-trapezoid]
(fn [x] [:kite [:rhombus :square]]
(if (and (keyword? x) (namespace x)) [:parallelogram
(keyword (name (ns-name *ns*)) (name x)) :rhombus
x)) [:rectangle :square]]]
@#'upload/h)) [:triangle
:scalene-triangle
(deftest ^:parallel parents-test [:isosceles-triangle :equilateral-triangle]
(testing "Parents are listed according to the order that this tag was derived from each of them" [:acute-triangle :equilateral-triangle]
(is (nil? (parents h ::text))) :right-angled-triangle
(is (= [::text] (vec (parents h ::varchar-255)))) :obtuse-triangle]))
(is (= [::*float-or-int*] (vec (parents h ::int))))
(is (= [::boolean ::int] (vec (parents h ::*boolean-int*)))))) (deftest make-hierarchy-test
(testing "Hiccup structures have the expected topological order"
(deftest ^:parallel children-test (is (= [:isosceles-trapezoid
(testing "Children are listed in reverse order to when they were each derived from this tag" :right-trapezoid
(is (nil? (ordered-hierarchy/children h ::*boolean-int*))) :trapezoid
(is (= [::varchar-255] (vec (ordered-hierarchy/children h ::text)))) :square
(is (= [::*float-or-int*] (vec (ordered-hierarchy/children h ::float)))) :rhombus
(is (= [::auto-incrementing-int-pk ::*boolean-int*] (vec (ordered-hierarchy/children h ::int)))))) :kite
:rectangle
(deftest ^:parallel ancestors-test :parallelogram
(testing "Linear ancestors are listed in order" :quadrilateral
(is (nil? (ancestors h ::text))) ;; it's unfortunate that we would exhaustively test all the quadrilateral types, before checking
(is (= [::text] (vec (ancestors h ::varchar-255)))) ;; if it's a triangle (if "hypothetically" we were using the topological order to test a value
(is (= [::varchar-255 ::text] (vec (ancestors h ::boolean)))) ;; ... this is a case where a root-to-leaf traversal would make more sense.
(is (= [::*float-or-int* ::float ::varchar-255 ::text] (vec (ancestors h ::int))))) :scalene-triangle
:equilateral-triangle
(testing "Non-linear ancestors are listed in topological order, following edges in the order they were defined." :isosceles-triangle
(is (= [::boolean :acute-triangle
::int :right-angled-triangle
::*float-or-int* :obtuse-triangle
::float :triangle]
::varchar-255 (vec (ordered-hierarchy/sorted-tags polygons)))))
::text]
(vec (ancestors h ::*boolean-int*)))))) (testing "Hiccup structures are translated into the expected graph structure"
(is (= {:trapezoid [:quadrilateral]
(deftest ^:parallel descendants-test :isosceles-trapezoid [:trapezoid]
(testing "Linear descendants are listed in order" :right-trapezoid [:trapezoid]
(is (nil? (descendants h ::*boolean-int*))) :kite [:quadrilateral]
(is (nil? (descendants h ::date))) :rhombus [:kite :parallelogram]
(is (= [::date] (vec (descendants h ::datetime)))) :square [:rhombus :rectangle]
(is (= [::*boolean-int*] (vec (descendants h ::boolean))))) :parallelogram [:quadrilateral]
:rectangle [:parallelogram]
(testing "Non-linear descendants are listed in reverse topological order, following edges in reserve order." :scalene-triangle [:triangle]
(is (= [::*float-or-int* :isosceles-triangle [:triangle]
::int :equilateral-triangle [:isosceles-triangle :acute-triangle]
::auto-incrementing-int-pk :acute-triangle [:triangle]
::*boolean-int*] :right-angled-triangle [:triangle]
(vec (descendants h ::float)))) :obtuse-triangle [:triangle]}
(is (= [::varchar-255 (update-vals (:parents polygons) vec)))))
::offset-datetime
::datetime
::date
::float
::*float-or-int*
::int
::auto-incrementing-int-pk
::boolean
::*boolean-int*]
(vec (descendants h ::text))))))
(deftest ^:parallel sorted-tags-test
(testing "Tags are returned in a topological ordering that also preserves insertion order of the edges."
(is (= [::*boolean-int*
::boolean
::auto-incrementing-int-pk
::int
::*float-or-int*
::float
::date
::datetime
::offset-datetime
::varchar-255
::text]
(vec (ordered-hierarchy/sorted-tags h))))))
(deftest ^:parallel first-common-ancestor-test
(testing "The first-common-ancestor is the first tag in the lineage of tag-a that is also in the lineage of tag-b"
(is (= ::*boolean-int* (ordered-hierarchy/first-common-ancestor h ::*boolean-int* nil)))
(is (= ::*boolean-int* (ordered-hierarchy/first-common-ancestor h ::*boolean-int* ::*boolean-int*)))
(is (= ::boolean (ordered-hierarchy/first-common-ancestor h ::*boolean-int* ::boolean)))
(is (= ::varchar-255 (ordered-hierarchy/first-common-ancestor h ::boolean ::int)))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment