From 862daf338a63d23a9bba8eab3707e33bc4788881 Mon Sep 17 00:00:00 2001
From: Chris Truter <crisptrutski@users.noreply.github.com>
Date: Thu, 4 Apr 2024 12:30:28 +0200
Subject: [PATCH] Break out upload.types namespace (#40741)

---
 src/metabase/upload.clj                       | 308 +---------------
 src/metabase/upload/parsing.clj               |  18 +-
 src/metabase/upload/types.cljc                | 301 +++++++++++++++
 test/metabase/upload/types_test.clj           | 332 +++++++++++++++++
 test/metabase/upload_test.clj                 | 347 +++---------------
 test/metabase/util/ordered_hierarchy_test.clj | 142 +++----
 6 files changed, 770 insertions(+), 678 deletions(-)
 create mode 100644 src/metabase/upload/types.cljc
 create mode 100644 test/metabase/upload/types_test.clj

diff --git a/src/metabase/upload.clj b/src/metabase/upload.clj
index 36fc8b9e877..f1e96a9912a 100644
--- a/src/metabase/upload.clj
+++ b/src/metabase/upload.clj
@@ -1,5 +1,4 @@
 (ns metabase.upload
-  (:refer-clojure :exclude [derive make-hierarchy parents])
   (:require
    [clj-bom.core :as bom]
    [clojure.data :as data]
@@ -29,265 +28,17 @@
    [metabase.sync.sync-metadata.fields :as sync-fields]
    [metabase.sync.sync-metadata.tables :as sync-tables]
    [metabase.upload.parsing :as upload-parsing]
+   [metabase.upload.types :as upload-types]
    [metabase.util :as u]
    [metabase.util.i18n :refer [tru]]
    [metabase.util.malli :as mu]
    [metabase.util.malli.schema :as ms]
-   [metabase.util.ordered-hierarchy :as ordered-hierarchy :refer [make-hierarchy]]
    [toucan2.core :as t2])
   (:import
    (java.io File)))
 
 (set! *warn-on-reflection* true)
 
-;;;; +------------------+
-;;;; | Schema detection |
-;;;; +------------------+
-
-;; Upload value-types form a DAG (directed acyclic graph) where each type can be relaxed into any of its ancestors.
-;; We parse each value in the CSV file to the most-specific possible type for each column.
-
-;; The most-specific possible type for a column is the closest common ancestor of the types for each value in the
-;; column, found by walking through the graph in topological order, following edges from left to right.
-;; Note that this type is not guaranteed to be one of the least common ancestors!
-;;
-;; See [[metabase.util.ordered-hierarchy/first-common-ancestor]] for more details.
-;;
-;; <pre><code>
-;;
-;;              text
-;;               |
-;;               |
-;;          varchar-255──────
-;;        /      /  \        \
-;;      /      /      \       \
-;;  boolean  float   datetime  offset-datetime
-;;     |       │        │
-;;     |       │        │
-;;     │ *float-or-int* │
-;;     │       │        │
-;;     │       │        │
-;;     |      int      date
-;;     |   /       \
-;;     |  /         \
-;; *boolean-int*  auto-incrementing-int-pk
-;;
-;; </code></pre>
-;;
-;; We have a number of special "abstract" nodes in this graph:
-;;
-;; - `*boolean-int*` is an ambiguous node, that could either be parsed as a boolean or as an integer.
-;; - `*float-or-int*` is any integer, whether it has an explicit decimal point or not.
-;;
-;; While a `*boolean-int*` is a genuinely ambiguous value, `*float-or-int*` exist to power our desired value-type
-;; coercion and column-type promotion behaviour.
-;;
-;; - If we encounter a `*float-or-int*` inside an `int` column, then we can safely coerce it down to an integer.
-;; - If we encounter a `float` (i.e. a non-zero fraction component), then we need to promote the column to a `float.`
-;;
-;; Columns can not have an abstract type, which has no meaning outside of inference and reconciliation.
-;; If we are left with an abstract type after having processed all the values, we first check whether we can coerce
-;; the type to the existing column type, and otherwise traverse further up the graph until we reach a concrete type.
-;;
-;; For ease of reference and explicitness these corresponding values are given in the `abstract->concrete` map.
-;; One can figure out these mappings by simply looking up through the ancestors. For now, we require that it is always
-;; a direct ancestor, and lay out or graph so that it is the left-most one.
-
-(def ^:private h
-  "This hierarchy defines a relationship between value types and their specializations.
-  We use an [[metabase.util.ordered-hierarchy]] for its topological sorting, which simplify writing efficient and
-  consistent implementations for of our type inference, parsing, and relaxation."
-  (make-hierarchy
-   [::text
-    [::varchar-255
-     [::boolean ::*boolean-int*]
-     [::float
-      ;; A number value with a decimal separator, but a zero fractional component.
-      [::*float-or-int*
-       [::int
-        ;; A value that could be legally parsed as either a boolean OR an integer
-        ::*boolean-int*
-        ::auto-incrementing-int-pk]]]
-     [::datetime ::date]
-     ::offset-datetime]]))
-
-(def ^:private abstract->concrete
-  "Not all value types correspond to column types. We refer to these as \"abstract\" types, and give them *ear-muffs*.
-  This maps implicitly defines the abstract types, by mapping them each to a default concretion."
-  {::*boolean-int*  ::boolean
-   ::*float-or-int* ::float})
-
-(def ^:private allowed-promotions
-  "A mapping of which types a column can be implicitly relaxed to, based on the content of appended values.
-  If we require a relaxation which is not allow-listed here, we will reject the corresponding file."
-  {::int #{::float}})
-
-(def ^:private column-type->coercible-value-types
-  "A mapping of which value types should be coerced to the given existing type, rather than triggering promotion."
-  {::int #{::*float-or-int*}})
-
-(defn- coerce?
-  "Can values of the given type be coerced to the given existing column type, in a lossless fashion?"
-  [column-type value-type]
-  (contains? (column-type->coercible-value-types column-type) value-type))
-
-(def ^:private value-types
-  "All type tags which values can be inferred as. An ordered set from most to least specialized."
-  (ordered-hierarchy/sorted-tags h))
-
-(def ^:private column-types
-  "All type tags that correspond to concrete column types."
-  (into #{} (remove abstract->concrete) value-types))
-
-(defn- column-type?
-  [value-type]
-  (contains? column-types value-type))
-
-(defn ^:private concretize
-  "Determine the desired column-type given the existing column-type (nil if it's new) and the value-type of the data.
-  If there's a valid coercion to the existing type, we will preserve it, but otherwise we will relax abstract types
-  further to a concrete type."
-  [existing-type value-type]
-  (cond
-    ;; If the type is concrete, there is nothing to do.
-    (column-type? value-type) value-type
-    ;; If we know nothing about the value type, treat it as an arbitrary string.
-    (nil? value-type) ::text
-    ;; If configured, coerce the value to the existing type
-    (coerce? existing-type value-type) existing-type
-    ;; Otherwise, project it to its canonical concretion.
-    :else (abstract->concrete value-type)))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; [[value->type]] helpers
-
-(defn- with-parens
-  "Returns a regex that matches the argument, with or without surrounding parentheses."
-  [number-regex]
-  (re-pattern (str "(" number-regex ")|(\\(" number-regex "\\))")))
-
-(defn- with-currency
-  "Returns a regex that matches a positive or negative number, including currency symbols"
-  [number-regex]
-  ;; currency signs can be all over: $2, -$2, $-2, 2€
-  (re-pattern (str upload-parsing/currency-regex "?\\s*-?"
-                   upload-parsing/currency-regex "?"
-                   number-regex
-                   "\\s*" upload-parsing/currency-regex "?")))
-
-(defn- int-regex
-  "Matches numbers which do not have a decimal separator."
-  [number-separators]
-  (with-parens
-    (with-currency
-      (case number-separators
-        ("." ".,") #"\d[\d,]*"
-        ",." #"\d[\d.]*"
-        ", " #"\d[\d \u00A0]*"
-        ".’" #"\d[\d’]*"))))
-
-(defn- float-or-int-regex
-  "Matches integral numbers, even if they have a decimal separator - e.g. 2 or 2.0"
-  [number-separators]
-  (with-parens
-   (with-currency
-    (case number-separators
-      ("." ".,") #"\d[\d,]*(\.0+)?"
-      ",." #"\d[\d.]*(\,[0]+)?"
-      ", " #"\d[\d \u00A0]*(\,[0.]+)?"
-      ".’" #"\d[\d’]*(\.[0.]+)?"))))
-
-(defn- float-regex
-  "Matches numbers, regardless of whether they have a decimal separator - e.g. 2, 2.0, or 2.2"
-  [number-separators]
-  (with-parens
-    (with-currency
-      (case number-separators
-        ("." ".,") #"\d[\d,]*(\.\d+)?"
-        ",." #"\d[\d.]*(\,[\d]+)?"
-        ", " #"\d[\d \u00A0]*(\,[\d.]+)?"
-        ".’" #"\d[\d’]*(\.[\d.]+)?"))))
-
-(defmacro does-not-throw?
-  "Returns true if the given body does not throw an exception."
-  [body]
-  `(try
-     ~body
-     true
-     (catch Throwable e#
-       false)))
-
-(defn- date-string? [s]
-  (does-not-throw? (upload-parsing/parse-local-date s)))
-
-(defn- datetime-string? [s]
-  (does-not-throw? (upload-parsing/parse-local-datetime s)))
-
-(defn- offset-datetime-string? [s]
-  (does-not-throw? (upload-parsing/parse-offset-datetime s)))
-
-(defn- boolean-string? [s]
-  (boolean (re-matches #"(?i)true|t|yes|y|1|false|f|no|n|0" s)))
-
-(defn- boolean-int-string? [s]
-  (contains? #{"0" "1"} s))
-
-(defn- varchar-255? [s]
-  (<= (count s) 255))
-
-(defn- regex-matcher [regex]
-  (fn [s]
-    (boolean (re-matches regex s))))
-
-;; end [[value->type]] helpers
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(def ^:private non-inferable-types
-  #{::auto-incrementing-int-pk})
-
-(def ^:private type->check-schema
-  "Every inferable value-type needs to have a detection function registered."
-  (into [:map] (map #(vector % [:=> [:cat :string] :boolean])
-                    (remove non-inferable-types value-types))))
-
-(mu/defn ^:private settings->type->check :- type->check-schema
-  [{:keys [number-separators] :as _settings}]
-  (let [int-string?   (regex-matcher (int-regex number-separators))
-        float-or-int? (regex-matcher (float-or-int-regex number-separators))
-        float-string? (regex-matcher (float-regex number-separators))]
-    {::*boolean-int*   boolean-int-string?
-     ::boolean         boolean-string?
-     ::offset-datetime offset-datetime-string?
-     ::date            date-string?
-     ::datetime        datetime-string?
-     ::int             int-string?
-     ::*float-or-int*  float-or-int?
-     ::float           float-string?
-     ::varchar-255     varchar-255?
-     ::text            (constantly true)}))
-
-(defn- value->type
-  "Determine the most specific type that is compatible with the given value.
-  Numbers are assumed to use separators corresponding to the locale defined in the application settings"
-  [type->check value]
-  (when-not (str/blank? value)
-    (let [trimmed (str/trim value)]
-      (->> (remove non-inferable-types value-types)
-           (filter #((type->check %) trimmed))
-           first))))
-
-(defn- relax-type
-  "Given an existing column type, and a new value, relax the type until it includes the value."
-  [type->check current-type value]
-  (cond (nil? value) current-type
-        (nil? current-type) (value->type type->check value)
-        :else (let [trimmed (str/trim value)]
-                (if (str/blank? trimmed)
-                  current-type
-                  (->> (cons current-type (ancestors h current-type))
-                       (filter #((type->check %) trimmed))
-                       first)))))
-
 (defn- normalize-column-name
   [raw-name]
   (if (str/blank? raw-name)
@@ -307,20 +58,6 @@
                    (= (normalize-column-name (:name field)) auto-pk-column-name))
                  (t2/select :model/Field :table_id table-id :active true))))
 
-(defn- type-relaxer
-  "Given a map of {value-type -> predicate}, return a reducing fn which updates our inferred schema using the next row."
-  [settings]
-  (let [relax (partial relax-type (settings->type->check settings))]
-    (fn [value-types row]
-      ;; It's important to realize this lazy sequence, because otherwise we can build a huge stack and overflow.
-      (vec (u/map-all relax value-types row)))))
-
-(mu/defn column-types-from-rows :- [:sequential (into [:enum] column-types)]
-  "Given the types of the existing columns (if there are any), and rows to be added, infer the best supporting types."
-  [settings existing-types rows]
-  (->> (reduce (type-relaxer settings) existing-types rows)
-       (u/map-all concretize existing-types)))
-
 (defn- detect-schema
   "Consumes the header and rows from a CSV file.
 
@@ -336,11 +73,10 @@
         unique-header       (map keyword (mbql.u/uniquify-names normalized-header))
         column-count        (count normalized-header)
         initial-types       (repeat column-count nil)
-        col-name+type-pairs (->> (column-types-from-rows settings initial-types rows)
+        col-name+type-pairs (->> (upload-types/column-types-from-rows settings initial-types rows)
                                  (map vector unique-header))]
     {:extant-columns    (ordered-map/ordered-map col-name+type-pairs)
-     :generated-columns (ordered-map/ordered-map auto-pk-column-keyword ::auto-incrementing-int-pk)}))
-
+     :generated-columns (ordered-map/ordered-map auto-pk-column-keyword ::upload-types/auto-incrementing-int-pk)}))
 
 ;;;; +------------------+
 ;;;; |  Parsing values  |
@@ -372,10 +108,16 @@
     (str truncated-name-without-time
          (t/format time-format (strictly-monotonic-now)))))
 
+(mu/defn ^:private database-type
+  [driver
+   column-type :- (into [:enum] upload-types/column-types)]
+  (let [external-type (keyword "metabase.upload" (name column-type))]
+    (driver/upload-type->database-type driver external-type)))
+
 (defn- column-definitions
   "Returns a map of column-name -> column-definition from a map of column-name -> upload-type."
   [driver col->upload-type]
-  (update-vals col->upload-type (partial driver/upload-type->database-type driver)))
+  (update-vals col->upload-type (partial database-type driver)))
 
 (defn current-database
   "The database being used for uploads (as per the `uploads-database-id` setting)."
@@ -685,20 +427,6 @@
 ;;; |  appending to uploaded table
 ;;; +-----------------------------
 
-(defn- base-type->upload-type
-  "Returns the most specific upload type for the given base type."
-  [base-type]
-  (when base-type
-    (condp #(isa? %2 %1) base-type
-      :type/Float                  ::float
-      :type/BigInteger             ::int
-      :type/Integer                ::int
-      :type/Boolean                ::boolean
-      :type/DateTimeWithTZ         ::offset-datetime
-      :type/DateTime               ::datetime
-      :type/Date                   ::date
-      :type/Text                   ::text)))
-
 (defn- not-blank [s]
   (when-not (str/blank? s)
     s))
@@ -734,12 +462,6 @@
     (when-let [error-message (extra-and-missing-error-markdown extra missing)]
       (throw (ex-info error-message {:status-code 422})))))
 
-(defn- matching-or-promotable? [current-type relaxed-type]
-  (or (nil? current-type)
-      (= current-type relaxed-type)
-      (when-let [f (allowed-promotions current-type)]
-        (f relaxed-type))))
-
 (defn- field-changes
   "Given existing and newly inferred types for the given `field-names`, calculate which fields need to be added or updated, along with their new types."
   [field-names existing-types new-types]
@@ -756,7 +478,7 @@
   (m/map-kv
    (fn [field-name col-type]
      [(keyword field-name)
-      (driver/upload-type->database-type driver col-type)])
+      (database-type driver col-type)])
    field->col-type))
 
 (defn- add-columns! [driver database table field->type & args]
@@ -786,12 +508,12 @@
                                   (not (contains? normed-name->field auto-pk-column-name)))
               _                  (check-schema (dissoc normed-name->field auto-pk-column-name) header)
               settings           (upload-parsing/get-settings)
-              old-types          (map (comp base-type->upload-type :base_type normed-name->field) normed-header)
+              old-types          (map (comp upload-types/base-type->upload-type :base_type normed-name->field) normed-header)
               ;; in the happy, and most common, case all the values will match the existing types
               ;; for now we just plan for the worst and perform a fairly expensive operation to detect any type changes
               ;; we can come back and optimize this to an optimistic-with-fallback approach later.
-              detected-types     (column-types-from-rows settings old-types rows)
-              new-types          (map #(if (matching-or-promotable? %1 %2) %2 %1) old-types detected-types)
+              detected-types     (upload-types/column-types-from-rows settings old-types rows)
+              new-types          (map upload-types/new-type old-types detected-types)
               ;; avoid any schema modification unless all the promotions required by the file are supported,
               ;; choosing to not promote means that we will defer failure until we hit the first value that cannot
               ;; be parsed as its existing type - there is scope to improve these error messages in the future.
@@ -818,7 +540,7 @@
 
           (when create-auto-pk?
             (add-columns! driver database table
-                          {auto-pk-column-keyword ::auto-incrementing-int-pk}
+                          {auto-pk-column-keyword ::upload-types/auto-incrementing-int-pk}
                           :primary-key [auto-pk-column-keyword]))
 
           (scan-and-sync-table! database table)
diff --git a/src/metabase/upload/parsing.clj b/src/metabase/upload/parsing.clj
index 1d96cb94b9b..475a9323dbb 100644
--- a/src/metabase/upload/parsing.clj
+++ b/src/metabase/upload/parsing.clj
@@ -183,45 +183,45 @@
   (fn [upload-type _]
     upload-type))
 
-(defmethod upload-type->parser :metabase.upload/varchar-255
+(defmethod upload-type->parser :metabase.upload.types/varchar-255
   [_ _]
   identity)
 
-(defmethod upload-type->parser :metabase.upload/text
+(defmethod upload-type->parser :metabase.upload.types/text
   [_ _]
   identity)
 
-(defmethod upload-type->parser :metabase.upload/int
+(defmethod upload-type->parser :metabase.upload.types/int
   [_ {:keys [number-separators]}]
   (partial parse-as-biginteger number-separators))
 
-(defmethod upload-type->parser :metabase.upload/float
+(defmethod upload-type->parser :metabase.upload.types/float
   [_ {:keys [number-separators]}]
   (partial parse-number number-separators))
 
-(defmethod upload-type->parser :metabase.upload/auto-incrementing-int-pk
+(defmethod upload-type->parser :metabase.upload.types/auto-incrementing-int-pk
   [_ {:keys [number-separators]}]
   (partial parse-as-biginteger number-separators))
 
-(defmethod upload-type->parser :metabase.upload/boolean
+(defmethod upload-type->parser :metabase.upload.types/boolean
   [_ _]
   (comp
    parse-bool
    str/trim))
 
-(defmethod upload-type->parser :metabase.upload/date
+(defmethod upload-type->parser :metabase.upload.types/date
   [_ _]
   (comp
    parse-local-date
    str/trim))
 
-(defmethod upload-type->parser :metabase.upload/datetime
+(defmethod upload-type->parser :metabase.upload.types/datetime
   [_ _]
   (comp
    parse-as-datetime
    str/trim))
 
-(defmethod upload-type->parser :metabase.upload/offset-datetime
+(defmethod upload-type->parser :metabase.upload.types/offset-datetime
   [_ _]
   (comp
    parse-offset-datetime
diff --git a/src/metabase/upload/types.cljc b/src/metabase/upload/types.cljc
new file mode 100644
index 00000000000..94a1f721309
--- /dev/null
+++ b/src/metabase/upload/types.cljc
@@ -0,0 +1,301 @@
+(ns metabase.upload.types
+  (:refer-clojure :exclude [make-hierarchy])
+  (:require
+   [clojure.string :as str]
+   [metabase.upload.parsing :as upload-parsing]
+   [metabase.util :as u]
+   [metabase.util.malli :as mu]
+   [metabase.util.ordered-hierarchy :as ordered-hierarchy :refer [make-hierarchy]]))
+
+;; Upload value-types form a directed acyclic graph where each type can be relaxed into any of its ancestors.
+;; We parse each value in the CSV file to the most-specific possible type for each column.
+
+;; The most-specific possible type for a column is the closest common ancestor of the types for each value in the
+;; column, found by walking through the graph in topological order, following edges from left to right.
+;; Note that this type is not guaranteed to be one of the least common ancestors!
+;;
+;; See [[metabase.util.ordered-hierarchy/first-common-ancestor]] for more details.
+;;
+;; <pre><code>
+;;
+;;              text
+;;               |
+;;               |
+;;          varchar-255──────
+;;        /      /  \        \
+;;      /      /      \       \
+;;  boolean  float   datetime  offset-datetime
+;;     |       │        │
+;;     |       │        │
+;;     │ *float-or-int* │
+;;     │       │        │
+;;     │       │        │
+;;     |      int      date
+;;     |   /       \
+;;     |  /         \
+;; *boolean-int*  auto-incrementing-int-pk
+;;
+;; </code></pre>
+;;
+;; We have a number of special "abstract" nodes in this graph:
+;;
+;; - `*boolean-int*` is an ambiguous node, that could either be parsed as a boolean or as an integer.
+;; - `*float-or-int*` is any integer, whether it has an explicit decimal point or not.
+;;
+;; While a `*boolean-int*` is a genuinely ambiguous value, `*float-or-int*` exist to power our desired value-type
+;; coercion and column-type promotion behaviour.
+;;
+;; - If we encounter a `*float-or-int*` inside an `int` column, then we can safely coerce it down to an integer.
+;; - If we encounter a `float` (i.e. a non-zero fraction component), then we need to promote the column to a `float.`
+;;
+;; Columns can not have an abstract type, which has no meaning outside of inference and reconciliation.
+;; If we are left with an abstract type after having processed all the values, we first check whether we can coerce
+;; the type to the existing column type, and otherwise traverse further up the graph until we reach a concrete type.
+;;
+;; For ease of reference and explicitness these corresponding values are given in the `abstract->concrete` map.
+;; One can figure out these mappings by simply looking up through the ancestors. For now, we require that it is always
+;; a direct ancestor, and lay out or graph so that it is the left-most one.
+
+(def h
+  "This hierarchy defines a relationship between value types and their specializations.
+  We use an [[metabase.util.ordered-hierarchy]] for its topological sorting, which simplify writing efficient and
+  consistent implementations for of our type inference, parsing, and relaxation."
+  (make-hierarchy
+   [::text
+    [::varchar-255
+     [::boolean ::*boolean-int*]
+     [::float
+      ;; A number value with a decimal separator, but a zero fractional component.
+      [::*float-or-int*
+       [::int
+        ;; A value that could be legally parsed as either a boolean OR an integer
+        ::*boolean-int*
+        ::auto-incrementing-int-pk]]]
+     [::datetime ::date]
+     ::offset-datetime]]))
+
+(def ^:private abstract->concrete
+  "Not all value types correspond to column types. We refer to these as \"abstract\" types, and give them *ear-muffs*.
+  This maps implicitly defines the abstract types, by mapping them each to a default concretion."
+  {::*boolean-int*  ::boolean
+   ::*float-or-int* ::float})
+
+(def ^:private allowed-promotions
+  "A mapping of which types a column can be implicitly relaxed to, based on the content of appended values.
+  If we require a relaxation which is not allow-listed here, we will reject the corresponding file."
+  {::int #{::float}})
+
+(def ^:private column-type->coercible-value-types
+  "A mapping of which value types should be coerced to the given existing type, rather than triggering promotion."
+  {::int #{::*float-or-int*}})
+
+(defn- coerce?
+  "Can values of the given type be coerced to the given existing column type, in a lossless fashion?"
+  [column-type value-type]
+  (contains? (column-type->coercible-value-types column-type) value-type))
+
+(def value-types
+  "All type tags which values can be inferred as. An ordered set from most to least specialized."
+  (ordered-hierarchy/sorted-tags h))
+
+(def column-types
+  "All type tags that correspond to concrete column types."
+  (into #{} (remove abstract->concrete) value-types))
+
+(defn- column-type?
+  [value-type]
+  (contains? column-types value-type))
+
+(defn concretize
+  "Determine the desired column-type given the existing column-type (nil if it's new) and the value-type of the data.
+  If there's a valid coercion to the existing type, we will preserve it, but otherwise we will relax abstract types
+  further to a concrete type."
+  [existing-type value-type]
+  (cond
+    ;; If the type is concrete, there is nothing to do.
+    (column-type? value-type) value-type
+    ;; If we know nothing about the value type, treat it as an arbitrary string.
+    (nil? value-type) ::text
+    ;; If configured, coerce the value to the existing type
+    (coerce? existing-type value-type) existing-type
+    ;; Otherwise, project it to its canonical concretion.
+    :else (abstract->concrete value-type)))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; [[value->type]] helpers
+
+(defn- with-parens
+  "Returns a regex that matches the argument, with or without surrounding parentheses."
+  [number-regex]
+  (re-pattern (str "(" number-regex ")|(\\(" number-regex "\\))")))
+
+(defn- with-currency
+  "Returns a regex that matches a positive or negative number, including currency symbols"
+  [number-regex]
+  ;; currency signs can be all over: $2, -$2, $-2, 2€
+  (re-pattern (str upload-parsing/currency-regex "?\\s*-?"
+                   upload-parsing/currency-regex "?"
+                   number-regex
+                   "\\s*" upload-parsing/currency-regex "?")))
+
+(defn- int-regex
+  "Matches numbers which do not have a decimal separator."
+  [number-separators]
+  (with-parens
+   (with-currency
+    (case number-separators
+      ("." ".,") #"\d[\d,]*"
+      ",." #"\d[\d.]*"
+      ", " #"\d[\d \u00A0]*"
+      ".’" #"\d[\d’]*"))))
+
+(defn- float-or-int-regex
+  "Matches integral numbers, even if they have a decimal separator - e.g. 2 or 2.0"
+  [number-separators]
+  (with-parens
+   (with-currency
+    (case number-separators
+      ("." ".,") #"\d[\d,]*(\.0+)?"
+      ",." #"\d[\d.]*(\,[0]+)?"
+      ", " #"\d[\d \u00A0]*(\,[0.]+)?"
+      ".’" #"\d[\d’]*(\.[0.]+)?"))))
+
+(defn- float-regex
+  "Matches numbers, regardless of whether they have a decimal separator - e.g. 2, 2.0, or 2.2"
+  [number-separators]
+  (with-parens
+   (with-currency
+    (case number-separators
+      ("." ".,") #"\d[\d,]*(\.\d+)?"
+      ",." #"\d[\d.]*(\,[\d]+)?"
+      ", " #"\d[\d \u00A0]*(\,[\d.]+)?"
+      ".’" #"\d[\d’]*(\.[\d.]+)?"))))
+
+(defmacro does-not-throw?
+  "Returns true if the given body does not throw an exception."
+  [body]
+  `(try
+     ~body
+     true
+     (catch Throwable _e#
+       false)))
+
+(defn- date-string? [s]
+  (does-not-throw? (upload-parsing/parse-local-date s)))
+
+(defn- datetime-string? [s]
+  (does-not-throw? (upload-parsing/parse-local-datetime s)))
+
+(defn- offset-datetime-string? [s]
+  (does-not-throw? (upload-parsing/parse-offset-datetime s)))
+
+(defn- boolean-string? [s]
+  (boolean (re-matches #"(?i)true|t|yes|y|1|false|f|no|n|0" s)))
+
+(defn- boolean-int-string? [s]
+  (contains? #{"0" "1"} s))
+
+(defn- varchar-255? [s]
+  (<= (count s) 255))
+
+(defn- regex-matcher [regex]
+  (fn [s]
+    (boolean (re-matches regex s))))
+
+;; end [[value->type]] helpers
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(def ^:private non-inferable-types
+  #{::auto-incrementing-int-pk})
+
+(def ^:private type->check-schema
+  "Every inferable value-type needs to have a detection function registered."
+  (into [:map] (map #(vector % [:=> [:cat :string] :boolean])
+                    (remove non-inferable-types value-types))))
+
+(mu/defn ^:private settings->type->check :- type->check-schema
+  [{:keys [number-separators] :as _settings}]
+  (let [int-string?   (regex-matcher (int-regex number-separators))
+        float-or-int? (regex-matcher (float-or-int-regex number-separators))
+        float-string? (regex-matcher (float-regex number-separators))]
+    {::*boolean-int*   boolean-int-string?
+     ::boolean         boolean-string?
+     ::offset-datetime offset-datetime-string?
+     ::date            date-string?
+     ::datetime        datetime-string?
+     ::int             int-string?
+     ::*float-or-int*  float-or-int?
+     ::float           float-string?
+     ::varchar-255     varchar-255?
+     ::text            (constantly true)}))
+
+(defn- value->type
+  "Determine the most specific type that is compatible with the given value.
+  Numbers are assumed to use separators corresponding to the locale defined in the application settings"
+  [type->check value]
+  (when-not (str/blank? value)
+    (let [trimmed (str/trim value)]
+      (->> (remove non-inferable-types value-types)
+           (filter #((type->check %) trimmed))
+           first))))
+
+(defn- relax-type
+  "Given an existing column type, and a new value, relax the type until it includes the value."
+  [type->check current-type value]
+  (cond (nil? value) current-type
+        (nil? current-type) (value->type type->check value)
+        :else (let [trimmed (str/trim value)]
+                (if (str/blank? trimmed)
+                  current-type
+                  (->> (cons current-type (ancestors h current-type))
+                       (filter #((type->check %) trimmed))
+                       first)))))
+
+(defn type-relaxer
+  "Given a map of {value-type -> predicate}, return a reducing fn which updates our inferred schema using the next row."
+  [settings]
+  (let [relax (partial relax-type (settings->type->check settings))]
+    (fn [value-types row]
+      ;; It's important to realize this lazy sequence, because otherwise we can build a huge stack and overflow.
+      (vec (u/map-all relax value-types row)))))
+
+(mu/defn column-types-from-rows :- [:sequential (into [:enum] column-types)]
+  "Given the types of the existing columns (if there are any), and rows to be added, infer the best supporting types."
+  [settings existing-types rows]
+  (->> (reduce (type-relaxer settings) existing-types rows)
+       (u/map-all concretize existing-types)))
+
+(defn base-type->upload-type
+  "Returns the most specific upload type for the given base type."
+  [base-type]
+  (when base-type
+    (condp #(isa? %2 %1) base-type
+      :type/Float                  ::float
+      :type/BigInteger             ::int
+      :type/Integer                ::int
+      :type/Boolean                ::boolean
+      :type/DateTimeWithTZ         ::offset-datetime
+      :type/DateTime               ::datetime
+      :type/Date                   ::date
+      :type/Text                   ::text)))
+
+(defn- promotable?
+  "Are we allowed to promote a column's schema from `current-type` to `inferred-type`?"
+  [current-type inferred-type]
+  (when-let [allowed? (allowed-promotions current-type)]
+    (allowed? inferred-type)))
+
+(defn new-type
+  "Given the `current-type` of a column, and an `inferred-type` for new values to be added, return its new type.
+  This assumes we have already coerced the new values down to the existing type, if possible."
+  [current-type inferred-type]
+  (cond
+    ;; No restriction on new columns
+    (nil? current-type) inferred-type
+    ;; No changes required if inferred type matches
+    (= current-type inferred-type) current-type
+    :else
+    ;; Keep the existing type unless a promotion is allowed.
+    (if (promotable? current-type inferred-type)
+      inferred-type
+      current-type)))
diff --git a/test/metabase/upload/types_test.clj b/test/metabase/upload/types_test.clj
new file mode 100644
index 00000000000..ae303424556
--- /dev/null
+++ b/test/metabase/upload/types_test.clj
@@ -0,0 +1,332 @@
+(ns ^:mb/once metabase.upload.types-test
+  (:require
+   [clojure.test :refer [deftest is testing]]
+   [clojure.walk :as walk]
+   [java-time.api :as t]
+   [metabase.upload.parsing :as upload-parsing]
+   [metabase.upload.types :as upload-types]
+   [metabase.util.ordered-hierarchy :as ordered-hierarchy]))
+
+(def ^:private bool-type         ::upload-types/boolean)
+(def ^:private int-type          ::upload-types/int)
+(def ^:private bool-int-type     ::upload-types/*boolean-int*)
+(def ^:private float-type        ::upload-types/float)
+(def ^:private float-or-int-type ::upload-types/*float-or-int*)
+(def ^:private vchar-type        ::upload-types/varchar-255)
+(def ^:private date-type         ::upload-types/date)
+(def ^:private datetime-type     ::upload-types/datetime)
+(def ^:private offset-dt-type    ::upload-types/offset-datetime)
+(def ^:private text-type         ::upload-types/text)
+
+(deftest ^:parallel type-detection-and-parse-test
+  (doseq [[string-value expected-value expected-type separators]
+          ;; Number-related
+          [["0.0"        0              float-or-int-type "."]
+           ["0.0"        0              float-or-int-type ".,"]
+           ["0,0"        0              float-or-int-type ",."]
+           ["0,0"        0              float-or-int-type ", "]
+           ["0.0"        0              float-or-int-type ".’"]
+           ["-0.0"       -0.0           float-or-int-type "."]
+           ["-0.0"       -0.0           float-or-int-type ".,"]
+           ["-0,0"       -0.0           float-or-int-type ",."]
+           ["-0,0"       -0.0           float-or-int-type ", "]
+           ["-0.0"       -0.0           float-or-int-type ".’"]
+           ["(0.0)"      -0.0           float-or-int-type "."]
+           ["(0.0)"      -0.0           float-or-int-type ".,"]
+           ["(0,0)"      -0.0           float-or-int-type ",."]
+           ["(0,0)"      -0.0           float-or-int-type ", "]
+           ["(0.0)"      -0.0           float-or-int-type ".’"]
+           ["-4300.00€"  -4300          float-or-int-type ".,"]
+           ["£1,000.00"  1000           float-or-int-type]
+           ["£1,000.00"  1000           float-or-int-type "."]
+           ["£1,000.00"  1000           float-or-int-type ".,"]
+           ["£1.000,00"  1000           float-or-int-type ",."]
+           ["£1 000,00"  1000           float-or-int-type ", "]
+           ["£1’000.00"  1000           float-or-int-type ".’"]
+           ["$2"         2              int-type]
+           ["$ 3"        3              int-type]
+           ["-43€"       -43            int-type]
+           ["(86)"       -86            int-type]
+           ["($86)"      -86            int-type]
+           ["£1000"      1000           int-type]
+           ["£1000"      1000           int-type "."]
+           ["£1000"      1000           int-type ".,"]
+           ["£1000"      1000           int-type ",."]
+           ["£1000"      1000           int-type ", "]
+           ["£1000"      1000           int-type ".’"]
+           ["-Â¥9"        -9             int-type]
+           ["₹ -13"      -13            int-type]
+           ["₪13"        13             int-type]
+           ["â‚©-13"       -13            int-type]
+           ["â‚¿42"        42             int-type]
+           ["-99¢"       -99            int-type]
+           ["2"          2              int-type]
+           ["-86"        -86            int-type]
+           ["9,986,000"  9986000        int-type]
+           ["9,986,000"  9986000        int-type "."]
+           ["9,986,000"  9986000        int-type ".,"]
+           ["9.986.000"  9986000        int-type ",."]
+           ["9’986’000"  9986000        int-type ".’"]
+           ["$0"         0              int-type]
+           ["-1"         -1             int-type]
+           ["0"          false          bool-int-type]
+           ["1"          true           bool-int-type]
+           ["9.986.000"  "9.986.000"    vchar-type ".,"]
+           ["3.14"       3.14           float-type]
+           ["3.14"       3.14           float-type "."]
+           ["3.14"       3.14           float-type ".,"]
+           ["3,14"       3.14           float-type ",."]
+           ["3,14"       3.14           float-type ", "]
+           ["(3.14)"     -3.14          float-type]
+           ["3.14"       3.14           float-type ".’"]
+           [".14"        ".14"          vchar-type ".,"] ;; TODO: this should be a float type
+           ["0.14"       0.14           float-type ".,"]
+           ["-9986.567"  -9986.567      float-type ".,"]
+           ["$2.0"       2              float-or-int-type ".,"]
+           ["$ 3.50"     3.50           float-type ".,"]
+           ["-4300.23€"  -4300.23       float-type ".,"]
+           ["£1,000.23"  1000.23        float-type]
+           ["£1,000.23"  1000.23        float-type "."]
+           ["£1,000.23"  1000.23        float-type ".,"]
+           ["£1.000,23"  1000.23        float-type ",."]
+           ["£1 000,23"  1000.23        float-type ", "]
+           ["£1’000.23"  1000.23        float-type ".’"]
+           ["-Â¥9.99"     -9.99          float-type ".,"]
+           ["₹ -13.23"   -13.23         float-type ".,"]
+           ["₪13.01"     13.01          float-type ".,"]
+           ["â‚©13.33"     13.33          float-type ".,"]
+           ["â‚¿42.243646" 42.243646      float-type ".,"]
+           ["-99.99¢"    -99.99         float-type ".,"]
+           ["."          "."            vchar-type]
+           ;; String-related
+           [(apply str (repeat 255 "x")) (apply str (repeat 255 "x")) vchar-type]
+           [(apply str (repeat 256 "x")) (apply str (repeat 256 "x")) text-type]
+           ["86 is my favorite number"   "86 is my favorite number"   vchar-type]
+           ["My favorite number is 86"   "My favorite number is 86"   vchar-type]
+           ;; Date-related
+           [" 2022-01-01 "                    #t "2022-01-01"             date-type]
+           [" 2022-02-30 "                    " 2022-02-30 "              vchar-type]
+           [" -2022-01-01 "                   #t "-2022-01-01"            date-type]
+           [" Jan 1 2018"                     #t "2018-01-01"             date-type]
+           [" Jan 02 2018"                    #t "2018-01-02"             date-type]
+           [" Jan 30 -2018"                   #t "-2018-01-30"            date-type]
+           [" Jan 1, 2018"                    #t "2018-01-01"             date-type]
+           [" Jan 02, 2018"                   #t "2018-01-02"             date-type]
+           [" Feb 30, 2018"                   " Feb 30, 2018"             vchar-type]
+           [" 1 Jan 2018"                     #t "2018-01-01"             date-type]
+           [" 02 Jan 2018"                    #t "2018-01-02"             date-type]
+           [" 1 Jan, 2018"                    #t "2018-01-01"             date-type]
+           [" 02 Jan, 2018"                   #t "2018-01-02"             date-type]
+           [" January 1 2018"                 #t "2018-01-01"             date-type]
+           [" January 02 2018"                #t "2018-01-02"             date-type]
+           [" January 1, 2018"                #t "2018-01-01"             date-type]
+           [" January 02, 2018"               #t "2018-01-02"             date-type]
+           [" 1 January 2018"                 #t "2018-01-01"             date-type]
+           [" 02 January 2018"                #t "2018-01-02"             date-type]
+           [" 1 January, 2018"                #t "2018-01-01"             date-type]
+           [" 02 January, 2018"               #t "2018-01-02"             date-type]
+           [" Saturday, January 1 2000"       #t "2000-01-01"             date-type]
+           [" Sunday, January 02 2000"        #t "2000-01-02"             date-type]
+           [" Saturday, January 1, 2000"      #t "2000-01-01"             date-type]
+           [" Sunday, January 02, 2000"       #t "2000-01-02"             date-type]
+           [" 2022-01-01T01:00 "              #t "2022-01-01T01:00"       datetime-type]
+           [" 2022-01-01t01:00 "              #t "2022-01-01T01:00"       datetime-type]
+           [" 2022-01-01 01:00 "              #t "2022-01-01T01:00"       datetime-type]
+           [" 2022-01-01T01:00:00 "           #t "2022-01-01T01:00"       datetime-type]
+           [" 2022-01-01t01:00:00 "           #t "2022-01-01T01:00"       datetime-type]
+           [" 2022-01-01 01:00:00 "           #t "2022-01-01T01:00"       datetime-type]
+           [" 2022-01-01T01:00:00.00 "        #t "2022-01-01T01:00"       datetime-type]
+           [" 2022-01-01t01:00:00.00 "        #t "2022-01-01T01:00"       datetime-type]
+           [" 2022-01-01 01:00:00.00 "        #t "2022-01-01T01:00"       datetime-type]
+           [" 2022-01-01T01:00:00.000000000 " #t "2022-01-01T01:00"       datetime-type]
+           [" 2022-01-01t01:00:00.000000000 " #t "2022-01-01T01:00"       datetime-type]
+           [" 2022-01-01 01:00:00.000000000 " #t "2022-01-01T01:00"       datetime-type]
+           [" 2022-01-01T01:00:00.00-07 "     #t "2022-01-01T01:00-07:00" offset-dt-type]
+           [" 2022-01-01t01:00:00.00-07 "     #t "2022-01-01T01:00-07:00" offset-dt-type]
+           [" 2022-01-01 01:00:00.00-07 "     #t "2022-01-01T01:00-07:00" offset-dt-type]
+           [" 2022-01-01T01:00:00.00-07:00 "  #t "2022-01-01T01:00-07:00" offset-dt-type]
+           [" 2022-01-01t01:00:00.00-07:00 "  #t "2022-01-01T01:00-07:00" offset-dt-type]
+           [" 2022-01-01 01:00:00.00-07:00 "  #t "2022-01-01T01:00-07:00" offset-dt-type]
+           [" 2022-01-01T01:00:00.00-07:00 "  #t "2022-01-01T01:00-07:00" offset-dt-type]
+           [" 2022-01-01t01:00:00.00-07:00 "  #t "2022-01-01T01:00-07:00" offset-dt-type]
+           [" 2022-01-01 01:00:00.00-07:00 "  #t "2022-01-01T01:00-07:00" offset-dt-type]
+           [" 2022-01-01T01:00:00.00Z "       (t/offset-date-time "2022-01-01T01:00+00:00") offset-dt-type]
+           [" 2022-01-01t01:00:00.00Z "       (t/offset-date-time "2022-01-01T01:00+00:00") offset-dt-type]
+           [" 2022-01-01 01:00:00.00Z "       (t/offset-date-time "2022-01-01T01:00+00:00") offset-dt-type]]]
+    (let [settings    {:number-separators (or separators ".,")}
+          type->check (#'upload-types/settings->type->check settings)
+          value-type  (#'upload-types/value->type type->check string-value)
+          ;; get the type of the column, if we created it based on only that value
+          col-type    (first (upload-types/column-types-from-rows settings nil [[string-value]]))
+          parser      (upload-parsing/upload-type->parser col-type settings)]
+      (testing (format "\"%s\" is a %s" string-value value-type)
+        (is (= expected-type
+               value-type)))
+      (testing (format "\"%s\" is parsed into %s" string-value expected-value)
+        (is (= expected-value
+               (parser string-value)))))))
+
+(deftest ^:parallel type-coalescing-test
+  (doseq [[type-a            type-b           expected]
+          [[bool-type        bool-type        bool-type]
+           [bool-type        int-type         vchar-type]
+           [bool-type        bool-int-type    bool-type]
+           [bool-type        date-type        vchar-type]
+           [bool-type        datetime-type    vchar-type]
+           [bool-type        vchar-type       vchar-type]
+           [bool-type        text-type        text-type]
+           [int-type         bool-type        vchar-type]
+           [int-type         float-type       float-type]
+           [int-type         date-type        vchar-type]
+           [int-type         datetime-type    vchar-type]
+           [int-type         vchar-type       vchar-type]
+           [int-type         text-type        text-type]
+           [int-type         bool-int-type    int-type]
+           [bool-int-type    bool-int-type    bool-int-type]
+           [float-type       vchar-type       vchar-type]
+           [float-type       text-type        text-type]
+           [float-type       date-type        vchar-type]
+           [float-type       datetime-type    vchar-type]
+           [float-type       text-type        text-type]
+           [float-type       date-type        vchar-type]
+           [float-type       datetime-type    vchar-type]
+           [date-type        datetime-type    datetime-type]
+           [date-type        vchar-type       vchar-type]
+           [date-type        text-type        text-type]
+           [datetime-type    vchar-type       vchar-type]
+           [offset-dt-type   vchar-type       vchar-type]
+           [datetime-type    text-type        text-type]
+           [offset-dt-type   text-type        text-type]
+           [vchar-type       text-type        text-type]]]
+    (is (= expected (ordered-hierarchy/first-common-ancestor upload-types/h type-a type-b))
+        (format "%s + %s = %s" (name type-a) (name type-b) (name expected)))))
+
+(deftest ^:parallel coercion-soundness-test
+  (testing "Every coercion maps to a stricter type that is a direct descendant"
+    (is (empty?
+         ;; Build a set of all type-pairs that violate this constraint
+         (into (sorted-set)
+               (comp (mapcat (fn [[column-type value-types]]
+                               (for [value-type value-types]
+                                 [column-type value-type])))
+                     (remove (fn [[column-type value-type]]
+                               ;; Strictly speaking we only require that there is a route which only traverses
+                               ;; through abstract nodes, but it's much simpler to enforce this stronger condition:
+                               ;; that `column-type` is a child of `value-type`
+                               (contains? (ordered-hierarchy/children upload-types/h value-type) column-type))))
+               @#'upload-types/column-type->coercible-value-types)))))
+
+(deftest ^:parallel initial-column-type-test
+  (let [column-type (partial upload-types/concretize nil)]
+    (testing "Unknown value types are treated as text"
+      (is (= ::upload-types/text (column-type nil))))
+    (testing "Non-abstract value types resolve to themselves"
+      (let [ts upload-types/column-types]
+        (is (= (zipmap ts ts)
+               (zipmap (map column-type ts) ts)))))
+    (testing "Abstract values are resolved using an explicit map, which is consistent with the hierarchy"
+      (doseq [[value-type expected-column-type] @#'upload-types/abstract->concrete]
+        (is (= expected-column-type (column-type value-type)))
+        ;; Strictly speaking we only require that there is a route from each abstract node to its column type which only
+        ;; traverses through abstract nodes, but it's much simpler to enforce this stronger condition.
+        (is (contains? (parents upload-types/h value-type) expected-column-type))))))
+
+(deftest ^:parallel append-column-type-test
+  (doseq [existing-type upload-types/value-types
+          value-type    upload-types/value-types]
+    (case [existing-type value-type]
+      [::upload-types/int ::upload-types/*float-or-int*]
+      (testing "We coerce floats with fractional part to plan integers when appending into an existing integer column"
+        (is (= ::upload-types/int (upload-types/concretize existing-type value-type))))
+
+      ;; This is unsatisfying, would be good if this interface also covered promoting columns and rejecting values.
+      (testing (format "We append %s values to %s columns as if we were inserting them into new columns" existing-type value-type)
+        (is (= (upload-types/concretize nil value-type)
+               (upload-types/concretize existing-type value-type)))))))
+
+(defn- re-namespace
+  "Replace the namespace in any namespaced keyword with this test namespace, return all other values unchanged."
+  [x]
+  (if (and (keyword? x) (namespace x))
+    (keyword (name (ns-name *ns*)) (name x))
+    x))
+
+;; Translate the hierarchy to talk about this namespace, so that we can omit an explicit namespace for brevity.
+(def ^:private h (walk/postwalk re-namespace upload-types/h))
+
+(deftest ^:parallel parents-test
+  (testing "Parents are listed according to the order that this tag was derived from each of them"
+    (is (nil? (parents h ::text)))
+    (is (= [::text] (vec (parents h ::varchar-255))))
+    (is (= [::*float-or-int*] (vec (parents h ::int))))
+    (is (= [::boolean ::int] (vec (parents h ::*boolean-int*))))))
+
+(deftest ^:parallel children-test
+  (testing "Children are listed in reverse order to when they were each derived from this tag"
+    (is (nil? (ordered-hierarchy/children h ::*boolean-int*)))
+    (is (= [::varchar-255] (vec (ordered-hierarchy/children h ::text))))
+    (is (= [::*float-or-int*] (vec (ordered-hierarchy/children h ::float))))
+    (is (= [::auto-incrementing-int-pk ::*boolean-int*] (vec (ordered-hierarchy/children h ::int))))))
+
+(deftest ^:parallel ancestors-test
+  (testing "Linear ancestors are listed in order"
+    (is (nil? (ancestors h ::text)))
+    (is (= [::text] (vec (ancestors h ::varchar-255))))
+    (is (= [::varchar-255 ::text] (vec (ancestors h ::boolean))))
+    (is (= [::*float-or-int* ::float ::varchar-255 ::text] (vec (ancestors h ::int)))))
+
+  (testing "Non-linear ancestors are listed in topological order, following edges in the order they were defined."
+    (is (= [::boolean
+            ::int
+            ::*float-or-int*
+            ::float
+            ::varchar-255
+            ::text]
+           (vec (ancestors h ::*boolean-int*))))))
+
+(deftest ^:parallel descendants-test
+  (testing "Linear descendants are listed in order"
+    (is (nil? (descendants h ::*boolean-int*)))
+    (is (nil? (descendants h ::date)))
+    (is (= [::date] (vec (descendants h ::datetime))))
+    (is (= [::*boolean-int*] (vec (descendants h ::boolean)))))
+
+  (testing "Non-linear descendants are listed in reverse topological order, following edges in reserve order."
+    (is (= [::*float-or-int*
+            ::int
+            ::auto-incrementing-int-pk
+            ::*boolean-int*]
+           (vec (descendants h ::float))))
+    (is (= [::varchar-255
+            ::offset-datetime
+            ::datetime
+            ::date
+            ::float
+            ::*float-or-int*
+            ::int
+            ::auto-incrementing-int-pk
+            ::boolean
+            ::*boolean-int*]
+           (vec (descendants h ::text))))))
+
+(deftest ^:parallel sorted-tags-test
+  (testing "Tags are returned in a topological ordering that also preserves insertion order of the edges."
+    (is (= [::*boolean-int*
+            ::boolean
+            ::auto-incrementing-int-pk
+            ::int
+            ::*float-or-int*
+            ::float
+            ::date
+            ::datetime
+            ::offset-datetime
+            ::varchar-255
+            ::text]
+           (vec (ordered-hierarchy/sorted-tags h))))))
+
+(deftest ^:parallel first-common-ancestor-test
+  (testing "The first-common-ancestor is the first tag in the lineage of tag-a that is also in the lineage of tag-b"
+    (is (= ::*boolean-int* (ordered-hierarchy/first-common-ancestor h ::*boolean-int* nil)))
+    (is (= ::*boolean-int* (ordered-hierarchy/first-common-ancestor h ::*boolean-int* ::*boolean-int*)))
+    (is (= ::boolean (ordered-hierarchy/first-common-ancestor h ::*boolean-int* ::boolean)))
+    (is (= ::varchar-255 (ordered-hierarchy/first-common-ancestor h ::boolean ::int)))))
diff --git a/test/metabase/upload_test.clj b/test/metabase/upload_test.clj
index 2f65fe4e224..b94fd5a8031 100644
--- a/test/metabase/upload_test.clj
+++ b/test/metabase/upload_test.clj
@@ -7,7 +7,6 @@
    [clojure.string :as str]
    [clojure.test :refer :all]
    [flatland.ordered.map :as ordered-map]
-   [java-time.api :as t]
    [metabase.analytics.snowplow-test :as snowplow-test]
    [metabase.driver :as driver]
    [metabase.driver.ddl.interface :as ddl.i]
@@ -23,25 +22,23 @@
    [metabase.test.data.sql :as sql.tx]
    [metabase.upload :as upload]
    [metabase.upload.parsing :as upload-parsing]
+   [metabase.upload.types :as upload-types]
    [metabase.util :as u]
-   [metabase.util.ordered-hierarchy :as ordered-hierarchy]
    [toucan2.core :as t2])
   (:import
    (java.io File)))
 
 (set! *warn-on-reflection* true)
 
-(def ^:private bool-type         ::upload/boolean)
-(def ^:private int-type          ::upload/int)
-(def ^:private bool-int-type     ::upload/*boolean-int*)
-(def ^:private float-type        ::upload/float)
-(def ^:private float-or-int-type ::upload/*float-or-int*)
-(def ^:private vchar-type        ::upload/varchar-255)
-(def ^:private date-type         ::upload/date)
-(def ^:private datetime-type     ::upload/datetime)
-(def ^:private offset-dt-type    ::upload/offset-datetime)
-(def ^:private text-type         ::upload/text)
-(def ^:private auto-pk-type      ::upload/auto-incrementing-int-pk)
+(def ^:private bool-type         ::upload-types/boolean)
+(def ^:private int-type          ::upload-types/int)
+(def ^:private float-type        ::upload-types/float)
+(def ^:private vchar-type        ::upload-types/varchar-255)
+(def ^:private date-type         ::upload-types/date)
+(def ^:private datetime-type     ::upload-types/datetime)
+(def ^:private offset-dt-type    ::upload-types/offset-datetime)
+(def ^:private text-type         ::upload-types/text)
+(def ^:private auto-pk-type      ::upload-types/auto-incrementing-int-pk)
 
 (defn- local-infile-on? []
   (= "ON" (-> (sql-jdbc.conn/db->pooled-connection-spec (mt/db))
@@ -115,189 +112,6 @@
       (#'upload/scan-and-sync-table! database table))
     (t2/select-one :model/Table (:id table))))
 
-(deftest type-detection-and-parse-test
-  (doseq [[string-value expected-value expected-type seps]
-          ;; Number-related
-          [["0.0"        0              float-or-int-type "."]
-           ["0.0"        0              float-or-int-type ".,"]
-           ["0,0"        0              float-or-int-type ",."]
-           ["0,0"        0              float-or-int-type ", "]
-           ["0.0"        0              float-or-int-type ".’"]
-           ["-0.0"       -0.0           float-or-int-type "."]
-           ["-0.0"       -0.0           float-or-int-type ".,"]
-           ["-0,0"       -0.0           float-or-int-type ",."]
-           ["-0,0"       -0.0           float-or-int-type ", "]
-           ["-0.0"       -0.0           float-or-int-type ".’"]
-           ["(0.0)"      -0.0           float-or-int-type "."]
-           ["(0.0)"      -0.0           float-or-int-type ".,"]
-           ["(0,0)"      -0.0           float-or-int-type ",."]
-           ["(0,0)"      -0.0           float-or-int-type ", "]
-           ["(0.0)"      -0.0           float-or-int-type ".’"]
-           ["-4300.00€"  -4300          float-or-int-type ".,"]
-           ["£1,000.00"  1000           float-or-int-type]
-           ["£1,000.00"  1000           float-or-int-type "."]
-           ["£1,000.00"  1000           float-or-int-type ".,"]
-           ["£1.000,00"  1000           float-or-int-type ",."]
-           ["£1 000,00"  1000           float-or-int-type ", "]
-           ["£1’000.00"  1000           float-or-int-type ".’"]
-           ["$2"         2              int-type]
-           ["$ 3"        3              int-type]
-           ["-43€"       -43            int-type]
-           ["(86)"       -86            int-type]
-           ["($86)"      -86            int-type]
-           ["£1000"      1000           int-type]
-           ["£1000"      1000           int-type "."]
-           ["£1000"      1000           int-type ".,"]
-           ["£1000"      1000           int-type ",."]
-           ["£1000"      1000           int-type ", "]
-           ["£1000"      1000           int-type ".’"]
-           ["-Â¥9"        -9             int-type]
-           ["₹ -13"      -13            int-type]
-           ["₪13"        13             int-type]
-           ["â‚©-13"       -13            int-type]
-           ["â‚¿42"        42             int-type]
-           ["-99¢"       -99            int-type]
-           ["2"          2              int-type]
-           ["-86"        -86            int-type]
-           ["9,986,000"  9986000        int-type]
-           ["9,986,000"  9986000        int-type "."]
-           ["9,986,000"  9986000        int-type ".,"]
-           ["9.986.000"  9986000        int-type ",."]
-           ["9’986’000"  9986000        int-type ".’"]
-           ["$0"         0              int-type]
-           ["-1"         -1             int-type]
-           ["0"          false          bool-int-type]
-           ["1"          true           bool-int-type]
-           ["9.986.000"  "9.986.000"    vchar-type ".,"]
-           ["3.14"       3.14           float-type]
-           ["3.14"       3.14           float-type "."]
-           ["3.14"       3.14           float-type ".,"]
-           ["3,14"       3.14           float-type ",."]
-           ["3,14"       3.14           float-type ", "]
-           ["(3.14)"     -3.14          float-type]
-           ["3.14"       3.14           float-type ".’"]
-           [".14"        ".14"          vchar-type ".,"] ;; TODO: this should be a float type
-           ["0.14"       0.14           float-type ".,"]
-           ["-9986.567"  -9986.567      float-type ".,"]
-           ["$2.0"       2              float-or-int-type ".,"]
-           ["$ 3.50"     3.50           float-type ".,"]
-           ["-4300.23€"  -4300.23       float-type ".,"]
-           ["£1,000.23"  1000.23        float-type]
-           ["£1,000.23"  1000.23        float-type "."]
-           ["£1,000.23"  1000.23        float-type ".,"]
-           ["£1.000,23"  1000.23        float-type ",."]
-           ["£1 000,23"  1000.23        float-type ", "]
-           ["£1’000.23"  1000.23        float-type ".’"]
-           ["-Â¥9.99"     -9.99          float-type ".,"]
-           ["₹ -13.23"   -13.23         float-type ".,"]
-           ["₪13.01"     13.01          float-type ".,"]
-           ["â‚©13.33"     13.33          float-type ".,"]
-           ["â‚¿42.243646" 42.243646      float-type ".,"]
-           ["-99.99¢"    -99.99         float-type ".,"]
-           ["."          "."            vchar-type]
-           ;; String-related
-           [(apply str (repeat 255 "x")) (apply str (repeat 255 "x")) vchar-type]
-           [(apply str (repeat 256 "x")) (apply str (repeat 256 "x")) text-type]
-           ["86 is my favorite number"   "86 is my favorite number"   vchar-type]
-           ["My favorite number is 86"   "My favorite number is 86"   vchar-type]
-           ;; Date-related
-           [" 2022-01-01 "                    #t "2022-01-01"             date-type]
-           [" 2022-02-30 "                    " 2022-02-30 "              vchar-type]
-           [" -2022-01-01 "                   #t "-2022-01-01"            date-type]
-           [" Jan 1 2018"                     #t "2018-01-01"             date-type]
-           [" Jan 02 2018"                    #t "2018-01-02"             date-type]
-           [" Jan 30 -2018"                   #t "-2018-01-30"            date-type]
-           [" Jan 1, 2018"                    #t "2018-01-01"             date-type]
-           [" Jan 02, 2018"                   #t "2018-01-02"             date-type]
-           [" Feb 30, 2018"                   " Feb 30, 2018"             vchar-type]
-           [" 1 Jan 2018"                     #t "2018-01-01"             date-type]
-           [" 02 Jan 2018"                    #t "2018-01-02"             date-type]
-           [" 1 Jan, 2018"                    #t "2018-01-01"             date-type]
-           [" 02 Jan, 2018"                   #t "2018-01-02"             date-type]
-           [" January 1 2018"                 #t "2018-01-01"             date-type]
-           [" January 02 2018"                #t "2018-01-02"             date-type]
-           [" January 1, 2018"                #t "2018-01-01"             date-type]
-           [" January 02, 2018"               #t "2018-01-02"             date-type]
-           [" 1 January 2018"                 #t "2018-01-01"             date-type]
-           [" 02 January 2018"                #t "2018-01-02"             date-type]
-           [" 1 January, 2018"                #t "2018-01-01"             date-type]
-           [" 02 January, 2018"               #t "2018-01-02"             date-type]
-           [" Saturday, January 1 2000"       #t "2000-01-01"             date-type]
-           [" Sunday, January 02 2000"        #t "2000-01-02"             date-type]
-           [" Saturday, January 1, 2000"      #t "2000-01-01"             date-type]
-           [" Sunday, January 02, 2000"       #t "2000-01-02"             date-type]
-           [" 2022-01-01T01:00 "              #t "2022-01-01T01:00"       datetime-type]
-           [" 2022-01-01t01:00 "              #t "2022-01-01T01:00"       datetime-type]
-           [" 2022-01-01 01:00 "              #t "2022-01-01T01:00"       datetime-type]
-           [" 2022-01-01T01:00:00 "           #t "2022-01-01T01:00"       datetime-type]
-           [" 2022-01-01t01:00:00 "           #t "2022-01-01T01:00"       datetime-type]
-           [" 2022-01-01 01:00:00 "           #t "2022-01-01T01:00"       datetime-type]
-           [" 2022-01-01T01:00:00.00 "        #t "2022-01-01T01:00"       datetime-type]
-           [" 2022-01-01t01:00:00.00 "        #t "2022-01-01T01:00"       datetime-type]
-           [" 2022-01-01 01:00:00.00 "        #t "2022-01-01T01:00"       datetime-type]
-           [" 2022-01-01T01:00:00.000000000 " #t "2022-01-01T01:00"       datetime-type]
-           [" 2022-01-01t01:00:00.000000000 " #t "2022-01-01T01:00"       datetime-type]
-           [" 2022-01-01 01:00:00.000000000 " #t "2022-01-01T01:00"       datetime-type]
-           [" 2022-01-01T01:00:00.00-07 "     #t "2022-01-01T01:00-07:00" offset-dt-type]
-           [" 2022-01-01t01:00:00.00-07 "     #t "2022-01-01T01:00-07:00" offset-dt-type]
-           [" 2022-01-01 01:00:00.00-07 "     #t "2022-01-01T01:00-07:00" offset-dt-type]
-           [" 2022-01-01T01:00:00.00-07:00 "  #t "2022-01-01T01:00-07:00" offset-dt-type]
-           [" 2022-01-01t01:00:00.00-07:00 "  #t "2022-01-01T01:00-07:00" offset-dt-type]
-           [" 2022-01-01 01:00:00.00-07:00 "  #t "2022-01-01T01:00-07:00" offset-dt-type]
-           [" 2022-01-01T01:00:00.00-07:00 "  #t "2022-01-01T01:00-07:00" offset-dt-type]
-           [" 2022-01-01t01:00:00.00-07:00 "  #t "2022-01-01T01:00-07:00" offset-dt-type]
-           [" 2022-01-01 01:00:00.00-07:00 "  #t "2022-01-01T01:00-07:00" offset-dt-type]
-           [" 2022-01-01T01:00:00.00Z "       (t/offset-date-time "2022-01-01T01:00+00:00") offset-dt-type]
-           [" 2022-01-01t01:00:00.00Z "       (t/offset-date-time "2022-01-01T01:00+00:00") offset-dt-type]
-           [" 2022-01-01 01:00:00.00Z "       (t/offset-date-time "2022-01-01T01:00+00:00") offset-dt-type]]]
-    (let [settings    {:number-separators (or seps ".,")}
-          type->check (#'upload/settings->type->check settings)
-          value-type  (#'upload/value->type type->check string-value)
-          ;; get the type of the column, if we created it based on only that value
-          col-type    (first (upload/column-types-from-rows settings nil [[string-value]]))
-          parser      (upload-parsing/upload-type->parser col-type settings)]
-      (testing (format "\"%s\" is a %s" string-value value-type)
-        (is (= expected-type
-               value-type)))
-      (testing (format "\"%s\" is parsed into %s" string-value expected-value)
-        (is (= expected-value
-               (parser string-value)))))))
-
-(deftest ^:parallel type-coalescing-test
-  (doseq [[type-a            type-b           expected]
-          [[bool-type        bool-type        bool-type]
-           [bool-type        int-type         vchar-type]
-           [bool-type        bool-int-type    bool-type]
-           [bool-type        date-type        vchar-type]
-           [bool-type        datetime-type    vchar-type]
-           [bool-type        vchar-type       vchar-type]
-           [bool-type        text-type        text-type]
-           [int-type         bool-type        vchar-type]
-           [int-type         float-type       float-type]
-           [int-type         date-type        vchar-type]
-           [int-type         datetime-type    vchar-type]
-           [int-type         vchar-type       vchar-type]
-           [int-type         text-type        text-type]
-           [int-type         bool-int-type    int-type]
-           [bool-int-type    bool-int-type    bool-int-type]
-           [float-type       vchar-type       vchar-type]
-           [float-type       text-type        text-type]
-           [float-type       date-type        vchar-type]
-           [float-type       datetime-type    vchar-type]
-           [float-type       text-type        text-type]
-           [float-type       date-type        vchar-type]
-           [float-type       datetime-type    vchar-type]
-           [date-type        datetime-type    datetime-type]
-           [date-type        vchar-type       vchar-type]
-           [date-type        text-type        text-type]
-           [datetime-type    vchar-type       vchar-type]
-           [offset-dt-type   vchar-type       vchar-type]
-           [datetime-type    text-type        text-type]
-           [offset-dt-type   text-type        text-type]
-           [vchar-type       text-type        text-type]]]
-    (is (= expected (#'ordered-hierarchy/first-common-ancestor @#'upload/h type-a type-b))
-        (format "%s + %s = %s" (name type-a) (name type-b) (name expected)))))
-
 (defn csv-file-with
   "Create a temp csv file with the given content and return the file"
   (^File [rows]
@@ -1279,8 +1093,8 @@
       :or {table-name       (mt/random-name)
            schema-name      (sql.tx/session-schema driver/*driver*)
            col->upload-type (ordered-map/ordered-map
-                             upload/auto-pk-column-keyword ::upload/auto-incrementing-int-pk
-                             :name ::upload/varchar-255)
+                             upload/auto-pk-column-keyword auto-pk-type
+                             :name vchar-type)
            rows             [["Obi-Wan Kenobi"]]}}]
   (let [driver driver/*driver*
         db-id (mt/id)
@@ -1384,9 +1198,9 @@
                         ["name,id" "Luke Skywalker,20" "Darth Vader,30"]]]  ;; different order
         (with-upload-table!
           [table (create-upload-table! {:col->upload-type (ordered-map/ordered-map
-                                                           :_mb_row_id ::upload/auto-incrementing-int-pk
-                                                           :id ::upload/int
-                                                           :name ::upload/varchar-255)
+                                                           :_mb_row_id auto-pk-type
+                                                           :id int-type
+                                                           :name vchar-type)
                                         :rows             [[10 "Obi-Wan Kenobi"]]})]
           (let [file (csv-file-with csv-rows (mt/random-name))]
             (is (some? (append-csv! {:file     file
@@ -1435,8 +1249,8 @@
           (with-upload-table!
             [table (create-upload-table!
                     {:col->upload-type (ordered-map/ordered-map
-                                        :id         ::upload/int
-                                        :name       ::upload/varchar-255)
+                                        :id         int-type
+                                        :name       vchar-type)
                      :rows [[1,"some_text"]]})]
 
             (let [file (csv-file-with csv-rows (mt/random-name))]
@@ -1465,14 +1279,14 @@
             (with-upload-table!
               [table (create-upload-table!
                       {:col->upload-type (ordered-map/ordered-map
-                                          :_mb_row_id      ::upload/auto-incrementing-int-pk
-                                          :biginteger      ::upload/int
-                                          :float           ::upload/float
-                                          :text            ::upload/varchar-255
-                                          :boolean         ::upload/boolean
-                                          :date            ::upload/date
-                                          :datetime        ::upload/datetime
-                                          :offset_datetime ::upload/offset-datetime)
+                                          :_mb_row_id      auto-pk-type
+                                          :biginteger      int-type
+                                          :float           float-type
+                                          :text            vchar-type
+                                          :boolean         bool-type
+                                          :date            date-type
+                                          :datetime        datetime-type
+                                          :offset_datetime offset-dt-type)
                        :rows [[1000000,1.0,"some_text",false,#t "2020-01-01",#t "2020-01-01T00:00:00",#t "2020-01-01T00:00:00"]]})]
               (let [csv-rows ["biginteger,float,text,boolean,date,datetime,offset_datetime"
                               "2000000,2.0,some_text,true,2020-02-02,2020-02-02T02:02:02,2020-02-02T02:02:02+02:00"]
@@ -1506,7 +1320,7 @@
     (testing "If the table doesn't have _mb_row_id but the CSV does, ignore the CSV _mb_row_id but create the column anyway"
       (with-upload-table!
         [table (create-upload-table! {:col->upload-type (ordered-map/ordered-map
-                                                         :name ::upload/varchar-255)
+                                                         :name vchar-type)
                                       :rows             [["Obi-Wan Kenobi"]]})]
         (let [csv-rows ["_MB-row ID,name" "1000,Luke Skywalker"]
               file     (csv-file-with csv-rows (mt/random-name))]
@@ -1546,7 +1360,7 @@
     (testing "If the table doesn't have _mb_row_id and a failure occurs, we shouldn't create a _mb_row_id column"
       (with-upload-table!
         [table (create-upload-table! {:col->upload-type (ordered-map/ordered-map
-                                                         :bool_column ::upload/boolean)
+                                                         :bool_column bool-type)
                                       :rows [[true]]})]
         (let [csv-rows    ["bool_column" "not a bool"]
               file        (csv-file-with csv-rows (mt/random-name))
@@ -1691,9 +1505,9 @@
     (testing "Append should handle the columns in the CSV file being reordered"
       (with-upload-table! [table (create-upload-table!
                                   :col->upload-type (ordered-map/ordered-map
-                                                     upload/auto-pk-column-keyword ::upload/auto-incrementing-int-pk
-                                                     :name ::upload/varchar-255
-                                                     :shame ::upload/varchar-255)
+                                                     upload/auto-pk-column-keyword auto-pk-type
+                                                     :name vchar-type
+                                                     :shame vchar-type)
                                   :rows [["Obi-Wan Kenobi" "No one really knows me"]])]
 
         (let [csv-rows ["shame,name" "Nothing - you can't prove it,Puke Nightstalker"]
@@ -1731,27 +1545,27 @@
           (doseq [auto-pk-column? [true false]]
             (testing (str "\nFor a table that has " (if auto-pk-column? "an" " no") " automatically generated PK already")
               (doseq [{:keys [upload-type valid invalid msg]}
-                      [{:upload-type ::upload/int
+                      [{:upload-type int-type
                         :valid       1
                         :invalid     "not an int"
                         :msg         "'not an int' is not a recognizable number"}
-                       {:upload-type ::upload/float
+                       {:upload-type float-type
                         :valid       1.1
                         :invalid     "not a float"
                         :msg         "'not a float' is not a recognizable number"}
-                       {:upload-type ::upload/boolean
+                       {:upload-type bool-type
                         :valid       true
                         :invalid     "correct"
                         :msg         "'correct' is not a recognizable boolean"}
-                       {:upload-type ::upload/date
+                       {:upload-type date-type
                         :valid       #t "2000-01-01"
                         :invalid     "2023-01-01T00:00:00"
                         :msg         "'2023-01-01T00:00:00' is not a recognizable date"}
-                       {:upload-type ::upload/datetime
+                       {:upload-type datetime-type
                         :valid       #t "2000-01-01T00:00:00"
                         :invalid     "2023-01-01T00:00:00+01"
                         :msg         "'2023-01-01T00:00:00+01' is not a recognizable datetime"}
-                       {:upload-type ::upload/offset-datetime
+                       {:upload-type offset-dt-type
                         :valid       #t "2000-01-01T00:00:00+01"
                         :invalid     "2023-01-01T00:00:00[Europe/Helsinki]"
                         :msg         "'2023-01-01T00:00:00[Europe/Helsinki]' is not a recognizable zoned datetime"}]]
@@ -1760,9 +1574,9 @@
                     [table (create-upload-table!
                             {:col->upload-type (cond-> (ordered-map/ordered-map
                                                         :test_column upload-type
-                                                        :name        ::upload/varchar-255)
+                                                        :name        vchar-type)
                                                  auto-pk-column?
-                                                 (assoc upload/auto-pk-column-keyword ::upload/auto-incrementing-int-pk))
+                                                 (assoc upload/auto-pk-column-keyword auto-pk-type))
                              :rows             [[valid "Obi-Wan Kenobi"]]})]
                     (let [;; The CSV contains 50 valid rows and 1 invalid row
                           csv-rows `["test_column,name" ~@(repeat 50 (str valid ",Darth Vadar")) ~(str invalid ",Luke Skywalker")]
@@ -1785,7 +1599,9 @@
   (mt/test-drivers (filter (fn [driver]
                              ;; use of varchar(255) is not universal for all drivers, so only test drivers that
                              ;; have different database types for varchar(255) and text
-                             (apply not= (map (partial driver/upload-type->database-type driver) [::upload/varchar-255 ::upload/text])))
+                             (apply not= (->> [vchar-type text-type]
+                                              (map #(keyword "metabase.upload" (name %)))
+                                              (map (partial driver/upload-type->database-type driver)))))
                            (mt/normal-drivers-with-feature :uploads))
     (with-mysql-local-infile-off
       (testing "Append fails if the CSV file contains string values that are too long for the column"
@@ -1794,8 +1610,8 @@
         (binding [driver/*insert-chunk-rows* 1]
           (with-upload-table!
             [table (create-upload-table! {:col->upload-type (ordered-map/ordered-map
-                                                             upload/auto-pk-column-keyword ::upload/auto-incrementing-int-pk
-                                                             :test_column ::upload/varchar-255)
+                                                             upload/auto-pk-column-keyword auto-pk-type
+                                                             :test_column vchar-type)
                                           :rows             [["valid"]]})]
             (let [csv-rows `["test_column" ~@(repeat 50 "valid too") ~(apply str (repeat 256 "x"))]
                   file  (csv-file-with csv-rows (mt/random-name))]
@@ -1816,14 +1632,14 @@
         ;; for drivers that insert rows in chunks, we change the chunk size to 1 so that we can test that the
         ;; inserted rows are rolled back
         (binding [driver/*insert-chunk-rows* 1]
-          (let [upload-type ::upload/varchar-255,
+          (let [upload-type vchar-type,
                 uncoerced   (apply str (repeat 256 "x"))
                 coerced     (apply str (repeat 255 "x"))]
             (testing (format "\nUploading %s into a column of type %s should be coerced to %s"
                              uncoerced (name upload-type) coerced)
               (with-upload-table!
                 [table (create-upload-table! {:col->upload-type (ordered-map/ordered-map
-                                                                 upload/auto-pk-column-keyword ::upload/auto-incrementing-int-pk
+                                                                 upload/auto-pk-column-keyword auto-pk-type
                                                                  :test_column upload-type)
                                               :rows             []})]
                 (let [csv-rows ["test_column" uncoerced]
@@ -1845,16 +1661,16 @@
         ;; inserted rows are rolled back
         (binding [driver/*insert-chunk-rows* 1]
           (doseq [{:keys [upload-type uncoerced coerced fail-msg] :as args}
-                  [{:upload-type ::upload/int,     :uncoerced "2.0",        :coerced 2} ;; value is coerced to int
-                   {:upload-type ::upload/int,     :uncoerced "2.1",        :coerced 2.1} ;; column is promoted to float
-                   {:upload-type ::upload/float,   :uncoerced "2",          :coerced 2.0}
-                   {:upload-type ::upload/boolean, :uncoerced "0",          :coerced false}
-                   {:upload-type ::upload/boolean, :uncoerced "1.0",        :fail-msg "'1.0' is not a recognizable boolean"}
-                   {:upload-type ::upload/boolean, :uncoerced "0.0",        :fail-msg "'0.0' is not a recognizable boolean"}
-                   {:upload-type ::upload/int,     :uncoerced "01/01/2012", :fail-msg "'01/01/2012' is not a recognizable number"}]]
+                  [{:upload-type int-type,   :uncoerced "2.0",        :coerced 2} ;; value is coerced to int
+                   {:upload-type int-type,   :uncoerced "2.1",        :coerced 2.1} ;; column is promoted to float
+                   {:upload-type float-type, :uncoerced "2",          :coerced 2.0}
+                   {:upload-type bool-type,  :uncoerced "0",          :coerced false}
+                   {:upload-type bool-type,  :uncoerced "1.0",        :fail-msg "'1.0' is not a recognizable boolean"}
+                   {:upload-type bool-type,  :uncoerced "0.0",        :fail-msg "'0.0' is not a recognizable boolean"}
+                   {:upload-type int-type,   :uncoerced "01/01/2012", :fail-msg "'01/01/2012' is not a recognizable number"}]]
             (with-upload-table!
               [table (create-upload-table! {:col->upload-type (ordered-map/ordered-map
-                                                               upload/auto-pk-column-keyword ::upload/auto-incrementing-int-pk
+                                                               upload/auto-pk-column-keyword auto-pk-type
                                                                :test_column upload-type)
                                             :rows             []})]
               (let [csv-rows ["test_column" uncoerced]
@@ -1878,36 +1694,6 @@
                           (append!)))))
                 (io/delete-file file)))))))))
 
-(def ^:private concretize @#'upload/concretize)
-
-(deftest initial-column-type-test
-  (let [column-type (partial concretize nil)]
-    (testing "Unknown value types are treated as text"
-      (is (= ::upload/text (column-type nil))))
-    (testing "Non-abstract value types resolve to themselves"
-      (let [ts @#'upload/column-types]
-        (is (= (zipmap ts ts)
-               (zipmap (map column-type ts) ts)))))
-    (testing "Abstract values are resolved using an explicit map, which is consistent with the hierarchy"
-      (doseq [[value-type expected-column-type] @#'upload/abstract->concrete]
-        (is (= expected-column-type (column-type value-type)))
-        ;; Strictly speaking we only require that there is a route from each abstract node to its column type which only
-        ;; traverses through abstract nodes, but it's much simpler to enforce this stronger condition.
-        (is (contains? (parents @#'upload/h value-type) expected-column-type))))))
-
-(deftest append-column-type-test
-  (doseq [existing-type @#'upload/value-types
-          value-type    @#'upload/value-types]
-    (case [existing-type value-type]
-      [::upload/int ::upload/*float-or-int*]
-      (testing "We coerce floats with fractional part to plan integers when appending into an existing integer column"
-        (is (= ::upload/int (concretize existing-type value-type))))
-
-      ;; This is unsatisfying, would be good if this interface also covered promoting columns and rejecting values.
-      (testing (format "We append %s values to %s columns as if we were inserting them into new columns" existing-type value-type)
-        (is (= (concretize nil value-type)
-               (concretize existing-type value-type)))))))
-
 (deftest create-from-csv-int-and-float-test
   (testing "Creation should handle a mix of int and float-or-int values in any order"
     (mt/test-drivers (mt/normal-drivers-with-feature :uploads)
@@ -1951,9 +1737,9 @@
     (testing "Append should handle a mix of int and float-or-int values being appended to an int column"
       (with-upload-table! [table (create-upload-table!
                                   :col->upload-type (ordered-map/ordered-map
-                                                     :_mb_row_id ::upload/auto-incrementing-int-pk
-                                                     :number_1 ::upload/int
-                                                     :number_2 ::upload/int)
+                                                     :_mb_row_id auto-pk-type
+                                                     :number_1 int-type
+                                                     :number_2 int-type)
                                   :rows [[1, 1]])]
 
         (let [csv-rows ["number-1, number-2"
@@ -1973,9 +1759,9 @@
     (testing "Append should handle a mix of int and float-or-int values being appended to an int column"
       (with-upload-table! [table (create-upload-table!
                                   :col->upload-type (ordered-map/ordered-map
-                                                     :_mb_row_id ::upload/auto-incrementing-int-pk
-                                                     :number_1 ::upload/int
-                                                     :number_2 ::upload/int)
+                                                     :_mb_row_id auto-pk-type
+                                                     :number_1 int-type
+                                                     :number_2 int-type)
                                   :rows [[1, 1]])]
 
         (let [csv-rows ["number-1, number-2"
@@ -1991,18 +1777,3 @@
                    table-rows)))
 
           (io/delete-file file))))))
-
-(deftest coercion-soundness-test
-  (testing "Every coercion maps to a stricter type that is a direct descendant"
-    (is (empty?
-         ;; Build a set of all type-pairs that violate this constraint
-         (into (sorted-set)
-               (comp (mapcat (fn [[column-type value-types]]
-                               (for [value-type value-types]
-                                 [column-type value-type])))
-                     (remove (fn [[column-type value-type]]
-                               ;; Strictly speaking we only require that there is a route which only traverses
-                               ;; through abstract nodes, but it's much simpler to enforce this stronger condition:
-                               ;; that `column-type` is a child of `value-type`
-                               (contains? (ordered-hierarchy/children @#'upload/h value-type) column-type))))
-               @#'upload/column-type->coercible-value-types)))))
diff --git a/test/metabase/util/ordered_hierarchy_test.clj b/test/metabase/util/ordered_hierarchy_test.clj
index 10e57672fdc..9fc79295b22 100644
--- a/test/metabase/util/ordered_hierarchy_test.clj
+++ b/test/metabase/util/ordered_hierarchy_test.clj
@@ -1,93 +1,59 @@
 (ns metabase.util.ordered-hierarchy-test
   (:require
    [clojure.test :refer [deftest is testing]]
-   [clojure.walk :as walk]
-   [metabase.upload :as upload]
    [metabase.util.ordered-hierarchy :as ordered-hierarchy]))
 
-;;; It would be nice to have property tests, to expose any subtle edge cases.
-;;; For now, we use a translation of some real world usage in the app.
-(def ^:private h
-  (walk/postwalk
-   (fn [x]
-     (if (and (keyword? x) (namespace x))
-       (keyword (name (ns-name *ns*)) (name x))
-       x))
-   @#'upload/h))
-
-(deftest ^:parallel parents-test
-  (testing "Parents are listed according to the order that this tag was derived from each of them"
-    (is (nil? (parents h ::text)))
-    (is (= [::text] (vec (parents h ::varchar-255))))
-    (is (= [::*float-or-int*] (vec (parents h ::int))))
-    (is (= [::boolean ::int] (vec (parents h ::*boolean-int*))))))
-
-(deftest ^:parallel children-test
-  (testing "Children are listed in reverse order to when they were each derived from this tag"
-    (is (nil? (ordered-hierarchy/children h ::*boolean-int*)))
-    (is (= [::varchar-255] (vec (ordered-hierarchy/children h ::text))))
-    (is (= [::*float-or-int*] (vec (ordered-hierarchy/children h ::float))))
-    (is (= [::auto-incrementing-int-pk ::*boolean-int*] (vec (ordered-hierarchy/children h ::int))))))
-
-(deftest ^:parallel ancestors-test
-  (testing "Linear ancestors are listed in order"
-    (is (nil? (ancestors h ::text)))
-    (is (= [::text] (vec (ancestors h ::varchar-255))))
-    (is (= [::varchar-255 ::text] (vec (ancestors h ::boolean))))
-    (is (= [::*float-or-int* ::float ::varchar-255 ::text] (vec (ancestors h ::int)))))
-
-  (testing "Non-linear ancestors are listed in topological order, following edges in the order they were defined."
-    (is (= [::boolean
-            ::int
-            ::*float-or-int*
-            ::float
-            ::varchar-255
-            ::text]
-           (vec (ancestors h ::*boolean-int*))))))
-
-(deftest ^:parallel descendants-test
-  (testing "Linear descendants are listed in order"
-    (is (nil? (descendants h ::*boolean-int*)))
-    (is (nil? (descendants h ::date)))
-    (is (= [::date] (vec (descendants h ::datetime))))
-    (is (= [::*boolean-int*] (vec (descendants h ::boolean)))))
-
-  (testing "Non-linear descendants are listed in reverse topological order, following edges in reserve order."
-    (is (= [::*float-or-int*
-            ::int
-            ::auto-incrementing-int-pk
-            ::*boolean-int*]
-           (vec (descendants h ::float))))
-    (is (= [::varchar-255
-            ::offset-datetime
-            ::datetime
-            ::date
-            ::float
-            ::*float-or-int*
-            ::int
-            ::auto-incrementing-int-pk
-            ::boolean
-            ::*boolean-int*]
-           (vec (descendants h ::text))))))
-
-(deftest ^:parallel sorted-tags-test
-  (testing "Tags are returned in a topological ordering that also preserves insertion order of the edges."
-    (is (= [::*boolean-int*
-            ::boolean
-            ::auto-incrementing-int-pk
-            ::int
-            ::*float-or-int*
-            ::float
-            ::date
-            ::datetime
-            ::offset-datetime
-            ::varchar-255
-            ::text]
-           (vec (ordered-hierarchy/sorted-tags h))))))
-
-(deftest ^:parallel first-common-ancestor-test
-  (testing "The first-common-ancestor is the first tag in the lineage of tag-a that is also in the lineage of tag-b"
-    (is (= ::*boolean-int* (ordered-hierarchy/first-common-ancestor h ::*boolean-int* nil)))
-    (is (= ::*boolean-int* (ordered-hierarchy/first-common-ancestor h ::*boolean-int* ::*boolean-int*)))
-    (is (= ::boolean (ordered-hierarchy/first-common-ancestor h ::*boolean-int* ::boolean)))
-    (is (= ::varchar-255 (ordered-hierarchy/first-common-ancestor h ::boolean ::int)))))
+(def ^:private polygons
+  (ordered-hierarchy/make-hierarchy
+   [:quadrilateral
+    [:trapezoid :isosceles-trapezoid :right-trapezoid]
+    [:kite [:rhombus :square]]
+    [:parallelogram
+     :rhombus
+     [:rectangle :square]]]
+   [:triangle
+    :scalene-triangle
+    [:isosceles-triangle :equilateral-triangle]
+    [:acute-triangle :equilateral-triangle]
+    :right-angled-triangle
+    :obtuse-triangle]))
+
+(deftest make-hierarchy-test
+  (testing "Hiccup structures have the expected topological order"
+    (is (= [:isosceles-trapezoid
+            :right-trapezoid
+            :trapezoid
+            :square
+            :rhombus
+            :kite
+            :rectangle
+            :parallelogram
+            :quadrilateral
+            ;; it's unfortunate that we would exhaustively test all the quadrilateral types, before checking
+            ;; if it's a triangle (if "hypothetically" we were using the topological order to test a value
+            ;; ... this is a case where a root-to-leaf traversal would make more sense.
+            :scalene-triangle
+            :equilateral-triangle
+            :isosceles-triangle
+            :acute-triangle
+            :right-angled-triangle
+            :obtuse-triangle
+            :triangle]
+           (vec (ordered-hierarchy/sorted-tags polygons)))))
+
+  (testing "Hiccup structures are translated into the expected graph structure"
+    (is (= {:trapezoid             [:quadrilateral]
+            :isosceles-trapezoid   [:trapezoid]
+            :right-trapezoid       [:trapezoid]
+            :kite                  [:quadrilateral]
+            :rhombus               [:kite :parallelogram]
+            :square                [:rhombus :rectangle]
+            :parallelogram         [:quadrilateral]
+            :rectangle             [:parallelogram]
+            :scalene-triangle      [:triangle]
+            :isosceles-triangle    [:triangle]
+            :equilateral-triangle  [:isosceles-triangle :acute-triangle]
+            :acute-triangle        [:triangle]
+            :right-angled-triangle [:triangle]
+            :obtuse-triangle       [:triangle]}
+           (update-vals (:parents polygons) vec)))))
-- 
GitLab