From dd9e126bb656651944a803d9207445508329bbef Mon Sep 17 00:00:00 2001
From: Braden Shepherdson <braden@metabase.com>
Date: Thu, 25 Apr 2024 15:28:12 -0400
Subject: [PATCH] [MBQL lib] Add `column-extractions` to the top level (#41525)

Column extractions are "canned" expressions based on a column's
type. For example, we might extract the weekday from a temporal column,
or the domain from an email or URL column.

This logic already existed inside the `column-extract` drill; this pulls
it out as a top-level concept, since extractions are also being
integrated into the notebook editor apart from drills.

Part of the follow-up for Extract Column epic #38964.
---
 .../mbql-library-changelog.md                 |   5 +
 .../column_extract_drill.cy.spec.js           |   1 +
 frontend/src/metabase-lib/types.ts            |   2 +-
 .../column-extract-drill.tsx                  |   2 +-
 src/metabase/lib/core.cljc                    |   4 +
 .../lib/drill_thru/column_extract.cljc        |  91 ++------
 src/metabase/lib/extraction.cljc              | 118 ++++++++++
 src/metabase/lib/js.cljs                      |  20 ++
 src/metabase/lib/schema/drill_thru.cljc       |   5 +-
 src/metabase/lib/schema/extraction.cljc       |  13 ++
 src/metabase/lib/util.cljc                    |  29 ++-
 .../lib/drill_thru/column_extract_test.cljc   |  30 +--
 test/metabase/lib/drill_thru_test.cljc        |   6 +-
 test/metabase/lib/extraction_test.cljc        | 204 ++++++++++++++++++
 14 files changed, 423 insertions(+), 107 deletions(-)
 create mode 100644 src/metabase/lib/extraction.cljc
 create mode 100644 src/metabase/lib/schema/extraction.cljc
 create mode 100644 test/metabase/lib/extraction_test.cljc

diff --git a/docs/developers-guide/mbql-library-changelog.md b/docs/developers-guide/mbql-library-changelog.md
index d5afe557a8b..28c0b1bfb88 100644
--- a/docs/developers-guide/mbql-library-changelog.md
+++ b/docs/developers-guide/mbql-library-changelog.md
@@ -22,3 +22,8 @@ and documented in this changelog.
 
   `as-returned` looks at the query and stage, and shifts to a later stage if necessary. If a later stage is needed but
   we were already on the last stage, a new empty stage is appended.
+- New functions `column-extractions` and `extract` have been added.
+  - `column-extractions` returns a list of _extractions_, which are possible custom expressions we can derive from a
+    given column. For example, getting the host or base domain name from a URL or email address, or the day of the week
+    from a date or datetime.
+  - `extract` applies an extraction to the query.
diff --git a/e2e/test/scenarios/visualizations-tabular/drillthroughs/column_extract_drill.cy.spec.js b/e2e/test/scenarios/visualizations-tabular/drillthroughs/column_extract_drill.cy.spec.js
index 4a5c88504fc..afa23b771ae 100644
--- a/e2e/test/scenarios/visualizations-tabular/drillthroughs/column_extract_drill.cy.spec.js
+++ b/e2e/test/scenarios/visualizations-tabular/drillthroughs/column_extract_drill.cy.spec.js
@@ -235,6 +235,7 @@ function extractColumnAndCheck({ column, option, newColumn = option, value }) {
   cy.intercept("POST", "/api/dataset").as(requestAlias);
   cy.findByRole("columnheader", { name: column }).click();
   popover().findByText("Extract day, month…").click();
+  cy.wait(1);
   popover().findByText(option).click();
   cy.wait(`@${requestAlias}`);
 
diff --git a/frontend/src/metabase-lib/types.ts b/frontend/src/metabase-lib/types.ts
index 00f719d8821..99499f42c8a 100644
--- a/frontend/src/metabase-lib/types.ts
+++ b/frontend/src/metabase-lib/types.ts
@@ -447,7 +447,7 @@ export type DrillThruType =
 export type BaseDrillThruInfo<Type extends DrillThruType> = { type: Type };
 
 export type ColumnExtraction = {
-  key: ColumnExtractionKey;
+  tag: ColumnExtractionKey;
   displayName: string;
 };
 
diff --git a/frontend/src/metabase/querying/utils/drills/column-extract-drill/column-extract-drill.tsx b/frontend/src/metabase/querying/utils/drills/column-extract-drill/column-extract-drill.tsx
index 0b3666d1aa6..f3848970d31 100644
--- a/frontend/src/metabase/querying/utils/drills/column-extract-drill/column-extract-drill.tsx
+++ b/frontend/src/metabase/querying/utils/drills/column-extract-drill/column-extract-drill.tsx
@@ -19,7 +19,7 @@ export const columnExtractDrill: Drill<Lib.ColumnExtractDrillThruInfo> = ({
         title: extraction.displayName,
         section: "extract-popover",
         buttonType: "horizontal",
-        question: () => applyDrill(drill, extraction.key),
+        question: () => applyDrill(drill, extraction.tag),
         extra: () => ({ settingsSyncOptions: { column: clicked.column } }),
       }),
     );
diff --git a/src/metabase/lib/core.cljc b/src/metabase/lib/core.cljc
index e54a46b1d52..da9c4e963bd 100644
--- a/src/metabase/lib/core.cljc
+++ b/src/metabase/lib/core.cljc
@@ -16,6 +16,7 @@
    [metabase.lib.drill-thru.pivot :as lib.drill-thru.pivot]
    [metabase.lib.equality :as lib.equality]
    [metabase.lib.expression :as lib.expression]
+   [metabase.lib.extraction :as lib.extraction]
    [metabase.lib.fe-util :as lib.fe-util]
    [metabase.lib.field :as lib.field]
    [metabase.lib.filter :as lib.filter]
@@ -172,6 +173,9 @@
   rtrim
   upper
   lower]
+ [lib.extraction
+  column-extractions
+  extract]
  [lib.fe-util
   dependent-metadata
   expression-clause
diff --git a/src/metabase/lib/drill_thru/column_extract.cljc b/src/metabase/lib/drill_thru/column_extract.cljc
index b3b9e847d15..ad073ee1d00 100644
--- a/src/metabase/lib/drill_thru/column_extract.cljc
+++ b/src/metabase/lib/drill_thru/column_extract.cljc
@@ -16,52 +16,21 @@
    [medley.core :as m]
    [metabase.lib.drill-thru.column-filter :as lib.drill-thru.column-filter]
    [metabase.lib.drill-thru.common :as lib.drill-thru.common]
-   [metabase.lib.expression :as lib.expression]
-   [metabase.lib.filter :as lib.filter]
-   [metabase.lib.metadata :as lib.metadata]
+   [metabase.lib.extraction :as lib.extraction]
    [metabase.lib.metadata.calculation :as lib.metadata.calculation]
    [metabase.lib.schema :as lib.schema]
    [metabase.lib.schema.drill-thru :as lib.schema.drill-thru]
-   [metabase.lib.temporal-bucket :as lib.temporal-bucket]
    [metabase.lib.types.isa :as lib.types.isa]
-   [metabase.lib.util :as lib.util]
    [metabase.shared.util.i18n :as i18n]
-   [metabase.shared.util.time :as shared.ut]
    [metabase.util.malli :as mu]))
 
-(defn- column-extract-temporal-units [column]
-  (let [time-units [:hour-of-day]
-        date-units [:day-of-month :day-of-week :month-of-year :quarter-of-year :year]]
-    (vec (for [unit (concat (when-not (lib.types.isa/date-without-time? column)
-                              time-units)
-                            (when-not (lib.types.isa/time? column)
-                              date-units))]
-           {:key          unit
-            :display-name (lib.temporal-bucket/describe-temporal-unit unit)}))))
-
-(defn- regex-available? [metadata-providerable]
-  ((:features (lib.metadata/database metadata-providerable)) :regex))
-
 (defn- column-extract-drill-for-column [query column]
-  (cond
-    (lib.types.isa/temporal? column) {:display-name (i18n/tru "Extract day, month…")
-                                      :extractions  (column-extract-temporal-units column)}
-
-    ;; The URL and email extractions are powered by regular expressions, and not every database supports those.
-    ;; If the target database doesn't support :regex feature, return nil.
-    (not (regex-available? query))   nil
-    (lib.types.isa/email? column)    {:display-name (i18n/tru "Extract domain")
-                                      :extractions  [{:key          :domain
-                                                      :display-name (i18n/tru "Domain")}
-                                                     {:key          :host
-                                                      :display-name (i18n/tru "Host")}]}
-    (lib.types.isa/URL? column)      {:display-name (i18n/tru "Extract domain, subdomain…")
-                                      :extractions  [{:key          :domain
-                                                      :display-name (i18n/tru "Domain")}
-                                                     {:key          :subdomain
-                                                      :display-name (i18n/tru "Subdomain")}
-                                                     {:key          :host
-                                                      :display-name (i18n/tru "Host")}]}))
+  (when-let [extractions (not-empty (lib.extraction/column-extractions query column))]
+    {:extractions  extractions
+     :display-name (cond
+                     (lib.types.isa/temporal? column) (i18n/tru "Extract day, month…")
+                     (lib.types.isa/email? column)    (i18n/tru "Extract domain, host…")
+                     (lib.types.isa/URL? column)      (i18n/tru "Extract domain, subdomain…"))}))
 
 (mu/defn column-extract-drill :- [:maybe ::lib.schema.drill-thru/drill-thru.column-extract]
   "Column clicks on temporal columns only.
@@ -79,42 +48,16 @@
                query stage-number column column-ref :expression)))))
 
 (defmethod lib.drill-thru.common/drill-thru-info-method :drill-thru/column-extract
-  [_query _stage-number drill]
-  (select-keys drill [:display-name :extractions :type]))
-
-(defn- case-expression
-  "Creates a case expression with a condition for each value of the unit."
-  [expression-fn unit n]
-  (lib.expression/case
-    (for [raw-value (range 1 (inc n))]
-      [(lib.filter/= (expression-fn) raw-value) (shared.ut/format-unit raw-value unit)])
-    ""))
-
-(defn- extraction-expression [column tag]
-  (case tag
-    ;; Temporal extractions
-    :hour-of-day     (lib.expression/get-hour column)
-    :day-of-month    (lib.expression/get-day column)
-    :day-of-week     (case-expression #(lib.expression/get-day-of-week column) tag 7)
-    :month-of-year   (case-expression #(lib.expression/get-month column) tag 12)
-    :quarter-of-year (case-expression #(lib.expression/get-quarter column) tag 4)
-    :year            (lib.expression/get-year column)
-    ;; URLs and emails
-    :domain          (lib.expression/domain column)
-    :subdomain       (lib.expression/subdomain column)
-    :host            (lib.expression/host column)))
+  [query stage-number drill]
+  (-> drill
+      (select-keys [:display-name :type])
+      (assoc :extractions (map #(lib.metadata.calculation/display-info query stage-number %)
+                               (:extractions drill)))))
 
 (defmethod lib.drill-thru.common/drill-thru-method :drill-thru/column-extract
   [_query _stage-number {:keys [query stage-number column extractions]} & [tag]]
-  (let [tag                    (keyword tag)
-        {:keys [display-name]} (m/find-first #(= (:key %) tag) extractions)
-        unique-name-fn         (lib.util/unique-name-generator)]
-    (doseq [col-name (->> (lib.util/query-stage query stage-number)
-                          (lib.metadata.calculation/returned-columns query stage-number)
-                          (map :name))]
-      (unique-name-fn col-name))
-    (lib.expression/expression
-      query
-      stage-number
-      (unique-name-fn display-name)
-      (extraction-expression column tag))))
+  (let [tag        (keyword tag)
+        extraction (m/find-first #(= (:tag %) tag) extractions)]
+    (lib.extraction/extract query stage-number
+                            ;; Replace the column on the extraction because we added an extra stage.
+                            (assoc extraction :column column))))
diff --git a/src/metabase/lib/extraction.cljc b/src/metabase/lib/extraction.cljc
new file mode 100644
index 00000000000..48f6c9e4a1a
--- /dev/null
+++ b/src/metabase/lib/extraction.cljc
@@ -0,0 +1,118 @@
+(ns metabase.lib.extraction
+  (:require
+   [metabase.lib.expression :as lib.expression]
+   [metabase.lib.filter :as lib.filter]
+   [metabase.lib.metadata :as lib.metadata]
+   [metabase.lib.metadata.calculation :as lib.metadata.calculation]
+   [metabase.lib.schema :as lib.schema]
+   [metabase.lib.schema.extraction :as lib.schema.extraction]
+   [metabase.lib.schema.metadata :as lib.schema.metadata]
+   [metabase.lib.temporal-bucket :as lib.temporal-bucket]
+   [metabase.lib.types.isa :as lib.types.isa]
+   [metabase.lib.util :as lib.util]
+   [metabase.shared.util.i18n :as i18n]
+   [metabase.shared.util.time :as shared.ut]
+   [metabase.util.malli :as mu]))
+
+(defn- column-extract-temporal-units [column] ; one extraction map per temporal unit that applies to `column`
+  (let [time-units [:hour-of-day]
+        date-units [:day-of-month :day-of-week :month-of-year :quarter-of-year :year]]
+    (vec (for [unit (concat (when-not (lib.types.isa/date-without-time? column) ; date-only columns get no time units
+                              time-units)
+                            (when-not (lib.types.isa/time? column) ; time-only columns get no date units
+                              date-units))]
+           {:lib/type     ::extraction
+            :tag          unit
+            :column       column
+            :display-name (lib.temporal-bucket/describe-temporal-unit unit)}))))
+
+(defn- regex-available? [metadata-providerable] ; does the database list :regex among its :features?
+  (-> (lib.metadata/database metadata-providerable)
+      :features
+      (contains? :regex)))
+
+(defn- domain-extraction [column] ; extraction map tagged :domain for `column`
+  {:lib/type     ::extraction
+   :tag          :domain
+   :column       column
+   :display-name (i18n/tru "Domain")})
+
+(defn- subdomain-extraction [column] ; extraction map tagged :subdomain for `column`
+  {:lib/type     ::extraction
+   :tag          :subdomain
+   :column       column
+   :display-name (i18n/tru "Subdomain")})
+
+(defn- host-extraction [column] ; extraction map tagged :host for `column`
+  {:lib/type     ::extraction
+   :tag          :host
+   :column       column
+   :display-name (i18n/tru "Host")})
+
+(defn- email-extractions [column] ; emails offer :domain and :host, but no :subdomain
+  [(domain-extraction    column)
+   (host-extraction      column)])
+
+(defn- url-extractions [column] ; URLs offer :domain, :subdomain and :host, in that order
+  [(domain-extraction    column)
+   (subdomain-extraction column)
+   (host-extraction      column)])
+
+(mu/defn column-extractions :- [:maybe [:sequential ::lib.schema.extraction/extraction]]
+  "Column extractions are a set of transformations possible on a given `column`, based on its type.
+
+  For example, we might extract the day of the week from a temporal column, or the domain name from an email or URL.
+
+  Returns a list of possible column extractions for the given column, or `nil` if there are none."
+  [query  :- ::lib.schema/query
+   column :- ::lib.schema.metadata/column]
+  (cond
+    (lib.types.isa/temporal? column) (column-extract-temporal-units column) ; tested before the :regex gate below, so temporal extractions never depend on it
+
+    ;; The URL and email extractions are powered by regular expressions, and not every database supports those.
+    ;; If the target database doesn't support :regex feature, return nil.
+    (not (regex-available? query))   nil
+    (lib.types.isa/email? column)    (email-extractions column)
+    (lib.types.isa/URL? column)      (url-extractions column)))
+
+(defmethod lib.metadata.calculation/display-info-method ::extraction
+  [_query _stage-number extraction]
+  (dissoc extraction :lib/type :column)) ; strip :lib/type and :column; the remaining keys are returned unchanged
+
+(defn- case-expression
+  "Creates a case expression with a condition for each value of the unit."
+  [expression-fn unit n]
+  (lib.expression/case
+    (for [raw-value (range 1 (inc n))] ; raw values run from 1 through n inclusive
+      [(lib.filter/= (expression-fn) raw-value) (shared.ut/format-unit raw-value unit)])
+    "")) ; NOTE(review): presumably the fallback when no condition matches - confirm against lib.expression/case
+
+(defn- extraction-expression [column tag] ; maps an extraction tag to the expression clause computed from `column`
+  (case tag ; no default clause, so an unrecognized tag throws
+    ;; Temporal extractions
+    :hour-of-day     (lib.expression/get-hour column)
+    :day-of-month    (lib.expression/get-day column)
+    :day-of-week     (case-expression #(lib.expression/get-day-of-week column) tag 7) ; 7, 12 and 4 below are the number of branches built by case-expression
+    :month-of-year   (case-expression #(lib.expression/get-month column) tag 12)
+    :quarter-of-year (case-expression #(lib.expression/get-quarter column) tag 4)
+    :year            (lib.expression/get-year column)
+    ;; URLs and emails
+    :domain          (lib.expression/domain column)
+    :subdomain       (lib.expression/subdomain column)
+    :host            (lib.expression/host column)))
+
+(mu/defn extract :- ::lib.schema/query
+  "Given a query, stage and extraction as returned by [[column-extractions]], apply that extraction to the query."
+  [query                :- ::lib.schema/query
+   stage-number         :- :int
+   {:keys [column display-name tag]} :- ::lib.schema.extraction/extraction]
+  ;; Add the expression built for `tag` from `column`, named by `display-name` made unique among the stage's returned column names.
+  (let [unique-name-fn (->> (lib.util/query-stage query stage-number)
+                            (lib.metadata.calculation/returned-columns query stage-number)
+                            (map :name)
+                            lib.util/unique-name-generator)]
+    (lib.expression/expression
+      query
+      stage-number
+      (unique-name-fn display-name)
+      (extraction-expression column tag))))
diff --git a/src/metabase/lib/js.cljs b/src/metabase/lib/js.cljs
index fb9c0dfcecf..29468cbe8cb 100644
--- a/src/metabase/lib/js.cljs
+++ b/src/metabase/lib/js.cljs
@@ -1364,6 +1364,26 @@
     (fn [_]
       (to-array (lib.core/expressionable-columns a-query stage-number expression-position)))))
 
+(defn ^:export column-extractions
+  "Column extractions are a set of transformations possible on a given `column`, based on its type.
+
+  For example, we might extract the day of the week from a temporal column, or the domain name from an email or URL.
+
+  Returns a (possibly empty) JS array of possible column extractions for the given column.
+
+  > **Code health:** Healthy"
+  [a-query column]
+  (to-array (lib.core/column-extractions a-query column))) ; a nil result (no extractions) becomes an empty JS array
+
+(defn ^:export extract
+  "Given `a-query` and an `extraction` from [[column-extractions]], apply that extraction to the query.
+
+  Generally this means adding a new expression. Returns an updated query.
+
+  > **Code health:** Healthy"
+  [a-query stage-number extraction]
+  (lib.core/extract a-query stage-number extraction)) ; pure delegation; the extraction is passed through without conversion
+
 (defn ^:export suggested-join-conditions
   "Returns a JS array of possible default join conditions when joining against `joinable`, e.g. a Table, Saved
   Question, or another query. Suggested conditions will be returned if the existing query has a foreign key to the
diff --git a/src/metabase/lib/schema/drill_thru.cljc b/src/metabase/lib/schema/drill_thru.cljc
index 1d47b6fa680..5a1afe1f459 100644
--- a/src/metabase/lib/schema/drill_thru.cljc
+++ b/src/metabase/lib/schema/drill_thru.cljc
@@ -8,6 +8,7 @@
    [metabase.lib.schema.binning :as lib.schema.binning]
    [metabase.lib.schema.common :as lib.schema.common]
    [metabase.lib.schema.expression :as lib.schema.expression]
+   [metabase.lib.schema.extraction :as lib.schema.extraction]
    [metabase.lib.schema.filter :as lib.schema.filter]
    [metabase.lib.schema.id :as lib.schema.id]
    [metabase.lib.schema.metadata :as lib.schema.metadata]
@@ -169,9 +170,7 @@
     [:type         [:= :drill-thru/column-extract]]
     [:query        [:ref ::lib.schema/query]]
     [:stage-number number?]
-    [:extractions  [:sequential [:map
-                                 [:key          keyword?]
-                                 [:display-name string?]]]]]])
+    [:extractions  [:sequential [:ref ::lib.schema.extraction/extraction]]]]])
 
 (mr/def ::drill-thru.combine-columns
   [:merge
diff --git a/src/metabase/lib/schema/extraction.cljc b/src/metabase/lib/schema/extraction.cljc
new file mode 100644
index 00000000000..c83597b7e54
--- /dev/null
+++ b/src/metabase/lib/schema/extraction.cljc
@@ -0,0 +1,13 @@
+(ns metabase.lib.schema.extraction
+  (:require
+   [metabase.lib.schema.metadata :as lib.schema.metadata]
+   [metabase.util.malli.registry :as mr]))
+
+(mr/def ::extraction
+  [:map
+   [:lib/type     [:= :metabase.lib.extraction/extraction]] ; written out in full; this namespace does not require metabase.lib.extraction
+   [:tag          [:enum
+                   :domain :subdomain :host ; URL and email extractions
+                   :hour-of-day :day-of-month :day-of-week :month-of-year :quarter-of-year :year]] ; temporal extractions
+   [:column       ::lib.schema.metadata/column]
+   [:display-name :string]])
diff --git a/src/metabase/lib/util.cljc b/src/metabase/lib/util.cljc
index fb8faaabb00..783b1e90b0f 100644
--- a/src/metabase/lib/util.cljc
+++ b/src/metabase/lib/util.cljc
@@ -494,16 +494,25 @@
     (f str) => str
 
   That takes any sort of string identifier (e.g. a column alias or table/join alias) and returns a guaranteed-unique
-  name truncated to 60 characters (actually 51 characters plus a hash)."
-  []
-  (comp truncate-alias
-        (mbql.u/unique-name-generator
-         ;; unique by lower-case name, e.g. `NAME` and `name` => `NAME` and `name_2`
-         ;;
-         ;; some databases treat aliases as case-insensitive so make sure the generated aliases are unique regardless
-         ;; of case
-         :name-key-fn     u/lower-case-en
-         :unique-alias-fn unique-alias)))
+  name truncated to 60 characters (actually 51 characters plus a hash).
+
+  Optionally takes a list of names which are already defined, \"priming\" the generator with eg. all the column names
+  that currently exist on a stage of the query."
+  ([]
+   (comp truncate-alias
+         (mbql.u/unique-name-generator
+           ;; unique by lower-case name, e.g. `NAME` and `name` => `NAME` and `name_2`
+           ;;
+           ;; some databases treat aliases as case-insensitive so make sure the generated aliases are unique regardless
+           ;; of case
+           :name-key-fn     u/lower-case-en
+           :unique-alias-fn unique-alias)))
+
+  ([existing-names :- [:sequential :string]]
+   (let [f (unique-name-generator)]
+     (doseq [existing existing-names]
+       (f existing))
+     f)))
 
 (def ^:private strip-id-regex
   #?(:cljs (js/RegExp. " id$" "i")
diff --git a/test/metabase/lib/drill_thru/column_extract_test.cljc b/test/metabase/lib/drill_thru/column_extract_test.cljc
index 88cf924c261..9621b1c4e05 100644
--- a/test/metabase/lib/drill_thru/column_extract_test.cljc
+++ b/test/metabase/lib/drill_thru/column_extract_test.cljc
@@ -15,14 +15,14 @@
 #?(:cljs (comment metabase.test-runner.assert-exprs.approximately-equal/keep-me))
 
 (def ^:private time-extraction-units
-  [{:key :hour-of-day, :display-name "Hour of day"}])
+  [{:tag :hour-of-day, :display-name "Hour of day"}])
 
 (def ^:private date-extraction-units
-  [{:key :day-of-month,    :display-name "Day of month"}
-   {:key :day-of-week,     :display-name "Day of week"}
-   {:key :month-of-year,   :display-name "Month of year"}
-   {:key :quarter-of-year, :display-name "Quarter of year"}
-   {:key :year,            :display-name "Year"}])
+  [{:tag :day-of-month,    :display-name "Day of month"}
+   {:tag :day-of-week,     :display-name "Day of week"}
+   {:tag :month-of-year,   :display-name "Month of year"}
+   {:tag :quarter-of-year, :display-name "Quarter of year"}
+   {:tag :year,            :display-name "Year"}])
 
 (def ^:private datetime-extraction-units
   (concat time-extraction-units date-extraction-units))
@@ -285,9 +285,9 @@
         query    (lib/query mp (lib.metadata/table mp (meta/id :people)))
         expected {:type         :drill-thru/column-extract
                   :display-name "Extract domain, subdomain…"
-                  :extractions  [{:key :domain,    :display-name "Domain"}
-                                 {:key :subdomain, :display-name "Subdomain"}
-                                 {:key :host,      :display-name "Host"}]}]
+                  :extractions  [{:tag :domain,    :display-name "Domain"}
+                                 {:tag :subdomain, :display-name "Subdomain"}
+                                 {:tag :host,      :display-name "Host"}]}]
     (testing "Extracting Domain"
       (lib.drill-thru.tu/test-drill-application
         {:drill-type     :drill-thru/column-extract
@@ -335,9 +335,9 @@
          :custom-query   query-regex
          :expected       {:type         :drill-thru/column-extract
                           :display-name "Extract domain, subdomain…"
-                          :extractions  [{:key :domain,    :display-name "Domain"}
-                                         {:key :subdomain, :display-name "Subdomain"}
-                                         {:key :host,      :display-name "Host"}]}
+                          :extractions  [{:tag :domain,    :display-name "Domain"}
+                                         {:tag :subdomain, :display-name "Subdomain"}
+                                         {:tag :host,      :display-name "Host"}]}
          :drill-args     ["subdomain"]
          :expected-query {:stages [{:expressions [[:subdomain {:lib/expression-name "Subdomain"}
                                                    [:field {} 9999001]]]}]}}))
@@ -361,9 +361,9 @@
          :column-name    "EMAIL"
          :custom-query   query-regex
          :expected       {:type         :drill-thru/column-extract
-                          :display-name "Extract domain"
-                          :extractions  [{:key :domain, :display-name "Domain"}
-                                         {:key :host,   :display-name "Host"}]}
+                          :display-name "Extract domain, host…"
+                          :extractions  [{:tag :domain, :display-name "Domain"}
+                                         {:tag :host,   :display-name "Host"}]}
          :drill-args     ["domain"]
          :expected-query {:stages [{:expressions [[:domain {:lib/expression-name "Domain"}
                                                    [:field {} (meta/id :people :email)]]]}]}}))
diff --git a/test/metabase/lib/drill_thru_test.cljc b/test/metabase/lib/drill_thru_test.cljc
index d24465682a9..7da2a8b1893 100644
--- a/test/metabase/lib/drill_thru_test.cljc
+++ b/test/metabase/lib/drill_thru_test.cljc
@@ -89,7 +89,7 @@
 
     :drill-thru/column-extract
     (for [extraction (:extractions drill)]
-      [(:key extraction)])
+      [(:tag extraction)])
 
     [nil]))
 
@@ -211,7 +211,7 @@
                   :type         :drill-thru/column-extract
                   :query        orders-query
                   :stage-number -1
-                  :extractions  (partial mc/validate [:sequential [:map [:key keyword?]]])}]
+                  :extractions  (partial mc/validate [:sequential [:map [:tag keyword?]]])}]
                 (lib/available-drill-thrus orders-query -1 context)))
         (test-drill-applications orders-query context)))))
 
@@ -715,7 +715,7 @@
                   {:type :drill-thru/summarize-column, :aggregations [:distinct]}
                   {:type        :drill-thru/column-extract
                    :extractions (partial mc/validate [:sequential [:map
-                                                                   [:key          keyword?]
+                                                                   [:tag          keyword?]
                                                                    [:display-name string?]]])}]}))
 
 (deftest ^:parallel available-drill-thrus-test-9
diff --git a/test/metabase/lib/extraction_test.cljc b/test/metabase/lib/extraction_test.cljc
new file mode 100644
index 00000000000..29673801e4a
--- /dev/null
+++ b/test/metabase/lib/extraction_test.cljc
@@ -0,0 +1,204 @@
+(ns metabase.lib.extraction-test
+  (:require
+   [clojure.test :refer [deftest is testing]]
+   [medley.core :as m]
+   [metabase.lib.core :as lib]
+   [metabase.lib.metadata :as lib.metadata]
+   [metabase.lib.test-metadata :as meta]
+   [metabase.lib.test-util :as lib.tu]
+   #?@(:clj  ([metabase.test :as mt])
+       :cljs ([metabase.test-runner.assert-exprs.approximately-equal]))))
+
+(defn- case-extraction
+  "Builds the `=?`-friendly expected clause for a `:case`-based extraction, eg. `:day-of-week`: label N (counting
+  from 1) is paired with the condition that `[extraction {} [:field {} field-id]]` equals N; the fallback is \"\".
+  `(case-extraction :get-month \"Month of year\" (meta/id :orders :created-at) [\"Jan\" \"Feb\" ... \"Dec\"])`"
+  [extraction expression-name field-id labels]
+  [:case {:lib/expression-name expression-name}
+   (mapv (fn [ordinal label] [[:= {} [extraction {} [:field {} field-id]] ordinal] label])
+         (iterate inc 1) labels)
+   ""])
+
+(deftest ^:parallel column-extraction-test-1-datetime-column
+  (testing "extract on a regular datetime column without aggregations adds the column in this stage"
+    (let [query       (lib/query meta/metadata-provider (meta/table-metadata :orders))
+          columns     (lib/returned-columns query)
+          created-at  (m/find-first #(= (:name %) "CREATED_AT") columns)
+          extractions (lib/column-extractions query created-at)
+          by-tag      (m/index-by :tag extractions)] ; tag -> extraction, for picking one to apply below
+      (is (=? [{:tag :hour-of-day,     :column created-at, :display-name "Hour of day"}
+               {:tag :day-of-month,    :column created-at, :display-name "Day of month"}
+               {:tag :day-of-week,     :column created-at, :display-name "Day of week"}
+               {:tag :month-of-year,   :column created-at, :display-name "Month of year"}
+               {:tag :quarter-of-year, :column created-at, :display-name "Quarter of year"}
+               {:tag :year,            :column created-at, :display-name "Year"}]
+              extractions)) ; NOTE(review): presumably `=?` also pins the order of the extractions - confirm
+      (testing "extracting :month-of-year" ; month, weekday and quarter expect a :case over 1-based labels
+        (is (=? {:stages [{:expressions
+                           [(case-extraction :get-month "Month of year" (meta/id :orders :created-at)
+                                             ["Jan" "Feb" "Mar" "Apr" "May" "Jun"
+                                              "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"])]}]}
+                (lib/extract query -1 (:month-of-year by-tag)))))
+      (testing "extracting :day-of-week"
+        (is (=? {:stages [{:expressions
+                           [(case-extraction :get-day-of-week "Day of week" (meta/id :orders :created-at)
+                                             ["Sunday" "Monday" "Tuesday" "Wednesday" "Thursday"
+                                              "Friday" "Saturday"])]}]}
+                (lib/extract query -1 (:day-of-week by-tag)))))
+      (testing "extracting :quarter-of-year"
+        (is (=? {:stages [{:expressions
+                           [(case-extraction :get-quarter "Quarter of year" (meta/id :orders :created-at)
+                                             ["Q1" "Q2" "Q3" "Q4"])]}]}
+                (lib/extract query -1 (:quarter-of-year by-tag)))))
+      (doseq [[tag expr label] [[:year         :get-year "Year"] ; [tag, expected clause, expected expression name]
+                                [:day-of-month :get-day  "Day of month"]
+                                [:hour-of-day  :get-hour "Hour of day"]]] ; these expect a bare clause, not a :case
+        (testing (str "extracting " tag)
+          (is (=? {:stages [{:expressions [[expr {:lib/expression-name label}
+                                            [:field {} (meta/id :orders :created-at)]]]}]}
+                  (lib/extract query -1 (get by-tag tag)))))))))
+
+(deftest ^:parallel duplicate-names-test
+  (testing "extracting the same field twice disambiguates the expression names"
+    (let [;; Start from the standard ORDERS query, then add a "Day of month" expression up front.
+          orders      (lib/query meta/metadata-provider (meta/table-metadata :orders))
+          created-at  (meta/field-metadata :orders :created-at)
+          query       (lib/expression orders -1 "Day of month" (lib/get-day created-at))
+          ;; Both expected expressions wrap this same field ref.
+          created-ref [:field {} (meta/id :orders :created-at)]]
+      (is (=? {:stages [{:expressions [;; already present before extracting
+                                       [:get-day {:lib/expression-name "Day of month"} created-ref]
+                                       ;; appended by `lib/extract`, with a de-duplicated name
+                                       [:get-day {:lib/expression-name "Day of month_2"} created-ref]]}]}
+              (->> (lib/returned-columns query)
+                   (m/find-first (comp #{"CREATED_AT"} :name))
+                   (lib/column-extractions query)
+                   (m/find-first #(= (:tag %) :day-of-month))
+                   (lib/extract query -1)))))))
+
+#?(:clj
+   ;; TODO: This should be possible to run in CLJS if we have a library for setting the locale in JS.
+   ;; Metabase FE has this in frontend/src/metabase/lib/i18n.js but that's loaded after the CLJS.
+   (deftest ^:synchronized i18n-output-test ; NOTE(review): presumably not ^:parallel because of `mt/with-locale` - confirm
+     (testing "column-extract with custom labels get i18n'd"
+       (mt/with-locale "es"
+         (let [query (lib/query meta/metadata-provider (meta/table-metadata :orders))]
+           (is (=? {:stages [{:expressions
+                              ;; TODO: The display name should also be getting translated!
+                              ;; It seems like extraction isn't working for [[describe-temporal-unit]].
+                              [(case-extraction :get-day-of-week "Day of week" (meta/id :orders :created-at) ; name still English
+                                                ["domingo" "lunes" "martes" "miércoles" "jueves" ; Spanish weekday labels
+                                                 "viernes" "sábado"])]}]}
+                   (->> (lib/returned-columns query)
+                        (m/find-first #(= (:name %) "CREATED_AT"))
+                        (lib/column-extractions query)
+                        (m/find-first (comp #{:day-of-week} :tag))
+                        (lib/extract query -1)))))))))
+
+(deftest ^:parallel extract-relevant-units-test-1-time
+  ;; A made-up SHIP_TIME column built from ORDERS.CREATED_AT's metadata, retyped as :type/Time.
+  (let [time-field (merge (meta/field-metadata :orders :created-at)
+                          {:id             9999001
+                           :name           "SHIP_TIME"
+                           :display-name   "Ship time"
+                           :base-type      :type/Time
+                           :effective-type :type/Time
+                           :semantic-type  :type/Time})
+        mock-mp    (lib.tu/mock-metadata-provider {:fields [time-field]})
+        provider   (lib/composed-metadata-provider mock-mp meta/metadata-provider)
+        query      (lib/query provider (lib.metadata/table provider (meta/id :orders)))]
+    (is (=? [{:tag :hour-of-day}]
+            (->> (lib/returned-columns query)
+                 (m/find-first (comp #{"SHIP_TIME"} :name))
+                 (lib/column-extractions query))))))
+
+(deftest ^:parallel extract-relevant-units-test-2-date
+  ;; A made-up ARRIVAL_DATE column built from ORDERS.CREATED_AT's metadata, retyped as :type/Date.
+  (let [date-field (merge (meta/field-metadata :orders :created-at)
+                          {:id             9999001
+                           :name           "ARRIVAL_DATE"
+                           :display-name   "Expected arrival"
+                           :base-type      :type/Date
+                           :effective-type :type/Date
+                           :semantic-type  :type/Date})
+        mock-mp    (lib.tu/mock-metadata-provider {:fields [date-field]})
+        provider   (lib/composed-metadata-provider mock-mp meta/metadata-provider)
+        query      (lib/query provider (lib.metadata/table provider (meta/id :orders)))]
+    (is (=? [{:tag :day-of-month}
+             {:tag :day-of-week}
+             {:tag :month-of-year}
+             {:tag :quarter-of-year}
+             {:tag :year}]
+            (->> (lib/returned-columns query)
+                 (m/find-first (comp #{"ARRIVAL_DATE"} :name))
+                 (lib/column-extractions query))))))
+
+(def ^:private homepage ; pretend URL field: PEOPLE.EMAIL's metadata re-labelled as a :type/URL text column
+  (merge (meta/field-metadata :people :email)
+         {:id             9999001
+          :name           "HOMEPAGE"
+          :display-name   "Homepage URL"
+          :base-type      :type/Text
+          :effective-type :type/Text
+          :semantic-type  :type/URL}))
+
+(defn- homepage-provider
+  "Composes a mock provider holding only the `homepage` field with `base-provider` (default `meta/metadata-provider`)."
+  ([] (homepage-provider meta/metadata-provider))
+  ([base-provider]
+   (let [homepage-only (lib.tu/mock-metadata-provider {:fields [homepage]})]
+     (lib/composed-metadata-provider homepage-only base-provider))))
+
+(deftest ^:parallel extract-from-url-test
+  ;; There are no URL columns in the test metadata, so let's pretend there's one called People.HOMEPAGE.
+  (testing "Extracting a URL column"
+    (let [mp          (homepage-provider)
+          query       (lib/query mp (lib.metadata/table mp (meta/id :people)))
+          extractions (->> (lib/returned-columns query)
+                           (m/find-first #(= (:name %) "HOMEPAGE"))
+                           (lib/column-extractions query))
+          by-tag      (m/index-by :tag extractions)] ; tag -> extraction, for picking one to apply below
+      (is (=? #{:domain :subdomain :host} (set (keys by-tag))))
+      (testing "to :domain"
+        (is (=? {:stages [{:expressions [[:domain {:lib/expression-name "Domain"}
+                                          [:field {} 9999001]]]}]} ; 9999001 is the :id of the mock `homepage` field
+                (lib/extract query -1 (:domain by-tag)))))
+      (testing "to :subdomain"
+        (is (=? {:stages [{:expressions [[:subdomain {:lib/expression-name "Subdomain"}
+                                          [:field {} 9999001]]]}]}
+                (lib/extract query -1 (:subdomain by-tag)))))
+      (testing "to :host"
+        (is (=? {:stages [{:expressions [[:host {:lib/expression-name "Host"}
+                                          [:field {} 9999001]]]}]}
+                (lib/extract query -1 (:host by-tag))))))))
+
+(deftest ^:parallel extracting-from-urls-requires-regex-feature-test
+  (let [with-regex      (lib/query (homepage-provider) (meta/table-metadata :people))
+        regexless-mp    (homepage-provider (meta/updated-metadata-provider update :features disj :regex))
+        without-regex   (lib/query regexless-mp (meta/table-metadata :people))
+        url-extractions (fn [query] ; extractions offered for the HOMEPAGE column of `query`
+                          (->> (lib/returned-columns query)
+                               (m/find-first (comp #{"HOMEPAGE"} :name))
+                               (lib/column-extractions query)))]
+    (testing "when the database supports :regex URL extraction is available"
+      (is (=? [{:tag :domain,    :display-name "Domain"}
+               {:tag :subdomain, :display-name "Subdomain"}
+               {:tag :host,      :display-name "Host"}]
+              (url-extractions with-regex))))
+    (testing "when the database does not support :regex URL extraction is not available"
+      (is (empty? (url-extractions without-regex))))))
+
+(deftest ^:parallel extracting-from-emails-requires-regex-feature-test
+  (let [with-regex        (lib/query meta/metadata-provider (meta/table-metadata :people))
+        regexless-mp      (meta/updated-metadata-provider update :features disj :regex)
+        without-regex     (lib/query regexless-mp (meta/table-metadata :people))
+        email-extractions (fn [query] ; extractions offered for the EMAIL column of `query`
+                            (->> (lib/returned-columns query)
+                                 (m/find-first (comp #{"EMAIL"} :name))
+                                 (lib/column-extractions query)))]
+    (testing "when the database supports :regex email extraction is available"
+      (is (=? [{:tag :domain,    :display-name "Domain"}
+               {:tag :host,      :display-name "Host"}]
+              (email-extractions with-regex))))
+    (testing "when the database does not support :regex email extraction is not available"
+      (is (empty? (email-extractions without-regex))))))
-- 
GitLab