From dd9e126bb656651944a803d9207445508329bbef Mon Sep 17 00:00:00 2001 From: Braden Shepherdson <braden@metabase.com> Date: Thu, 25 Apr 2024 15:28:12 -0400 Subject: [PATCH] [MBQL lib] Add `column-extractions` to the top level (#41525) Column extractions are "canned" expressions based on a column's type. For example, we might extract the weekday from a temporal column, or the domain from an email or URL column. This logic already existed inside the `column-extract` drill; this pulls it out as a top-level concept, since extractions are also being integrated into the notebook editor apart from drills. Part of the follow-up for Extract Column epic #38964. --- .../mbql-library-changelog.md | 5 + .../column_extract_drill.cy.spec.js | 1 + frontend/src/metabase-lib/types.ts | 2 +- .../column-extract-drill.tsx | 2 +- src/metabase/lib/core.cljc | 4 + .../lib/drill_thru/column_extract.cljc | 91 ++------ src/metabase/lib/extraction.cljc | 118 ++++++++++ src/metabase/lib/js.cljs | 20 ++ src/metabase/lib/schema/drill_thru.cljc | 5 +- src/metabase/lib/schema/extraction.cljc | 13 ++ src/metabase/lib/util.cljc | 29 ++- .../lib/drill_thru/column_extract_test.cljc | 30 +-- test/metabase/lib/drill_thru_test.cljc | 6 +- test/metabase/lib/extraction_test.cljc | 204 ++++++++++++++++++ 14 files changed, 423 insertions(+), 107 deletions(-) create mode 100644 src/metabase/lib/extraction.cljc create mode 100644 src/metabase/lib/schema/extraction.cljc create mode 100644 test/metabase/lib/extraction_test.cljc diff --git a/docs/developers-guide/mbql-library-changelog.md b/docs/developers-guide/mbql-library-changelog.md index d5afe557a8b..28c0b1bfb88 100644 --- a/docs/developers-guide/mbql-library-changelog.md +++ b/docs/developers-guide/mbql-library-changelog.md @@ -22,3 +22,8 @@ and documented in this changelog. `as-returned` looks at the query and stage, and shifts to a later stage if necessary. If a later stage is needed but we were already on the last stage, a new empty stage is appended. +- New functions `column-extractions` and `extract` have been added. + - `column-extractions` returns a list of _extractions_, which are possible custom expressions we can derive from a + given column. For example, getting the host or base domain name from a URL or email address, or the day of the week + from a date or datetime. + - `extract` applies an extraction to the query. diff --git a/e2e/test/scenarios/visualizations-tabular/drillthroughs/column_extract_drill.cy.spec.js b/e2e/test/scenarios/visualizations-tabular/drillthroughs/column_extract_drill.cy.spec.js index 4a5c88504fc..afa23b771ae 100644 --- a/e2e/test/scenarios/visualizations-tabular/drillthroughs/column_extract_drill.cy.spec.js +++ b/e2e/test/scenarios/visualizations-tabular/drillthroughs/column_extract_drill.cy.spec.js @@ -235,6 +235,7 @@ function extractColumnAndCheck({ column, option, newColumn = option, value }) { cy.intercept("POST", "/api/dataset").as(requestAlias); cy.findByRole("columnheader", { name: column }).click(); popover().findByText("Extract day, month…").click(); + cy.wait(1); popover().findByText(option).click(); cy.wait(`@${requestAlias}`); diff --git a/frontend/src/metabase-lib/types.ts b/frontend/src/metabase-lib/types.ts index 00f719d8821..99499f42c8a 100644 --- a/frontend/src/metabase-lib/types.ts +++ b/frontend/src/metabase-lib/types.ts @@ -447,7 +447,7 @@ export type DrillThruType = export type BaseDrillThruInfo<Type extends DrillThruType> = { type: Type }; export type ColumnExtraction = { - key: ColumnExtractionKey; + tag: ColumnExtractionKey; displayName: string; }; diff --git a/frontend/src/metabase/querying/utils/drills/column-extract-drill/column-extract-drill.tsx b/frontend/src/metabase/querying/utils/drills/column-extract-drill/column-extract-drill.tsx index 0b3666d1aa6..f3848970d31 100644 --- a/frontend/src/metabase/querying/utils/drills/column-extract-drill/column-extract-drill.tsx +++ b/frontend/src/metabase/querying/utils/drills/column-extract-drill/column-extract-drill.tsx @@ -19,7 +19,7 @@ export const columnExtractDrill: Drill<Lib.ColumnExtractDrillThruInfo> = ({ title: extraction.displayName, section: "extract-popover", buttonType: "horizontal", - question: () => applyDrill(drill, extraction.key), + question: () => applyDrill(drill, extraction.tag), extra: () => ({ settingsSyncOptions: { column: clicked.column } }), }), ); diff --git a/src/metabase/lib/core.cljc b/src/metabase/lib/core.cljc index e54a46b1d52..da9c4e963bd 100644 --- a/src/metabase/lib/core.cljc +++ b/src/metabase/lib/core.cljc @@ -16,6 +16,7 @@ [metabase.lib.drill-thru.pivot :as lib.drill-thru.pivot] [metabase.lib.equality :as lib.equality] [metabase.lib.expression :as lib.expression] + [metabase.lib.extraction :as lib.extraction] [metabase.lib.fe-util :as lib.fe-util] [metabase.lib.field :as lib.field] [metabase.lib.filter :as lib.filter] @@ -172,6 +173,9 @@ rtrim upper lower] + [lib.extraction + column-extractions + extract] [lib.fe-util dependent-metadata expression-clause diff --git a/src/metabase/lib/drill_thru/column_extract.cljc b/src/metabase/lib/drill_thru/column_extract.cljc index b3b9e847d15..ad073ee1d00 100644 --- a/src/metabase/lib/drill_thru/column_extract.cljc +++ b/src/metabase/lib/drill_thru/column_extract.cljc @@ -16,52 +16,21 @@ [medley.core :as m] [metabase.lib.drill-thru.column-filter :as lib.drill-thru.column-filter] [metabase.lib.drill-thru.common :as lib.drill-thru.common] - [metabase.lib.expression :as lib.expression] - [metabase.lib.filter :as lib.filter] - [metabase.lib.metadata :as lib.metadata] + [metabase.lib.extraction :as lib.extraction] [metabase.lib.metadata.calculation :as lib.metadata.calculation] [metabase.lib.schema :as lib.schema] [metabase.lib.schema.drill-thru :as lib.schema.drill-thru] - [metabase.lib.temporal-bucket :as lib.temporal-bucket] [metabase.lib.types.isa :as lib.types.isa] - [metabase.lib.util :as lib.util] [metabase.shared.util.i18n :as i18n] - [metabase.shared.util.time :as shared.ut] [metabase.util.malli :as mu])) -(defn- column-extract-temporal-units [column] - (let [time-units [:hour-of-day] - date-units [:day-of-month :day-of-week :month-of-year :quarter-of-year :year]] - (vec (for [unit (concat (when-not (lib.types.isa/date-without-time? column) - time-units) - (when-not (lib.types.isa/time? column) - date-units))] - {:key unit - :display-name (lib.temporal-bucket/describe-temporal-unit unit)})))) - -(defn- regex-available? [metadata-providerable] - ((:features (lib.metadata/database metadata-providerable)) :regex)) - (defn- column-extract-drill-for-column [query column] - (cond - (lib.types.isa/temporal? column) {:display-name (i18n/tru "Extract day, month…") - :extractions (column-extract-temporal-units column)} - - ;; The URL and email extractions are powered by regular expressions, and not every database supports those. - ;; If the target database doesn't support :regex feature, return nil. - (not (regex-available? query)) nil - (lib.types.isa/email? column) {:display-name (i18n/tru "Extract domain") - :extractions [{:key :domain - :display-name (i18n/tru "Domain")} - {:key :host - :display-name (i18n/tru "Host")}]} - (lib.types.isa/URL? column) {:display-name (i18n/tru "Extract domain, subdomain…") - :extractions [{:key :domain - :display-name (i18n/tru "Domain")} - {:key :subdomain - :display-name (i18n/tru "Subdomain")} - {:key :host - :display-name (i18n/tru "Host")}]})) + (when-let [extractions (not-empty (lib.extraction/column-extractions query column))] + {:extractions extractions + :display-name (cond + (lib.types.isa/temporal? column) (i18n/tru "Extract day, month…") + (lib.types.isa/email? column) (i18n/tru "Extract domain, host…") + (lib.types.isa/URL? column) (i18n/tru "Extract domain, subdomain…"))})) (mu/defn column-extract-drill :- [:maybe ::lib.schema.drill-thru/drill-thru.column-extract] "Column clicks on temporal columns only. @@ -79,42 +48,16 @@ query stage-number column column-ref :expression))))) (defmethod lib.drill-thru.common/drill-thru-info-method :drill-thru/column-extract - [_query _stage-number drill] - (select-keys drill [:display-name :extractions :type])) - -(defn- case-expression - "Creates a case expression with a condition for each value of the unit." - [expression-fn unit n] - (lib.expression/case - (for [raw-value (range 1 (inc n))] - [(lib.filter/= (expression-fn) raw-value) (shared.ut/format-unit raw-value unit)]) - "")) - -(defn- extraction-expression [column tag] - (case tag - ;; Temporal extractions - :hour-of-day (lib.expression/get-hour column) - :day-of-month (lib.expression/get-day column) - :day-of-week (case-expression #(lib.expression/get-day-of-week column) tag 7) - :month-of-year (case-expression #(lib.expression/get-month column) tag 12) - :quarter-of-year (case-expression #(lib.expression/get-quarter column) tag 4) - :year (lib.expression/get-year column) - ;; URLs and emails - :domain (lib.expression/domain column) - :subdomain (lib.expression/subdomain column) - :host (lib.expression/host column))) + [query stage-number drill] + (-> drill + (select-keys [:display-name :type]) + (assoc :extractions (map #(lib.metadata.calculation/display-info query stage-number %) + (:extractions drill))))) (defmethod lib.drill-thru.common/drill-thru-method :drill-thru/column-extract [_query _stage-number {:keys [query stage-number column extractions]} & [tag]] - (let [tag (keyword tag) - {:keys [display-name]} (m/find-first #(= (:key %) tag) extractions) - unique-name-fn (lib.util/unique-name-generator)] - (doseq [col-name (->> (lib.util/query-stage query stage-number) - (lib.metadata.calculation/returned-columns query stage-number) - (map :name))] - (unique-name-fn col-name)) - (lib.expression/expression - query - stage-number - (unique-name-fn display-name) - (extraction-expression column tag)))) + (let [tag (keyword tag) + extraction (m/find-first #(= (:tag %) tag) extractions)] + (lib.extraction/extract query stage-number + ;; Replace the column on the extraction because we added an extra stage. + (assoc extraction :column column)))) diff --git a/src/metabase/lib/extraction.cljc b/src/metabase/lib/extraction.cljc new file mode 100644 index 00000000000..48f6c9e4a1a --- /dev/null +++ b/src/metabase/lib/extraction.cljc @@ -0,0 +1,118 @@ +(ns metabase.lib.extraction + (:require + [metabase.lib.expression :as lib.expression] + [metabase.lib.filter :as lib.filter] + [metabase.lib.metadata :as lib.metadata] + [metabase.lib.metadata.calculation :as lib.metadata.calculation] + [metabase.lib.schema :as lib.schema] + [metabase.lib.schema.extraction :as lib.schema.extraction] + [metabase.lib.schema.metadata :as lib.schema.metadata] + [metabase.lib.temporal-bucket :as lib.temporal-bucket] + [metabase.lib.types.isa :as lib.types.isa] + [metabase.lib.util :as lib.util] + [metabase.shared.util.i18n :as i18n] + [metabase.shared.util.time :as shared.ut] + [metabase.util.malli :as mu])) + +(defn- column-extract-temporal-units [column] + (let [time-units [:hour-of-day] + date-units [:day-of-month :day-of-week :month-of-year :quarter-of-year :year]] + (vec (for [unit (concat (when-not (lib.types.isa/date-without-time? column) + time-units) + (when-not (lib.types.isa/time? column) + date-units))] + {:lib/type ::extraction + :tag unit + :column column + :display-name (lib.temporal-bucket/describe-temporal-unit unit)})))) + +(defn- regex-available? [metadata-providerable] + (-> (lib.metadata/database metadata-providerable) + :features + (contains? :regex))) + +(defn- domain-extraction [column] + {:lib/type ::extraction + :tag :domain + :column column + :display-name (i18n/tru "Domain")}) + +(defn- subdomain-extraction [column] + {:lib/type ::extraction + :tag :subdomain + :column column + :display-name (i18n/tru "Subdomain")}) + +(defn- host-extraction [column] + {:lib/type ::extraction + :tag :host + :column column + :display-name (i18n/tru "Host")}) + +(defn- email-extractions [column] + [(domain-extraction column) + (host-extraction column)]) + +(defn- url-extractions [column] + [(domain-extraction column) + (subdomain-extraction column) + (host-extraction column)]) + +(mu/defn column-extractions :- [:maybe [:sequential ::lib.schema.extraction/extraction]] + "Column extractions are a set of transformations possible on a given `column`, based on its type. + + For example, we might extract the day of the week from a temporal column, or the domain name from an email or URL. + + Returns a list of possible column extractions for the given column, or `nil` if there are none." + [query :- ::lib.schema/query + column :- ::lib.schema.metadata/column] + (cond + (lib.types.isa/temporal? column) (column-extract-temporal-units column) + + ;; The URL and email extractions are powered by regular expressions, and not every database supports those. + ;; If the target database doesn't support :regex feature, return nil. + (not (regex-available? query)) nil + (lib.types.isa/email? column) (email-extractions column) + (lib.types.isa/URL? column) (url-extractions column))) + +(defmethod lib.metadata.calculation/display-info-method ::extraction + [_query _stage-number extraction] + (dissoc extraction :lib/type :column)) + +(defn- case-expression + "Creates a case expression with a condition for each value of the unit." + [expression-fn unit n] + (lib.expression/case + (for [raw-value (range 1 (inc n))] + [(lib.filter/= (expression-fn) raw-value) (shared.ut/format-unit raw-value unit)]) + "")) + +(defn- extraction-expression [column tag] + (case tag + ;; Temporal extractions + :hour-of-day (lib.expression/get-hour column) + :day-of-month (lib.expression/get-day column) + :day-of-week (case-expression #(lib.expression/get-day-of-week column) tag 7) + :month-of-year (case-expression #(lib.expression/get-month column) tag 12) + :quarter-of-year (case-expression #(lib.expression/get-quarter column) tag 4) + :year (lib.expression/get-year column) + ;; URLs and emails + :domain (lib.expression/domain column) + :subdomain (lib.expression/subdomain column) + :host (lib.expression/host column))) + +(mu/defn extract :- ::lib.schema/query + "Given a query, stage and extraction as returned by [[column-extractions]], apply that extraction to the query." + [query :- ::lib.schema/query + stage-number :- :int + {:keys [column display-name tag]} :- ::lib.schema.extraction/extraction] + ;; Currently this is very simple: use the `:tag` as an expression function and the column as the only argument. + (let [unique-name-fn (->> (lib.util/query-stage query stage-number) + (lib.metadata.calculation/returned-columns query stage-number) + (map :name) + lib.util/unique-name-generator)] + (lib.expression/expression + query + stage-number + (unique-name-fn display-name) + (extraction-expression column tag)))) diff --git a/src/metabase/lib/js.cljs b/src/metabase/lib/js.cljs index fb9c0dfcecf..29468cbe8cb 100644 --- a/src/metabase/lib/js.cljs +++ b/src/metabase/lib/js.cljs @@ -1364,6 +1364,26 @@ (fn [_] (to-array (lib.core/expressionable-columns a-query stage-number expression-position))))) +(defn ^:export column-extractions + "Column extractions are a set of transformations possible on a given `column`, based on its type. + + For example, we might extract the day of the week from a temporal column, or the domain name from an email or URL. + + Returns a (possibly empty) JS array of possible column extractions for the given column. + + > **Code health:** Healthy" + [a-query column] + (to-array (lib.core/column-extractions a-query column))) + +(defn ^:export extract + "Given `a-query` and an `extraction` from [[column-extractions]], apply that extraction to the query. + + Generally this means adding a new expression. Returns an updated query. + + > **Code health:** Healthy" + [a-query stage-number extraction] + (lib.core/extract a-query stage-number extraction)) + (defn ^:export suggested-join-conditions "Returns a JS array of possible default join conditions when joining against `joinable`, e.g. a Table, Saved Question, or another query. Suggested conditions will be returned if the existing query has a foreign key to the diff --git a/src/metabase/lib/schema/drill_thru.cljc b/src/metabase/lib/schema/drill_thru.cljc index 1d47b6fa680..5a1afe1f459 100644 --- a/src/metabase/lib/schema/drill_thru.cljc +++ b/src/metabase/lib/schema/drill_thru.cljc @@ -8,6 +8,7 @@ [metabase.lib.schema.binning :as lib.schema.binning] [metabase.lib.schema.common :as lib.schema.common] [metabase.lib.schema.expression :as lib.schema.expression] + [metabase.lib.schema.extraction :as lib.schema.extraction] [metabase.lib.schema.filter :as lib.schema.filter] [metabase.lib.schema.id :as lib.schema.id] [metabase.lib.schema.metadata :as lib.schema.metadata] @@ -169,9 +170,7 @@ [:type [:= :drill-thru/column-extract]] [:query [:ref ::lib.schema/query]] [:stage-number number?] - [:extractions [:sequential [:map - [:key keyword?] - [:display-name string?]]]]]]) + [:extractions [:sequential [:ref ::lib.schema.extraction/extraction]]]]]) (mr/def ::drill-thru.combine-columns [:merge diff --git a/src/metabase/lib/schema/extraction.cljc b/src/metabase/lib/schema/extraction.cljc new file mode 100644 index 00000000000..c83597b7e54 --- /dev/null +++ b/src/metabase/lib/schema/extraction.cljc @@ -0,0 +1,13 @@ +(ns metabase.lib.schema.extraction + (:require + [metabase.lib.schema.metadata :as lib.schema.metadata] + [metabase.util.malli.registry :as mr])) + +(mr/def ::extraction + [:map + [:lib/type [:= :metabase.lib.extraction/extraction]] + [:tag [:enum + :domain :subdomain :host + :hour-of-day :day-of-month :day-of-week :month-of-year :quarter-of-year :year]] + [:column ::lib.schema.metadata/column] + [:display-name :string]]) diff --git a/src/metabase/lib/util.cljc b/src/metabase/lib/util.cljc index fb8faaabb00..783b1e90b0f 100644 --- a/src/metabase/lib/util.cljc +++ b/src/metabase/lib/util.cljc @@ -494,16 +494,25 @@ (f str) => str That takes any sort of string identifier (e.g. a column alias or table/join alias) and returns a guaranteed-unique - name truncated to 60 characters (actually 51 characters plus a hash)." - [] - (comp truncate-alias - (mbql.u/unique-name-generator - ;; unique by lower-case name, e.g. `NAME` and `name` => `NAME` and `name_2` - ;; - ;; some databases treat aliases as case-insensitive so make sure the generated aliases are unique regardless - ;; of case - :name-key-fn u/lower-case-en - :unique-alias-fn unique-alias))) + name truncated to 60 characters (actually 51 characters plus a hash). + + Optionally takes a list of names which are already defined, \"priming\" the generator with eg. all the column names + that currently exist on a stage of the query." + ([] + (comp truncate-alias + (mbql.u/unique-name-generator + ;; unique by lower-case name, e.g. `NAME` and `name` => `NAME` and `name_2` + ;; + ;; some databases treat aliases as case-insensitive so make sure the generated aliases are unique regardless + ;; of case + :name-key-fn u/lower-case-en + :unique-alias-fn unique-alias))) + + ([existing-names :- [:sequential :string]] + (let [f (unique-name-generator)] + (doseq [existing existing-names] + (f existing)) + f))) (def ^:private strip-id-regex #?(:cljs (js/RegExp. " id$" "i") diff --git a/test/metabase/lib/drill_thru/column_extract_test.cljc b/test/metabase/lib/drill_thru/column_extract_test.cljc index 88cf924c261..9621b1c4e05 100644 --- a/test/metabase/lib/drill_thru/column_extract_test.cljc +++ b/test/metabase/lib/drill_thru/column_extract_test.cljc @@ -15,14 +15,14 @@ #?(:cljs (comment metabase.test-runner.assert-exprs.approximately-equal/keep-me)) (def ^:private time-extraction-units - [{:key :hour-of-day, :display-name "Hour of day"}]) + [{:tag :hour-of-day, :display-name "Hour of day"}]) (def ^:private date-extraction-units - [{:key :day-of-month, :display-name "Day of month"} - {:key :day-of-week, :display-name "Day of week"} - {:key :month-of-year, :display-name "Month of year"} - {:key :quarter-of-year, :display-name "Quarter of year"} - {:key :year, :display-name "Year"}]) + [{:tag :day-of-month, :display-name "Day of month"} + {:tag :day-of-week, :display-name "Day of week"} + {:tag :month-of-year, :display-name "Month of year"} + {:tag :quarter-of-year, :display-name "Quarter of year"} + {:tag :year, :display-name "Year"}]) (def ^:private datetime-extraction-units (concat time-extraction-units date-extraction-units)) @@ -285,9 +285,9 @@ query (lib/query mp (lib.metadata/table mp (meta/id :people))) expected {:type :drill-thru/column-extract :display-name "Extract domain, subdomain…" - :extractions [{:key :domain, :display-name "Domain"} - {:key :subdomain, :display-name "Subdomain"} - {:key :host, :display-name "Host"}]}] + :extractions [{:tag :domain, :display-name "Domain"} + {:tag :subdomain, :display-name "Subdomain"} + {:tag :host, :display-name "Host"}]}] (testing "Extracting Domain" (lib.drill-thru.tu/test-drill-application {:drill-type :drill-thru/column-extract @@ -335,9 +335,9 @@ :custom-query query-regex :expected {:type :drill-thru/column-extract :display-name "Extract domain, subdomain…" - :extractions [{:key :domain, :display-name "Domain"} - {:key :subdomain, :display-name "Subdomain"} - {:key :host, :display-name "Host"}]} + :extractions [{:tag :domain, :display-name "Domain"} + {:tag :subdomain, :display-name "Subdomain"} + {:tag :host, :display-name "Host"}]} :drill-args ["subdomain"] :expected-query {:stages [{:expressions [[:subdomain {:lib/expression-name "Subdomain"} [:field {} 9999001]]]}]}})) @@ -361,9 +361,9 @@ :column-name "EMAIL" :custom-query query-regex :expected {:type :drill-thru/column-extract - :display-name "Extract domain" - :extractions [{:key :domain, :display-name "Domain"} - {:key :host, :display-name "Host"}]} + :display-name "Extract domain, host…" + :extractions [{:tag :domain, :display-name "Domain"} + {:tag :host, :display-name "Host"}]} :drill-args ["domain"] :expected-query {:stages [{:expressions [[:domain {:lib/expression-name "Domain"} [:field {} (meta/id :people :email)]]]}]}})) diff --git a/test/metabase/lib/drill_thru_test.cljc b/test/metabase/lib/drill_thru_test.cljc index d24465682a9..7da2a8b1893 100644 --- a/test/metabase/lib/drill_thru_test.cljc +++ b/test/metabase/lib/drill_thru_test.cljc @@ -89,7 +89,7 @@ :drill-thru/column-extract (for [extraction (:extractions drill)] - [(:key extraction)]) + [(:tag extraction)]) [nil])) @@ -211,7 +211,7 @@ :type :drill-thru/column-extract :query orders-query :stage-number -1 - :extractions (partial mc/validate [:sequential [:map [:key keyword?]]])}] + :extractions (partial mc/validate [:sequential [:map [:tag keyword?]]])}] (lib/available-drill-thrus orders-query -1 context))) (test-drill-applications orders-query context))))) @@ -715,7 +715,7 @@ {:type :drill-thru/summarize-column, :aggregations [:distinct]} {:type :drill-thru/column-extract :extractions (partial mc/validate [:sequential [:map - [:key keyword?] + [:tag keyword?] [:display-name string?]]])}]})) (deftest ^:parallel available-drill-thrus-test-9 diff --git a/test/metabase/lib/extraction_test.cljc b/test/metabase/lib/extraction_test.cljc new file mode 100644 index 00000000000..29673801e4a --- /dev/null +++ b/test/metabase/lib/extraction_test.cljc @@ -0,0 +1,204 @@ +(ns metabase.lib.extraction-test + (:require + [clojure.test :refer [deftest is testing]] + [medley.core :as m] + [metabase.lib.core :as lib] + [metabase.lib.metadata :as lib.metadata] + [metabase.lib.test-metadata :as meta] + [metabase.lib.test-util :as lib.tu] + #?@(:clj ([metabase.test :as mt]) + :cljs ([metabase.test-runner.assert-exprs.approximately-equal])))) + +(defn- case-extraction + "Returns `=?` friendly value for a `:case`-based extraction, eg. `:day-of-week`. + + `(case-extraction :get-month \"Month of year\" (meta/id :orders :created-at) [\"Jan\" \"Feb\" ... \"Dec\"])`" + [extraction expression-name field-id labels] + [:case {:lib/expression-name expression-name} + (vec (for [[index label] (m/indexed labels)] + [[:= {} [extraction {} [:field {} field-id]] (inc index)] label])) + ""]) + +(deftest ^:parallel column-extraction-test-1-datetime-column + (testing "extract on a regular datetime column without aggregations adds the column in this stage" + (let [query (lib/query meta/metadata-provider (meta/table-metadata :orders)) + columns (lib/returned-columns query) + created-at (m/find-first #(= (:name %) "CREATED_AT") columns) + extractions (lib/column-extractions query created-at) + by-tag (m/index-by :tag extractions)] + (is (=? [{:tag :hour-of-day, :column created-at, :display-name "Hour of day"} + {:tag :day-of-month, :column created-at, :display-name "Day of month"} + {:tag :day-of-week, :column created-at, :display-name "Day of week"} + {:tag :month-of-year, :column created-at, :display-name "Month of year"} + {:tag :quarter-of-year, :column created-at, :display-name "Quarter of year"} + {:tag :year, :column created-at, :display-name "Year"}] + extractions)) + (testing "extracting :month-of-year" + (is (=? {:stages [{:expressions + [(case-extraction :get-month "Month of year" (meta/id :orders :created-at) + ["Jan" "Feb" "Mar" "Apr" "May" "Jun" + "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"])]}]} + (lib/extract query -1 (:month-of-year by-tag))))) + (testing "extracting :day-of-week" + (is (=? {:stages [{:expressions + [(case-extraction :get-day-of-week "Day of week" (meta/id :orders :created-at) + ["Sunday" "Monday" "Tuesday" "Wednesday" "Thursday" + "Friday" "Saturday"])]}]} + (lib/extract query -1 (:day-of-week by-tag))))) + (testing "extracting :quarter-of-year" + (is (=? {:stages [{:expressions + [(case-extraction :get-quarter "Quarter of year" (meta/id :orders :created-at) + ["Q1" "Q2" "Q3" "Q4"])]}]} + (lib/extract query -1 (:quarter-of-year by-tag))))) + (doseq [[tag expr label] [[:year :get-year "Year"] + [:day-of-month :get-day "Day of month"] + [:hour-of-day :get-hour "Hour of day"]]] + (testing (str "extracting " tag) + (is (=? {:stages [{:expressions [[expr {:lib/expression-name label} + [:field {} (meta/id :orders :created-at)]]]}]} + (lib/extract query -1 (get by-tag tag))))))))) + +(deftest ^:parallel duplicate-names-test + (testing "extracting the same field twice disambiguates the expression names" + (let [;; The standard ORDERS query but with a :day-of-month extraction already applied. + query (-> (lib/query meta/metadata-provider (meta/table-metadata :orders)) + (lib/expression -1 "Day of month" + (lib/get-day (meta/field-metadata :orders :created-at))))] + (is (=? {:stages [{:expressions [;; The original + [:get-day {:lib/expression-name "Day of month"} + [:field {} (meta/id :orders :created-at)]] + ;; The newly added one + [:get-day {:lib/expression-name "Day of month_2"} + [:field {} (meta/id :orders :created-at)]]]}]} + (->> (lib/returned-columns query) + (m/find-first #(= (:name %) "CREATED_AT")) + (lib/column-extractions query) + (m/find-first (comp #{:day-of-month} :tag)) + (lib/extract query -1))))))) + +#?(:clj + ;; TODO: This should be possible to run in CLJS if we have a library for setting the locale in JS. + ;; Metabase FE has this in frontend/src/metabase/lib/i18n.js but that's loaded after the CLJS. + (deftest ^:synchronized i18n-output-test + (testing "column-extract with custom labels get i18n'd" + (mt/with-locale "es" + (let [query (lib/query meta/metadata-provider (meta/table-metadata :orders))] + (is (=? {:stages [{:expressions + ;; TODO: The display name should also be getting translated! + ;; It seems like extraction isn't working for [[describe-temporal-unit]]. + [(case-extraction :get-day-of-week "Day of week" (meta/id :orders :created-at) + ["domingo" "lunes" "martes" "miércoles" "jueves" + "viernes" "sábado"])]}]} + (->> (lib/returned-columns query) + (m/find-first #(= (:name %) "CREATED_AT")) + (lib/column-extractions query) + (m/find-first (comp #{:day-of-week} :tag)) + (lib/extract query -1))))))))) + +(deftest ^:parallel extract-relevant-units-test-1-time + (let [ship-time (assoc (meta/field-metadata :orders :created-at) + :id 9999001 + :name "SHIP_TIME" + :display-name "Ship time" + :base-type :type/Time + :effective-type :type/Time + :semantic-type :type/Time) + mp (lib/composed-metadata-provider + (lib.tu/mock-metadata-provider {:fields [ship-time]}) + meta/metadata-provider) + query (lib/query mp (lib.metadata/table mp (meta/id :orders)))] + (is (=? [{:tag :hour-of-day}] + (->> (lib/returned-columns query) + (m/find-first #(= (:name %) "SHIP_TIME")) + (lib/column-extractions query)))))) + +(deftest ^:parallel extract-relevant-units-test-2-date + (let [arrival (assoc (meta/field-metadata :orders :created-at) + :id 9999001 + :name "ARRIVAL_DATE" + :display-name "Expected arrival" + :base-type :type/Date + :effective-type :type/Date + :semantic-type :type/Date) + mp (lib/composed-metadata-provider + (lib.tu/mock-metadata-provider {:fields [arrival]}) + meta/metadata-provider) + query (lib/query mp (lib.metadata/table mp (meta/id :orders)))] + (is (=? [{:tag :day-of-month} + {:tag :day-of-week} + {:tag :month-of-year} + {:tag :quarter-of-year} + {:tag :year}] + (->> (lib/returned-columns query) + (m/find-first #(= (:name %) "ARRIVAL_DATE")) + (lib/column-extractions query)))))) + +(def ^:private homepage + (assoc (meta/field-metadata :people :email) + :id 9999001 + :name "HOMEPAGE" + :display-name "Homepage URL" + :base-type :type/Text + :effective-type :type/Text + :semantic-type :type/URL)) + +(defn- homepage-provider + ([] (homepage-provider meta/metadata-provider)) + ([base-provider] + (lib/composed-metadata-provider + (lib.tu/mock-metadata-provider {:fields [homepage]}) + base-provider))) + +(deftest ^:parallel extract-from-url-test + ;; There's no URL columns in the same dataset, but let's pretend there's one called People.HOMEPAGE. + (testing "Extracting a URL column" + (let [mp (homepage-provider) + query (lib/query mp (lib.metadata/table mp (meta/id :people))) + extractions (->> (lib/returned-columns query) + (m/find-first #(= (:name %) "HOMEPAGE")) + (lib/column-extractions query)) + by-tag (m/index-by :tag extractions)] + (is (=? #{:domain :subdomain :host} (set (keys by-tag)))) + (testing "to :domain" + (is (=? {:stages [{:expressions [[:domain {:lib/expression-name "Domain"} + [:field {} 9999001]]]}]} + (lib/extract query -1 (:domain by-tag))))) + (testing "to :subdomain" + (is (=? {:stages [{:expressions [[:subdomain {:lib/expression-name "Subdomain"} + [:field {} 9999001]]]}]} + (lib/extract query -1 (:subdomain by-tag))))) + (testing "to :host" + (is (=? {:stages [{:expressions [[:host {:lib/expression-name "Host"} + [:field {} 9999001]]]}]} + (lib/extract query -1 (:host by-tag)))))))) + +(deftest ^:parallel extracting-from-urls-requires-regex-feature-test + (let [query-regex (lib/query (homepage-provider) (meta/table-metadata :people)) + no-regex (homepage-provider (meta/updated-metadata-provider update :features disj :regex)) + query-no-regex (lib/query no-regex (meta/table-metadata :people))] + (testing "when the database supports :regex URL extraction is available" + (is (=? [{:tag :domain, :display-name "Domain"} + {:tag :subdomain, :display-name "Subdomain"} + {:tag :host, :display-name "Host"}] + (->> (lib/returned-columns query-regex) + (m/find-first #(= (:name %) "HOMEPAGE")) + (lib/column-extractions query-regex))))) + (testing "when the database does not support :regex URL extraction is not available" + (is (empty? (->> (lib/returned-columns query-no-regex) + (m/find-first #(= (:name %) "HOMEPAGE")) + (lib/column-extractions query-no-regex))))))) + +(deftest ^:parallel extracting-from-emails-requires-regex-feature-test + (let [query-regex (lib/query meta/metadata-provider (meta/table-metadata :people)) + no-regex (meta/updated-metadata-provider update :features disj :regex) + query-no-regex (lib/query no-regex (meta/table-metadata :people))] + (testing "when the database supports :regex email extraction is available" + (is (=? [{:tag :domain, :display-name "Domain"} + {:tag :host, :display-name "Host"}] + (->> (lib/returned-columns query-regex) + (m/find-first #(= (:name %) "EMAIL")) + (lib/column-extractions query-regex))))) + (testing "when the database does not support :regex email extraction is not available" + (is (empty? (->> (lib/returned-columns query-no-regex) + (m/find-first #(= (:name %) "EMAIL")) + (lib/column-extractions query-no-regex))))))) -- GitLab