From 1ab3e4e9d68bc5bae7989c943a6a57f3f4679a0a Mon Sep 17 00:00:00 2001 From: Oleksandr Yakushev <alex@bytopia.org> Date: Mon, 18 Nov 2024 21:54:37 +0200 Subject: [PATCH] perf: Improve the performance of pivoted XLSX export (#50117) * [xlsx-export] Cache the custom datetime formatter * [xlsx-export] Optimize functions in m.q_p.pivot.postprocess * cljfmt --------- Co-authored-by: Adam James <adam.vermeer2@gmail.com> --- src/metabase/channel/render/body.clj | 2 +- src/metabase/formatter.clj | 4 +- src/metabase/formatter/datetime.clj | 22 +-- .../query_processor/pivot/postprocess.clj | 129 +++++++++--------- test/metabase/formatter/datetime_test.clj | 111 ++++++++------- 5 files changed, 137 insertions(+), 131 deletions(-) diff --git a/src/metabase/channel/render/body.clj b/src/metabase/channel/render/body.clj index 8806a7a7110..8be04a26610 100644 --- a/src/metabase/channel/render/body.clj +++ b/src/metabase/channel/render/body.clj @@ -74,7 +74,7 @@ [timezone-id :- [:maybe :string] value col visualization-settings] (cond (types/temporal-field? col) - (formatter/format-temporal-str timezone-id value col) + ((formatter/make-temporal-str-formatter timezone-id col {}) value) (number? value) (formatter/format-number value col visualization-settings) diff --git a/src/metabase/formatter.clj b/src/metabase/formatter.clj index 37ae4aaac23..13b5357dfb0 100644 --- a/src/metabase/formatter.clj +++ b/src/metabase/formatter.clj @@ -29,7 +29,7 @@ (p/import-vars [datetime - format-temporal-str + make-temporal-str-formatter temporal-string?]) (p.types/defrecord+ NumericWrapper [^String num-str ^Number num-value] @@ -275,7 +275,7 @@ ;; for numbers, return a format function that has already computed the differences. ;; todo: do the same for temporal strings (and apply-formatting? (types/temporal-field? col)) - #(datetime/format-temporal-str timezone-id % col visualization-settings) + (datetime/make-temporal-str-formatter timezone-id col visualization-settings) (and apply-formatting? (isa? (:semantic_type col) :type/Coordinate)) (partial format-geographic-coordinates (:semantic_type col)) diff --git a/src/metabase/formatter/datetime.clj b/src/metabase/formatter/datetime.clj index 8a174be932a..5dae89fcfc4 100644 --- a/src/metabase/formatter/datetime.clj +++ b/src/metabase/formatter/datetime.clj @@ -230,14 +230,14 @@ If neither a unit nor a temporal type is provided, just bottom out by assuming a date-format) temporal-str))))) -(defn format-temporal-str - "Reformat a temporal literal string by combining time zone, column, and viz setting information to create a final - desired output format." - ([timezone-id temporal-str col] (format-temporal-str timezone-id temporal-str col {})) - ([timezone-id temporal-str col viz-settings] - (Locale/setDefault (Locale. (public-settings/site-locale))) - (let [merged-viz-settings (common/normalize-keys - (common/viz-settings-for-col col viz-settings))] - (if (str/blank? temporal-str) - "" - (format-timestring timezone-id temporal-str col merged-viz-settings))))) +(defn make-temporal-str-formatter + "Return a formatter which, given a temporal literal string, reformts it by combining time zone, column, and viz + setting information to create a final desired output format." + [timezone-id col viz-settings] + (Locale/setDefault (Locale. (public-settings/site-locale))) + (let [merged-viz-settings (common/normalize-keys + (common/viz-settings-for-col col viz-settings))] + (fn [temporal-str] + (if (str/blank? temporal-str) + "" + (format-timestring timezone-id temporal-str col merged-viz-settings))))) diff --git a/src/metabase/query_processor/pivot/postprocess.clj b/src/metabase/query_processor/pivot/postprocess.clj index 038ca24c9b2..b73f39a7d52 100644 --- a/src/metabase/query_processor/pivot/postprocess.clj +++ b/src/metabase/query_processor/pivot/postprocess.clj @@ -190,34 +190,36 @@ (defn- build-headers "Combine row keys with column headers." [column-headers {:keys [pivot-cols pivot-rows column-titles]}] - (map (fn [h] - (if (and (seq pivot-cols) (not (seq pivot-rows))) - (concat (map #(get column-titles %) pivot-cols) h) - (concat (map #(get column-titles %) pivot-rows) h))) - (let [hs (filter seq column-headers)] - (when (seq hs) - (apply map vector hs))))) + (some->> (not-empty (filter seq column-headers)) + (apply mapv vector) + (mapv (fn [h] + (-> (mapv #(get column-titles %) + (if (and (seq pivot-cols) (empty? pivot-rows)) + pivot-cols pivot-rows)) + (into h)))))) (defn- build-row "Build a single row of the pivot table." [row-combo col-combos pivot-measures data totals row-totals? ordered-formatters row-formatters] - (let [row-path row-combo - measure-values (for [col-combo col-combos - measure-key pivot-measures] - (fmt (get ordered-formatters measure-key) - (get-in data (concat row-path col-combo [measure-key]))))] + (let [row-path (vec row-combo) + row-data (get-in data row-path) + measure-values (vec + (for [measure-key pivot-measures + :let [formatter (get ordered-formatters measure-key)] + col-combo col-combos] + (fmt formatter + (as-> row-data m + (reduce get m col-combo) + (get m measure-key)))))] (when (some #(and (some? %) (not= "" %)) measure-values) - (concat - (when-not (seq row-formatters) (repeat (count pivot-measures) nil)) - row-combo - #_(mapv fmt row-formatters row-combo) - (concat - measure-values - (when row-totals? - (for [measure-key pivot-measures] - (fmt (get ordered-formatters measure-key) - (get-in totals (concat row-path [measure-key])))))))))) - + (as-> (vec (when-not (seq row-formatters) (repeat (count pivot-measures) nil))) + row + (into row row-combo) + (into row measure-values) + (into row (when row-totals? + (for [measure-key pivot-measures] + (fmt (get ordered-formatters measure-key) + (get-in totals (concat row-path [measure-key])))))))))) (defn- build-column-totals "Build column totals for a section." [section-path col-combos pivot-measures totals row-totals? ordered-formatters pivot-rows] @@ -277,7 +279,7 @@ groups (group-by #(nth % idx) section)] (mapcat second (transform (sort groups))))) column-combos - (reverse (map vector (range) pivot-cols))))) + (reverse (map-indexed vector pivot-cols))))) (defn- append-totals-to-subsections [pivot section col-combos ordered-formatters] @@ -339,45 +341,44 @@ :descending reverse} direction))) row-formatters (mapv #(get ordered-formatters %) pivot-rows) col-formatters (mapv #(get ordered-formatters %) pivot-cols) - row-combos (apply math.combo/cartesian-product (map row-values pivot-rows)) - col-combos (apply math.combo/cartesian-product (map column-values pivot-cols)) - col-combos (sort-column-combos config col-combos) + row-combos (mapv vec (apply math.combo/cartesian-product (mapv row-values pivot-rows))) + col-combos (mapv vec (apply math.combo/cartesian-product (mapv column-values pivot-cols))) + col-combos (vec (sort-column-combos config col-combos)) row-totals? (and row-totals? (boolean (seq pivot-cols))) column-headers (build-column-headers config col-combos col-formatters) - headers (or (seq (build-headers column-headers config)) - [(concat - (map #(get column-titles %) pivot-rows) - (map #(get column-titles %) pivot-measures))])] - (concat - headers - (filter seq - (apply concat - (let [sections-rows - (for [section-row-combos ((get sort-fns (first pivot-rows) identity) (sort-by ffirst (vals (group-by first row-combos))))] - (concat - (remove nil? - (for [row-combo section-row-combos] - (build-row row-combo col-combos pivot-measures data totals row-totals? ordered-formatters row-formatters)))))] - (mapv - (fn [section-rows] - (->> - section-rows - (sort-pivot-subsections config) - ;; section rows are either enriched with column-totals rows or left as is - ((fn [rows] - (if (and col-totals? (> (count pivot-rows) 1)) - (append-totals-to-subsections pivot rows col-combos ordered-formatters) - rows))) - ;; then, we apply the row-formatters to the pivot-rows portion of each row, - ;; filtering out any rows that begin with "Totals ..." - (mapv - (fn [row] - (let [[row-part vals-part] (split-at (count pivot-rows) row)] - (if (or - (not (seq row-part)) - (str/starts-with? (first row-part) "Totals")) - row - (vec (concat (map fmt row-formatters row-part) vals-part)))))))) - sections-rows)))) - (when col-totals? - [(build-grand-totals config col-combos totals row-totals? ordered-formatters)])))) + headers (or (not-empty (build-headers column-headers config)) + [(mapv #(get column-titles %) (into (vec pivot-rows) pivot-measures))])] + (-> headers + (into (remove empty?) + (reduce into [] + (let [sections-rows + (vec + (for [section-row-combos ((get sort-fns (first pivot-rows) identity) (sort-by ffirst (vals (group-by first row-combos))))] + (into [] + (keep (fn [row-combo] + (build-row row-combo col-combos pivot-measures data totals row-totals? ordered-formatters row-formatters))) + section-row-combos)))] + (mapv + (fn [section-rows] + (->> + section-rows + (sort-pivot-subsections config) + ;; section rows are either enriched with column-totals rows or left as is + ((fn [rows] + (if (and col-totals? (> (count pivot-rows) 1)) + (append-totals-to-subsections pivot rows col-combos ordered-formatters) + rows))) + ;; then, we apply the row-formatters to the pivot-rows portion of each row, + ;; filtering out any rows that begin with "Totals ..." + (mapv + (fn [row] + (let [[row-part vals-part] (split-at (count pivot-rows) row)] + (if (or + (not (seq row-part)) + (str/starts-with? (first row-part) "Totals")) + row + (into (mapv fmt row-formatters row-part) vals-part))))))) + sections-rows)))) + (into + (when col-totals? + [(build-grand-totals config col-combos totals row-totals? ordered-formatters)]))))) diff --git a/test/metabase/formatter/datetime_test.clj b/test/metabase/formatter/datetime_test.clj index 328499659b0..edb2e03aa7f 100644 --- a/test/metabase/formatter/datetime_test.clj +++ b/test/metabase/formatter/datetime_test.clj @@ -25,85 +25,90 @@ (testing "The default behavior when the :time-enabled key is absent is to show minutes" (is (= "h:mm a" (#'datetime/determine-time-format {})))))) +(defn- format-temporal-str + ([timezone-id temporal-str col] (format-temporal-str timezone-id temporal-str col {})) + ([timezone-id temporal-str col viz-settings] + ((datetime/make-temporal-str-formatter timezone-id col viz-settings) temporal-str))) + (deftest format-temporal-str-test (mt/with-temporary-setting-values [custom-formatting nil] (testing "Null values do not blow up" (is (= "" - (datetime/format-temporal-str "UTC" nil :now)))) + (format-temporal-str "UTC" nil :now)))) (testing "Temporal Units are formatted" (testing :minute (is (= "July 16, 2020, 6:04 PM" - (datetime/format-temporal-str "UTC" now {:unit :minute})))) + (format-temporal-str "UTC" now {:unit :minute})))) (testing :hour (is (= "July 16, 2020, 6 PM" - (datetime/format-temporal-str "UTC" now {:unit :hour})))) + (format-temporal-str "UTC" now {:unit :hour})))) (testing :day (is (= "Thursday, July 16, 2020" - (datetime/format-temporal-str "UTC" now {:unit :day})))) + (format-temporal-str "UTC" now {:unit :day})))) (testing :week (is (= "July 16, 2020 - July 22, 2020" - (datetime/format-temporal-str "UTC" now {:unit :week})))) + (format-temporal-str "UTC" now {:unit :week})))) (testing :month (is (= "July, 2020" - (datetime/format-temporal-str "UTC" now {:unit :month})))) + (format-temporal-str "UTC" now {:unit :month})))) (testing :quarter (is (= "Q3 - 2020" - (datetime/format-temporal-str "UTC" now {:unit :quarter})))) + (format-temporal-str "UTC" now {:unit :quarter})))) (testing :year (is (= "2020" - (datetime/format-temporal-str "UTC" now {:unit :year}))))) + (format-temporal-str "UTC" now {:unit :year}))))) (testing "x-of-y Temporal Units are formatted" (testing :minute-of-hour (is (= "1st" - (datetime/format-temporal-str "UTC" "1" {:unit :minute-of-hour})))) + (format-temporal-str "UTC" "1" {:unit :minute-of-hour})))) (testing :day-of-month (is (= "2nd" - (datetime/format-temporal-str "UTC" "2" {:unit :day-of-month})))) + (format-temporal-str "UTC" "2" {:unit :day-of-month})))) (testing :day-of-year (is (= "203rd" - (datetime/format-temporal-str "UTC" "203" {:unit :day-of-year})))) + (format-temporal-str "UTC" "203" {:unit :day-of-year})))) (testing :week-of-year (is (= "44th" - (datetime/format-temporal-str "UTC" "44" {:unit :week-of-year})))) + (format-temporal-str "UTC" "44" {:unit :week-of-year})))) (testing :day-of-week (is (= "Thursday" - (datetime/format-temporal-str "UTC" "4" {:unit :day-of-week})))) + (format-temporal-str "UTC" "4" {:unit :day-of-week})))) (testing :month-of-year (is (= "May" - (datetime/format-temporal-str "UTC" "5" {:unit :month-of-year})))) + (format-temporal-str "UTC" "5" {:unit :month-of-year})))) (testing :quarter-of-year (is (= "Q3" - (datetime/format-temporal-str "UTC" "3" {:unit :quarter-of-year})))) + (format-temporal-str "UTC" "3" {:unit :quarter-of-year})))) (testing :hour-of-day (is (= "4 AM" - (datetime/format-temporal-str "UTC" "4" {:unit :hour-of-day}))))) + (format-temporal-str "UTC" "4" {:unit :hour-of-day}))))) (testing "Can render time types (#15146)" (is (= "8:05 AM" - (datetime/format-temporal-str "UTC" "08:05:06Z" - {:effective_type :type/Time})))) + (format-temporal-str "UTC" "08:05:06Z" + {:effective_type :type/Time})))) (testing "Can render date time types (Part of resolving #36484)" (is (= "April 1, 2014, 8:30 AM" - (datetime/format-temporal-str "UTC" "2014-04-01T08:30:00" - {:effective_type :type/DateTime})))) + (format-temporal-str "UTC" "2014-04-01T08:30:00" + {:effective_type :type/DateTime})))) (testing "When `:time_enabled` is `nil` the time is truncated for date times." (is (= "April 1, 2014" - (datetime/format-temporal-str "UTC" "2014-04-01T08:30:00" - {:effective_type :type/DateTime - :settings {:time_enabled nil}})))) + (format-temporal-str "UTC" "2014-04-01T08:30:00" + {:effective_type :type/DateTime + :settings {:time_enabled nil}})))) (testing "When `:time_enabled` is `nil` the time is truncated for times (even though this may not make sense)." (is (= "" - (datetime/format-temporal-str "UTC" "08:05:06Z" - {:effective_type :type/Time - :settings {:time_enabled nil}})))))) + (format-temporal-str "UTC" "08:05:06Z" + {:effective_type :type/Time + :settings {:time_enabled nil}})))))) (deftest format-temporal-str-column-viz-settings-test (mt/with-temporary-setting-values [custom-formatting nil] (testing "Written Date Formatting" (let [fmt (fn [col-viz] - (datetime/format-temporal-str "UTC" now {:field_ref [:column_name "created_at"] - :effective_type :type/Date} - {::mb.viz/column-settings - {{::mb.viz/column-name "created_at"} col-viz}}))] + (format-temporal-str "UTC" now {:field_ref [:column_name "created_at"] + :effective_type :type/Date} + {::mb.viz/column-settings + {{::mb.viz/column-name "created_at"} col-viz}}))] (doseq [[date-style normal-result abbreviated-result] [["MMMM D, YYYY" "July 16, 2020" "Jul 16, 2020"] ["D MMMM, YYYY" "16 July, 2020" "16 Jul, 2020"] @@ -118,10 +123,10 @@ (when date-style {::mb.viz/date-style date-style}))))))))) (testing "Numerical Date Formatting" (let [fmt (fn [col-viz] - (datetime/format-temporal-str "UTC" now {:field_ref [:column_name "created_at"] - :effective_type :type/Date} - {::mb.viz/column-settings - {{::mb.viz/column-name "created_at"} col-viz}}))] + (format-temporal-str "UTC" now {:field_ref [:column_name "created_at"] + :effective_type :type/Date} + {::mb.viz/column-settings + {{::mb.viz/column-name "created_at"} col-viz}}))] (doseq [[date-style slash-result dash-result dot-result] [["M/D/YYYY" "7/16/2020" "7-16-2020" "7.16.2020"] ["D/M/YYYY" "16/7/2020" "16-7-2020" "16.7.2020"] @@ -145,15 +150,15 @@ (let [global-settings (m/map-vals mb.viz/db->norm-column-settings-entries (public-settings/custom-formatting))] (is (= "Jul 16, 2020" - (datetime/format-temporal-str "UTC" now - {:effective_type :type/Date} - {::mb.viz/global-column-settings global-settings}))))) + (format-temporal-str "UTC" now + {:effective_type :type/Date} + {::mb.viz/global-column-settings global-settings}))))) (mt/with-temporary-setting-values [custom-formatting {:type/Temporal {:date_style "M/DD/YYYY" :date_separator "-"}}] (let [global-settings (m/map-vals mb.viz/db->norm-column-settings-entries (public-settings/custom-formatting))] (is (= "7-16-2020, 6:04 PM" - (datetime/format-temporal-str + (format-temporal-str "UTC" now {:effective_type :type/DateTime} @@ -175,21 +180,21 @@ time-str "2023-12-11T21:51:57.265914Z"] (testing "Global settings are applied to a :type/DateTimeDateTime" (is (= "December 11, 2023, 21:51" - (datetime/format-temporal-str "UTC" time-str col common-viz-settings)))) + (format-temporal-str "UTC" time-str col common-viz-settings)))) (testing "A :type/DateTimeDateTimeWithLocalTZ is a :type/DateTimeDateTime" (is (= "December 11, 2023, 21:51" (let [col (assoc col :base_type :type/DateTimeWithLocalTZ)] - (datetime/format-temporal-str "UTC" time-str col common-viz-settings))))) + (format-temporal-str "UTC" time-str col common-viz-settings))))) (testing "Custom settings are applied when the column has them" ;; Note that the time style of the column setting has precedence over the global setting (is (= "Monday, December 11, 2023, 9:51:57.265 PM" (let [col (assoc col :name "CUSTOM_DATETIME")] - (datetime/format-temporal-str "UTC" time-str col common-viz-settings))))) + (format-temporal-str "UTC" time-str col common-viz-settings))))) (testing "Column metadata settings are applied" (is (= "Dec 11, 2023, 21:51:57" (let [col (assoc col :settings {:time_enabled "seconds" :date_abbreviate true})] - (datetime/format-temporal-str "UTC" time-str col common-viz-settings))))) + (format-temporal-str "UTC" time-str col common-viz-settings))))) (testing "Various settings can be merged" (testing "We abbreviate the base case..." (is (= "Dec 11, 2023, 21:51" @@ -198,7 +203,7 @@ :type/Temporal ::mb.viz/date-abbreviate] true)] - (datetime/format-temporal-str "UTC" time-str col common-viz-settings))))) + (format-temporal-str "UTC" time-str col common-viz-settings))))) (testing "...and we abbreviate the custome column formatting as well" (is (= "Mon, Dec 11, 2023, 9:51:57.265 PM" (let [col (assoc col :name "CUSTOM_DATETIME") @@ -207,15 +212,15 @@ :type/Temporal ::mb.viz/date-abbreviate] true)] - (datetime/format-temporal-str "UTC" time-str col common-viz-settings)))))) + (format-temporal-str "UTC" time-str col common-viz-settings)))))) (testing "The appropriate formatting is applied when the column type is date" (is (= "December 11, 2023" (let [col (assoc col :effective_type :type/Date)] - (datetime/format-temporal-str "UTC" time-str col common-viz-settings))))) + (format-temporal-str "UTC" time-str col common-viz-settings))))) (testing "The appropriate formatting is applied when the column type is time" (is (= "21:51" (let [col (assoc col :effective_type :type/Time)] - (datetime/format-temporal-str "UTC" time-str col common-viz-settings))))) + (format-temporal-str "UTC" time-str col common-viz-settings))))) (testing "Formatting works for times with a custom time-enabled" (is (= "21:51:57.265" (let [col (assoc col :effective_type :type/Time) @@ -224,7 +229,7 @@ :type/Temporal ::mb.viz/time-enabled] "milliseconds")] - (datetime/format-temporal-str "UTC" time-str col common-viz-settings))))))))) + (format-temporal-str "UTC" time-str col common-viz-settings))))))))) (deftest format-default-unit-test (testing "When the unit is :default we use the column type." @@ -233,14 +238,14 @@ :effective_type :type/Time :base_type :type/Time}] (is (= "3:30 PM" - (datetime/format-temporal-str "UTC" "15:30:45Z" col nil)))))) + (format-temporal-str "UTC" "15:30:45Z" col nil)))))) (testing "Corner case: Return the time string when there is no useful information about it _and_ it's not formattable." ;; This addresses a rare case (might never happen IRL) in which we try to apply the default formatting of ;; "MMMM d, yyyy" to a time, but we don't know it's a time so we error our. (mt/with-temporary-setting-values [custom-formatting nil] (let [col {:unit :default}] (is (= "15:30:45Z" - (datetime/format-temporal-str "UTC" "15:30:45Z" col nil))))))) + (format-temporal-str "UTC" "15:30:45Z" col nil))))))) (deftest ^:parallel year-in-dates-near-start-or-end-of-year-is-correct-test (testing "When the date is at the start/end of the year, the year is formatted properly. (#40306)" @@ -252,9 +257,9 @@ ;; What we probably do want is 'yyyy' which calculates what day of the year the date is and then returns the year. (let [dates (fn [year] [(format "%s-01-01" year) (format "%s-12-31" year)]) fmt (fn [s] - (datetime/format-temporal-str "UTC" s {:field_ref [:column_name "created_at"] - :effective_type :type/Date} - {::mb.viz/column-settings - {{::mb.viz/column-name "created_at"} {::mb.viz/date-style "YYYY-MM-dd"}}}))] + (format-temporal-str "UTC" s {:field_ref [:column_name "created_at"] + :effective_type :type/Date} + {::mb.viz/column-settings + {{::mb.viz/column-name "created_at"} {::mb.viz/date-style "YYYY-MM-dd"}}}))] (doseq [the-date (mapcat dates (range 2008 3008))] (is (= the-date (fmt the-date))))))) -- GitLab