Skip to content
Snippets Groups Projects
Unverified Commit e56676bd authored by github-automation-metabase's avatar github-automation-metabase Committed by GitHub
Browse files

:robot: backported "Pivot Export Aggregations Improved and Won't Error" (#50532)


* Pivot Export Aggregations Improved and Won't Error (#50380)

* Pivot Export Aggregations Improved and Won't Error

Fixes #50207

WIP

- Totals now have appropriate aggregation implementation
- Totals for different row and column partial paths could collide before (eg. if a pivot-col value was equal to the
- measure idx), now the paths are unique and will not collide, causing errors

* add a test

* multiple measures need to have the correct order in each row

* clean up, add some comments explaining totals paths

* fix tests

* add test to show add-rows working correctly after :rows-part :cols-part added

* tap tap tap

---------

Co-authored-by: default avataradam-james <21064735+adam-james-v@users.noreply.github.com>
Co-authored-by: default avatarAdam James <adam.vermeer2@gmail.com>
parent 127ab0be
No related branches found
No related tags found
No related merge requests found
......@@ -104,12 +104,44 @@
[m k v]
(update m k conj v))
(defn- default-agg-fn
[agg v]
(when v
(if (number? v)
(+ agg v)
v)))
(defn- measure->agg-fn
"Aggregators for the column totals"
[k]
(case k
(:sum :count :total)
(fn [prev v]
(if (number? v)
(-> (merge {:result 0} prev)
(update :result #(+ % v)))
v))
:avg
(fn [prev v]
(if (number? v)
(-> (merge {:total 0
:count 0}
prev)
(update :total #(+ % v))
(update :count inc))
v))
:min
(fn [prev v]
(if (number? v)
(-> (merge {:min 0} prev)
(update :min #(min % v)))
v))
:max
(fn [prev v]
(if (number? v)
(-> (merge {:min 0} prev)
(update :min #(max % v)))
v))
;; else
(fn [_prev v] v)))
(defn- update-aggregate
"Update the given `measure-aggregations` with `new-values` using the appropriate function in the `agg-fns` map.
......@@ -124,10 +156,11 @@
(into {}
(map
(fn [[measure-key agg]]
(let [agg-fn (get agg-fns measure-key default-agg-fn)
new-v (get new-values measure-key)]
(let [agg-fn-key (get agg-fns measure-key :total)
new-v (get new-values measure-key)]
[measure-key (if new-v
(agg-fn agg new-v)
(let [agg-fn (measure->agg-fn agg-fn-key)]
(agg-fn agg new-v))
agg)])))
measure-aggregations))
......@@ -144,31 +177,61 @@
total-fn* (fn [m path]
(if (seq path)
(update-in m path
#(update-aggregate (or % (zipmap pivot-measures (repeat 0))) measure-vals measures))
#(update-aggregate (or % (zipmap pivot-measures (repeat {}))) measure-vals measures))
m))
total-fn (fn [m paths]
(reduce total-fn* m paths))]
(-> pivot
(update :row-count (fn [v] (if v (inc v) 0)))
(update :data update-in (concat row-path col-path)
#(update-aggregate (or % (zipmap pivot-measures (repeat 0))) measure-vals measures))
#(update-aggregate (or % (zipmap pivot-measures (repeat {}))) measure-vals measures))
(update :totals (fn [totals]
(-> totals
(total-fn [[:grand-total]])
(total-fn [row-path])
(total-fn [col-path])
(total-fn [[:section-totals (first row-path)]])
#_(total-fn [(concat [:column-totals (first row-path)] col-path)])
(total-fn [[:grand-total]
row-path
col-path
[:section-totals (first row-path)]])
(total-fn (map (fn [part]
(concat [:column-totals] part col-path))
;; here, the `:rows-part` and `:cols-part` keys exist to
;; force paths into the :totals map to be unique.
;; without this, it is possible that a path is already written to
;; if a pivot-col value by chance happens to be the same number
;; of an idx into the row, such as a product ID of 4 matching
;; the pivot-measure idx of 4 if 2 pivot-rows and 1 pivot-col are configured.
;; Previously, in such a case, the measure map (the second deepest 'nesting')
;; can be erroneously accessed when later aggregating
;; to try illustrate, let's say that earlier, these 2 steps occurred:
;; `(assoc-in totals-map [:column-totals "RowA"] {4 {:result 1}})`
;; `(assoc-in totals-map [:column-totals "RowA" 3] {4 {:result 1}})`
;; the result will look like:
;; {:column-totals {"RowA" {4 {:result 1}
;; 3 {4 {:result 1}}}}}
;; Now, you're attempting to (update-aggregate totals-map [:column-totals "RowA"])
;; but, you'll be operating on an unexpected map shape (the key 3 does not correspond to a measure)
;; This is why in issue #50207, when switching around the pivot-rows, things broke. It wasn't
;; the switching, but rather that the second pivot-row's values were IDs, thus the integer 4
;; was part of some totals paths, breaking aggregating in later steps.
(concat [:column-totals :rows-part] part [:cols-part] col-path))
(rest (reductions conj [] row-path)))))))
(update :row-values #(reduce-kv update-set % (select-keys row pivot-rows)))
(update :column-values #(reduce-kv update-set % (select-keys row pivot-cols))))))
(defn- fmt
"Format a value using the provided formatter or identity function."
[formatter value]
((or formatter identity) (common/format-value value)))
[formatter v-map]
(let [value (if (map? v-map)
(or (:result v-map)
(when (contains? v-map :total)
(/ (double (:total v-map)) (:count v-map)))
(:min v-map)
(:max v-map)
(seq v-map))
v-map)]
(when value
((or formatter identity) (common/format-value value)))))
(defn- build-column-headers
"Build multi-level column headers."
......@@ -204,9 +267,13 @@
(let [row-path (vec row-combo)
row-data (get-in data row-path)
measure-values (vec
(for [measure-key pivot-measures
:let [formatter (get ordered-formatters measure-key)]
col-combo col-combos]
;; we need to lead with col-combo here so that each row will alternate
;; between all of the measures, rather than have all measures of one kind
;; bunched together. That is, if you have a table with `count` and `avg`
;; the row must show count-val, avg-val, count-val, avg-val ... etc
(for [col-combo col-combos
measure-key pivot-measures
:let [formatter (get ordered-formatters measure-key)]]
(fmt formatter
(as-> row-data m
(reduce get m col-combo)
......@@ -220,17 +287,20 @@
(for [measure-key pivot-measures]
(fmt (get ordered-formatters measure-key)
(get-in totals (concat row-path [measure-key]))))))))))
(defn- build-column-totals
"Build column totals for a section."
[section-path col-combos pivot-measures totals row-totals? ordered-formatters pivot-rows]
(let [totals-row (for [col-combo col-combos
measure-key pivot-measures]
measure-key pivot-measures
:let [subtotal-path (concat
[:column-totals :rows-part]
section-path
[:cols-part]
col-combo
[measure-key])]]
(fmt (get ordered-formatters measure-key)
(get-in totals (concat
[:column-totals]
section-path
col-combo
[measure-key]))))]
(get-in totals subtotal-path)))]
(when (some #(and (some? %) (not= "" %)) totals-row)
(concat
(cons (format "Totals for %s" (fmt (get ordered-formatters (first pivot-rows)) (last section-path)))
......
(ns metabase.query-processor.streaming.csv
(:require
[clojure.data.csv]
[clojure.string :as str]
[java-time.api :as t]
[medley.core :as m]
[metabase.formatter :as formatter]
......@@ -9,6 +10,7 @@
[metabase.query-processor.pivot.postprocess :as qp.pivot.postprocess]
[metabase.query-processor.streaming.common :as common]
[metabase.query-processor.streaming.interface :as qp.si]
[metabase.util :as u]
[metabase.util.date-2 :as u.date]
[metabase.util.performance :as perf])
(:import
......@@ -70,6 +72,18 @@
string))
(when must-quote (.write writer "\"")))))
(defn- col->aggregation-fn-key
[{agg-name :name source :source}]
(when (= :aggregation source)
(let [agg-name (u/lower-case-en agg-name)]
(cond
(str/starts-with? agg-name "sum") :sum
(str/starts-with? agg-name "avg") :avg
(str/starts-with? agg-name "min") :min
(str/starts-with? agg-name "max") :max
(str/starts-with? agg-name "count") :count
(str/starts-with? agg-name "stddev") :stddev))))
(defmethod qp.si/streaming-results-writer :csv
[_ ^OutputStream os]
(let [writer (BufferedWriter. (OutputStreamWriter. os StandardCharsets/UTF_8))
......@@ -82,7 +96,8 @@
(let [col-names (vec (common/column-titles ordered-cols (::mb.viz/column-settings viz-settings) format-rows?))
opts (when (and pivot? pivot-export-options)
(-> (merge {:pivot-rows []
:pivot-cols []}
:pivot-cols []
:measures (mapv col->aggregation-fn-key ordered-cols)}
pivot-export-options)
(assoc :column-titles col-names)
(qp.pivot.postprocess/add-totals-settings viz-settings)
......
......@@ -266,7 +266,7 @@
:visualization_settings {:pivot_table.column_split
{:rows ["CATEGORY"]
:columns ["CREATED_AT"]
:values ["sum"]}
:values ["sum" "avg"]}
:column_settings
{"[\"name\",\"sum\"]" {:number_style "currency"
:currency_in_header false}}}
......@@ -336,6 +336,62 @@
((fn [m] (update-vals m #(into #{} (mapv first %)))))
(apply concat))))))))))
(deftest simple-pivot-with-sum-and-average-export-test
(testing "Pivot table exports look pivoted and can have multiple measures aggregated properly."
(mt/dataset test-data
(mt/with-temp [:model/Card card
{:display :pivot
:visualization_settings {:pivot_table.column_split
{:rows ["CATEGORY"]
:columns ["CREATED_AT"]
:values ["sum" "avg"]}
:column_settings
{"[\"name\",\"sum\"]" {:number_style "currency"
:currency_in_header false}}}
:dataset_query {:database (mt/id)
:type :query
:query
{:source-table (mt/id :products)
:aggregation [[:sum [:field (mt/id :products :price) {:base-type :type/Float}]]
[:avg [:field (mt/id :products :price) {:base-type :type/Float}]]]
:breakout [[:field (mt/id :products :category) {:base-type :type/Text}]
[:field (mt/id :products :created_at) {:base-type :type/DateTime :temporal-unit :year}]]}}}]
(testing "formatted"
(is (= [[["Category" "2016" "2016" "2017" "2017" "2018" "2018" "2019" "2019" "Row totals" "Row totals"]
["Category"
"Sum of Price"
"Average of Price"
"Sum of Price"
"Average of Price"
"Sum of Price"
"Average of Price"
"Sum of Price"
"Average of Price"
""
""]
["Doohickey" "$632.14" "48.63" "$854.19" "50.25" "$496.43" "62.05" "$203.13" "50.78" "$2,185.89" "52.93"]
["Gadget" "$679.83" "52.29" "$1,059.11" "55.74" "$844.51" "60.32" "$435.75" "62.25" "$3,019.20" "57.65"]
["Gizmo" "$529.70" "58.86" "$1,080.18" "51.44" "$997.94" "58.7" "$227.06" "56.77" "$2,834.88" "56.44"]
["Widget" "$987.39" "51.97" "$1,014.68" "56.37" "$912.20" "65.16" "$195.04" "65.01" "$3,109.31" "59.63"]
["Grand totals"
"$2,829.06"
"52.94"
"$4,008.16"
"53.45"
"$3,251.08"
"61.56"
"$1,060.98"
"58.7"
"$11,149.28"
"56.66"]]
#{:unsaved-card-download :card-download :dashcard-download
:alert-attachment :subscription-attachment
:public-question-download :public-dashcard-download}]
(->> (all-outputs! card {:export-format :csv :format-rows true :pivot true})
(group-by second)
((fn [m] (update-vals m #(into #{} (mapv first %)))))
(apply concat)))))))))
(deftest simple-pivot-export-row-col-totals-test
(testing "Pivot table csv exports respect row/column totals viz-settings"
(doseq [row-totals? [#_true false]
......@@ -516,6 +572,53 @@
"" ""]]
(take 2 result))))))))
(deftest ^:parallel pivot-export-aggregations-test
(testing "Row and Column Values that collide with indices don't break (#50207)"
(testing "Other aggregations will produce the correct values in Totals rows."
(let [pivot-rows-query "SELECT *
FROM (SELECT 4 AS A UNION ALL SELECT 3)
CROSS JOIN (SELECT 'BA' AS B)
CROSS JOIN (SELECT 3 AS C UNION ALL SELECT 4)
CROSS JOIN (SELECT 1 AS MEASURE)"]
(mt/dataset test-data
(mt/with-temp [:model/Card {pivot-data-card-id :id}
{:dataset_query {:database (mt/id)
:type :native
:native
{:template-tags {}
:query pivot-rows-query}}
:result_metadata
(into [] (for [[_ field-name {:keys [base-type]}] pivot-fields]
{:name field-name
:display_name field-name
:field_ref [:field field-name {:base-type base-type}]
:base_type base-type}))}
:model/Card pivot-card
{:display :pivot
:visualization_settings {:pivot_table.column_split
{:rows ["B" "C"]
:columns ["A"]
:values ["MEASURE"]}}
:dataset_query {:database (mt/id)
:type :query
:query
{:aggregation [[:sum [:field "MEASURE" {:base-type :type/Integer}]]]
:breakout
[[:field "A" {:base-type :type/Integer}]
[:field "B" {:base-type :type/Text}]
[:field "C" {:base-type :type/Integer}]]
:source-table (format "card__%s" pivot-data-card-id)}}}]
(let [result (card-download pivot-card {:export-format :csv :pivot true})]
(is
(= [["B" "C" "3" "4" "Row totals"]
["BA" "3" "1" "1" "2"]
["BA" "4" "1" "1" "2"]
;; Without the fix in pr#50380, this would incorrectly look like:
;; ["Totals for BA" "" "2" "[4 {:result 1}]"]
["Totals for BA" "" "2" "2"]
["Grand totals" "" "2" "2" "4"]]
result)))))))))
(deftest ^:parallel zero-column-pivot-tables-test
(testing "Pivot tables with zero columns download correctly."
(mt/dataset test-data
......
......@@ -40,13 +40,19 @@
;; Every row will contribute to the MEASURE somewhere, determined by
;; the values in each pivot-row and pivot-col index. For example,
;; given the pivot-config in this test, the row `["X" "Y" 0 1]` will end up adding
;; {"X" {"Y" {3 1}}}
;; {"X" {"Y" {3 {:result 1}}}}
;; the operation is essentially an assoc-in done per measure:
;; assoc-in path from rows cols and measures value from measure idx
;; `(assoc-in (concat pivot-rows pivot-cols pivot-measures) (get-in row measure-idx))`
(is (= {"aA" {"bA" {3 4} "bB" {3 1} "bC" {3 1} "bD" {3 1}}
"aB" {"bA" {3 1} "bB" {3 1} "bC" {3 1} "bD" {3 1}}}
(is (= {"aA" {"bA" {3 {:result 4}}
"bB" {3 {:result 1}}
"bC" {3 {:result 1}}
"bD" {3 {:result 1}}}
"aB" {"bA" {3 {:result 1}}
"bB" {3 {:result 1}}
"bC" {3 {:result 1}}
"bD" {3 {:result 1}}}}
(:data pivot-data)))
;; all distinct values encountered for each row/col index are stored
......@@ -59,14 +65,97 @@
;; since everything is aggregated as a row is added, we can store all of the
;; relevant totals right away and use them to construct the pivot table totals
;; if the user has specified them on (they're on by default and likely to be on most of the time)
(is (= {:grand-total {3 11}
:section-totals {"aA" {3 7} "aB" {3 4}} ;; section refers to the 'row totals' interspersed between each row-wise group
:column-totals {"aA" {"bA" {3 4} "bB" {3 1} "bC" {3 1} "bD" {3 1}}
"aB" {"bA" {3 1} "bB" {3 1} "bC" {3 1} "bD" {3 1}}}
"aA" {3 7}
"aB" {3 4}
"bC" {3 2}
"bB" {3 2}
"bD" {3 2}
"bA" {3 5}}
(is (= {:grand-total {3 {:result 11}}
:section-totals {"aA" {3 {:result 7}} "aB" {3 {:result 4}}} ;; section refers to the 'row totals' interspersed between each row-wise group
:column-totals {:rows-part
{"aA" {:cols-part {"bA" {3 {:result 4}}
"bB" {3 {:result 1}}
"bC" {3 {:result 1}}
"bD" {3 {:result 1}}}}
"aB" {:cols-part {"bA" {3 {:result 1}}
"bB" {3 {:result 1}}
"bC" {3 {:result 1}}
"bD" {3 {:result 1}}}}}}
"aA" {3 {:result 7}}
"aB" {3 {:result 4}}
"bC" {3 {:result 2}}
"bB" {3 {:result 2}}
"bD" {3 {:result 2}}
"bA" {3 {:result 5}}}
(:totals pivot-data)))))))
(deftest pivot-aggregation-no-collisions-test
(testing "The pivot aggregation datastructure stores values as expected"
(let [pivot-config {:pivot-rows [0 1]
:pivot-cols [2]
:column-titles ["A" "B" "pivot-grouping" "Count"]
:row-totals? true
:col-totals? true
:pivot-measures [4]
:pivot-grouping 3}
pivot (process/init-pivot pivot-config)
pivot-data (reduce process/add-row pivot [["aA" 11 1 0 1]
["aA" 10 2 0 1]
["aA" 9 3 0 1]
["aA" 8 4 0 1]
["aA" 7 5 0 1]
["aA" 6 6 0 1]
["aA" 5 7 0 1]
["aB" 4 8 0 1]
["aB" 3 9 0 1]
["aB" 2 10 0 1]
["aB" 1 11 0 1]])]
pivot-data)))
(deftest add-rows-and-totals-test
(testing "Adding Rows produces the correct entries in :totals without 'collisions' on any indices. (#50207)"
(let [build-row #'process/build-row
build-column-totals #'process/build-column-totals
pivot-config {:pivot-rows [0 1]
:pivot-cols [2]
:column-titles ["A" "B" "C" "pivot-grouping" "Sum of MEASURE"]
:row-totals? true
:col-totals? true
:pivot-measures [4]
:pivot-grouping 3}
pivot (process/init-pivot pivot-config)
rows [[3 "BA" 3 0 1]
[3 "BA" 4 0 2]
[4 "BA" 3 0 3]
[4 "BA" 4 0 4]]
pivot-data (reduce process/add-row pivot rows)]
(is (= {:data {3 {"BA" {3 {4 {:result 1}}}}},
:totals
{:grand-total {4 {:result 1}},
3 {"BA" {4 {:result 1}}},
:section-totals {3 {4 {:result 1}}},
:column-totals {:rows-part {3 {:cols-part {3 {4 {:result 1}}} "BA" {:cols-part {3 {4 {:result 1}}}}}}}}}
(select-keys (process/add-row pivot (first rows)) [:data :totals])))
(is (= {:data {3 {"BA" {3 {4 {:result 1}}, 4 {4 {:result 2}}}}},
:totals {:grand-total {4 {:result 3}},
3 {"BA" {4 {:result 3}}},
4 {4 {:result 2}},
:section-totals {3 {4 {:result 3}}},
:column-totals {:rows-part
{3 {:cols-part {3 {4 {:result 1}},
4 {4 {:result 2}}},
"BA" {:cols-part {3 {4 {:result 1}},
4 {4 {:result 2}}}}}}}}}
(select-keys (reduce process/add-row pivot (take 2 rows)) [:data :totals])))
(is (= [4 "BA" 3 4 7]
(build-row [4 "BA"]
[[3] [4]]
[4]
(:data pivot-data)
(:totals pivot-data)
true
(repeat 5 identity)
(repeat 2 identity))))
(is (= ["Totals for 4" nil 3 4 7]
(build-column-totals [4]
[[3] [4]]
[4]
(:totals pivot-data)
true
(repeat 5 identity)
(repeat 2 identity)))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment