Skip to content
Snippets Groups Projects
Unverified Commit 23595c12 authored by adam-james's avatar adam-james Committed by GitHub
Browse files

Pivot Exports/Downloads Should Not include pivot-grouping or 'Extra' Rows (#47832)


* Pivot Exports/Downloads Should Not include pivot-grouping or 'Extra' Rows

Fixes 46561

When downloading a pivot table's data, a column 'pivot-grouping' is included as well as 'extra' rows that correspond
to various totals in the table.

This PR removes the pivot-grouping column and any of these 'extra' rows from the downloads/exports.

* fix xlsx

* Fix csv

* fix json

* add test and fix some failing tests

* fmt

* Update src/metabase/query_processor/streaming/csv.clj

Co-authored-by: default avatarmetamben <103100869+metamben@users.noreply.github.com>

* static viz table renders filter out pivot-grouping

* address review feedback

* add some comments to try explain the changes

---------

Co-authored-by: default avatarmetamben <103100869+metamben@users.noreply.github.com>
parent 44806ac6
No related branches found
No related tags found
No related merge requests found
......@@ -151,12 +151,22 @@
(render-table color-selector 0 column-names-map contents))
([color-selector normalized-zero {:keys [col-names cols-for-color-lookup]} [header & rows]]
[:table {:style (style/style {:max-width "100%"
:white-space :nowrap
:border (str "1px solid " style/color-border)
:border-radius :6px
:width "1%"})
:cellpadding "0"
:cellspacing "0"}
(render-table-head (vec col-names) header)
(render-table-body (partial color/get-background-color color-selector) normalized-zero cols-for-color-lookup rows)]))
(let [pivot-grouping-idx (get (zipmap col-names (range)) "pivot-grouping")
col-names (cond->> col-names
pivot-grouping-idx (m/remove-nth pivot-grouping-idx))
header (cond-> header
pivot-grouping-idx (update :row #(m/remove-nth pivot-grouping-idx %)))
rows (cond->> rows
pivot-grouping-idx (keep (fn [row]
(let [group (:num-value (nth (:row row) pivot-grouping-idx))]
(when (= 0 group)
(update row :row #(m/remove-nth pivot-grouping-idx %)))))))]
[:table {:style (style/style {:max-width "100%"
:white-space :nowrap
:border (str "1px solid " style/color-border)
:border-radius :6px
:width "1%"})
:cellpadding "0"
:cellspacing "0"}
(render-table-head (vec col-names) header)
(render-table-body (partial color/get-background-color color-selector) normalized-zero cols-for-color-lookup rows)])))
......@@ -24,6 +24,11 @@
;; The 'pivot-grouping' is the giveaway. If you ever see that column, you know you're dealing with raw pivot rows.
(def NON_PIVOT_ROW_GROUP
"Pivot query results have a 'pivot-grouping' column. Rows whose pivot-grouping value is 0 are expected results.
Rows whose pivot-grouping values are greater than 0 represent subtotals, and should not be included in non-pivot result outputs."
0)
;; Most of the post processing functions use a 'pivot-spec' map.
(mr/def ::pivot-spec
[:map
......
......@@ -2,6 +2,7 @@
(:require
[clojure.data.csv]
[java-time.api :as t]
[medley.core :as m]
[metabase.formatter :as formatter]
[metabase.query-processor.pivot.postprocess :as qp.pivot.postprocess]
[metabase.query-processor.streaming.common :as common]
......@@ -73,15 +74,20 @@
(let [writer (BufferedWriter. (OutputStreamWriter. os StandardCharsets/UTF_8))
ordered-formatters (volatile! nil)
rows! (atom [])
pivot-options (atom nil)]
pivot-options (atom nil)
;; if we're processing results from a pivot query, there will be a column 'pivot-grouping' that we don't want to include
;; in the final results, so we get the idx into the row in order to remove it
pivot-grouping-idx (volatile! nil)]
(reify qp.si/StreamingResultsWriter
(begin! [_ {{:keys [ordered-cols results_timezone format-rows? pivot-export-options]
:or {format-rows? true}} :data} viz-settings]
(let [opts (when (and *pivot-export-post-processing-enabled* pivot-export-options)
(assoc pivot-export-options :column-titles (mapv :display_name ordered-cols)))
(let [opts (when (and *pivot-export-post-processing-enabled* pivot-export-options)
(assoc pivot-export-options :column-titles (mapv :display_name ordered-cols)))
;; col-names are created later when exporting a pivot table, so only create them if there are no pivot options
col-names (when-not opts (common/column-titles ordered-cols (::mb.viz/column-settings viz-settings) format-rows?))]
col-names (when-not opts (common/column-titles ordered-cols (::mb.viz/column-settings viz-settings) format-rows?))
pivot-grouping (qp.pivot.postprocess/pivot-grouping-key col-names)]
;; when pivot options exist, we want to save them to access later when processing the complete set of results for export.
(when pivot-grouping (vreset! pivot-grouping-idx pivot-grouping))
(when opts
(reset! pivot-options (merge {:pivot-rows []
:pivot-cols []} opts)))
......@@ -90,15 +96,18 @@
(mapv #(formatter/create-formatter results_timezone % viz-settings) ordered-cols)
(vec (repeat (count ordered-cols) identity))))
;; write the column names for non-pivot tables
(when col-names
(write-csv writer [col-names])
(when-not opts
(let [modified-row (cond->> col-names
@pivot-grouping-idx (m/remove-nth @pivot-grouping-idx))]
(write-csv writer [modified-row]))
(.flush writer))))
(write-row! [_ row _row-num _ {:keys [output-order]}]
(let [ordered-row (if output-order
(let [row-v (into [] row)]
(for [i output-order] (row-v i)))
row)
(let [ordered-row (vec
(if output-order
(let [row-v (into [] row)]
(for [i output-order] (row-v i)))
row))
xf-row (perf/mapv (fn [formatter r]
(formatter (common/format-value r)))
@ordered-formatters ordered-row)]
......@@ -106,9 +115,16 @@
;; if we're processing a pivot result, we don't write it out yet, just store it
;; so that we can post process the full set of results in finish!
(swap! rows! conj xf-row)
(do
(write-csv writer [xf-row])
(.flush writer)))))
(let [pivot-grouping-key @pivot-grouping-idx
group (get ordered-row pivot-grouping-key)
cleaned-row (cond->> xf-row
pivot-grouping-key (m/remove-nth pivot-grouping-key))]
;; when a pivot-grouping col exists, we check its group number. When it's zero,
;; we keep it, otherwise don't include it in the results as it's a row representing a subtotal of some kind
(when (or (= qp.pivot.postprocess/NON_PIVOT_ROW_GROUP group)
(not group))
(write-csv writer [cleaned-row])
(.flush writer))))))
(finish! [_ _]
;; TODO -- not sure we need to flush both
......
......@@ -6,7 +6,9 @@
[cheshire.factory :as json.factory]
[cheshire.generate :as json.generate]
[java-time.api :as t]
[medley.core :as m]
[metabase.formatter :as formatter]
[metabase.query-processor.pivot.postprocess :as qp.pivot.postprocess]
[metabase.query-processor.streaming.common :as common]
[metabase.query-processor.streaming.interface :as qp.si]
[metabase.shared.models.visualization-settings :as mb.viz]
......@@ -32,41 +34,58 @@
[_ ^OutputStream os]
(let [writer (BufferedWriter. (OutputStreamWriter. os StandardCharsets/UTF_8))
col-names (volatile! nil)
ordered-formatters (volatile! nil)]
ordered-formatters (volatile! nil)
;; if we're processing results from a pivot query, there will be a column 'pivot-grouping' that we don't want to include
;; in the final results, so we get the idx into the row in order to remove it
pivot-grouping-idx (volatile! nil)]
(reify qp.si/StreamingResultsWriter
(begin! [_ {{:keys [ordered-cols results_timezone format-rows?]
:or {format-rows? true}} :data} viz-settings]
;; TODO -- wouldn't it make more sense if the JSON downloads used `:name` preferentially? Seeing how JSON is
;; probably going to be parsed programmatically
(vreset! col-names (common/column-titles ordered-cols (::mb.viz/column-settings viz-settings) format-rows?))
(vreset! ordered-formatters
(if format-rows?
(mapv #(formatter/create-formatter results_timezone % viz-settings) ordered-cols)
(vec (repeat (count ordered-cols) identity))))
(.write writer "[\n"))
(let [cols (common/column-titles ordered-cols (::mb.viz/column-settings viz-settings) format-rows?)
pivot-grouping (qp.pivot.postprocess/pivot-grouping-key cols)]
(when pivot-grouping (vreset! pivot-grouping-idx pivot-grouping))
(let [names (cond->> cols
pivot-grouping (m/remove-nth pivot-grouping))]
(vreset! col-names names))
(vreset! ordered-formatters
(if format-rows?
(mapv #(formatter/create-formatter results_timezone % viz-settings) ordered-cols)
(vec (repeat (count ordered-cols) identity))))
(.write writer "[\n")))
(write-row! [_ row row-num _ {:keys [output-order]}]
(let [ordered-row (if output-order
(let [row-v (into [] row)]
(for [i output-order] (row-v i)))
row)]
(when-not (zero? row-num)
(.write writer ",\n"))
(json/generate-stream
(zipmap
@col-names
(map (fn [formatter r]
(let [ordered-row (vec
(if output-order
(let [row-v (into [] row)]
(for [i output-order] (row-v i)))
row))
pivot-grouping-key @pivot-grouping-idx
group (get ordered-row pivot-grouping-key)
cleaned-row (cond->> ordered-row
pivot-grouping-key (m/remove-nth pivot-grouping-key))]
;; when a pivot-grouping col exists, we check its group number. When it's zero,
;; we keep it, otherwise don't include it in the results as it's a row representing a subtotal of some kind
(when (or (= qp.pivot.postprocess/NON_PIVOT_ROW_GROUP group)
(not group))
(when-not (zero? row-num)
(.write writer ",\n"))
(json/generate-stream
(zipmap
@col-names
(map (fn [formatter r]
;; NOTE: Stringification of formatted values ensures consistency with what is shown in the
;; Metabase UI, especially numbers (e.g. percents, currencies, and rounding). However, this
;; does mean that all JSON values are strings. Any other strategy requires some level of
;; inference to know if we should or should not parse a string (or not stringify an object).
(let [res (formatter (common/format-value r))]
(if-some [num-str (:num-str res)]
num-str
res)))
@ordered-formatters ordered-row))
writer)
(.flush writer)))
(let [res (formatter (common/format-value r))]
(if-some [num-str (:num-str res)]
num-str
res)))
@ordered-formatters cleaned-row))
writer)
(.flush writer))))
(finish! [_ _]
(.write writer "\n]")
......
......@@ -616,22 +616,26 @@
(defmethod qp.si/streaming-results-writer :xlsx
[_ ^OutputStream os]
(let [workbook (SXSSFWorkbook.)
sheet (spreadsheet/add-sheet! workbook (tru "Query result"))
_ (set-no-style-custom-helper sheet)
data-format (. workbook createDataFormat)
cell-styles (volatile! nil)
typed-cell-styles (volatile! nil)
pivot-data! (atom {:rows []})]
(let [workbook (SXSSFWorkbook.)
sheet (spreadsheet/add-sheet! workbook (tru "Query result"))
_ (set-no-style-custom-helper sheet)
data-format (. workbook createDataFormat)
cell-styles (volatile! nil)
typed-cell-styles (volatile! nil)
pivot-data! (atom {:rows []})
;; if we're processing results from a pivot query, there will be a column 'pivot-grouping' that we don't want to include
;; in the final results, so we get the idx into the row in order to remove it
pivot-grouping-idx (volatile! nil)]
(reify qp.si/StreamingResultsWriter
(begin! [_ {{:keys [ordered-cols format-rows? pivot-export-options]} :data}
{col-settings ::mb.viz/column-settings :as viz-settings}]
(let [opts (when (and *pivot-export-post-processing-enabled* pivot-export-options)
(pivot-opts->pivot-spec (merge {:pivot-cols []
:pivot-rows []}
pivot-export-options) ordered-cols))
;; col-names are created later when exporting a pivot table, so only create them if there are no pivot options
col-names (when-not opts (common/column-titles ordered-cols (::mb.viz/column-settings viz-settings) format-rows?))]
(let [opts (when (and *pivot-export-post-processing-enabled* pivot-export-options)
(pivot-opts->pivot-spec (merge {:pivot-cols []
:pivot-rows []}
pivot-export-options) ordered-cols))
col-names (common/column-titles ordered-cols (::mb.viz/column-settings viz-settings) format-rows?)
pivot-grouping (qp.pivot.postprocess/pivot-grouping-key col-names)]
(when pivot-grouping (vreset! pivot-grouping-idx pivot-grouping))
(vreset! cell-styles (compute-column-cell-styles workbook data-format viz-settings ordered-cols))
(vreset! typed-cell-styles (compute-typed-cell-styles workbook data-format))
;; when pivot options exist, we want to save them to access later when processing the complete set of results for export.
......@@ -642,41 +646,49 @@
:viz-settings viz-settings}
:pivot-options opts))
(when col-names
(when-not opts
(doseq [i (range (count ordered-cols))]
(.trackColumnForAutoSizing ^SXSSFSheet sheet i))
(setup-header-row! sheet (count ordered-cols))
(spreadsheet/add-row! sheet (common/column-titles ordered-cols col-settings true)))))
(let [modified-row (cond->> (common/column-titles ordered-cols (::mb.viz/column-settings viz-settings) true)
@pivot-grouping-idx (m/remove-nth @pivot-grouping-idx))]
(spreadsheet/add-row! sheet modified-row)))))
(write-row! [_ row row-num ordered-cols {:keys [output-order] :as viz-settings}]
(let [ordered-row (if output-order
(let [row-v (into [] row)]
(for [i output-order] (row-v i)))
row)
(let [ordered-row (vec (if output-order
(let [row-v (into [] row)]
(for [i output-order] (row-v i)))
row))
col-settings (::mb.viz/column-settings viz-settings)
{:keys [pivot-options]} @pivot-data!]
(if pivot-options
(let [{:keys [pivot-grouping-key]} pivot-options
group (get row pivot-grouping-key)]
(when (= 0 group)
;; TODO: right now, the way I'm building up the native pivot,
;; I end up using the docjure set-cell! (since I create a whole sheet with all the rows at once)
;; I'll want to change that so I can use the set-cell! method we have in this ns, but for now just string everything.
(let [modified-row (->> (vec (m/remove-nth pivot-grouping-key row))
(mapv (fn [value]
(if (number? value)
value
(str value)))))]
(swap! pivot-data! update :rows conj modified-row))))
(do
(add-row! sheet ordered-row ordered-cols col-settings @cell-styles @typed-cell-styles)
(when (= (inc row-num) *auto-sizing-threshold*)
(autosize-columns! sheet))))))
{:keys [pivot-options]} @pivot-data!
pivot-grouping-key @pivot-grouping-idx
group (get ordered-row pivot-grouping-key)
cleaned-row (cond->> ordered-row
pivot-grouping-key (m/remove-nth pivot-grouping-key))]
;; when a pivot-grouping col exists, we check its group number. When it's zero,
;; we keep it, otherwise don't include it in the results as it's a row representing a subtotal of some kind
(when (or (= qp.pivot.postprocess/NON_PIVOT_ROW_GROUP group)
(not group))
(if pivot-options
;; TODO: right now, the way I'm building up the native pivot,
;; I end up using the docjure set-cell! (since I create a whole sheet with all the rows at once)
;; I'll want to change that so I can use the set-cell! method we have in this ns, but for now just string everything.
(let [modified-row (mapv (fn [value]
(if (number? value)
value
(str value)))
cleaned-row)]
(swap! pivot-data! update :rows conj modified-row))
(do
(add-row! sheet cleaned-row ordered-cols col-settings @cell-styles @typed-cell-styles)
(when (= (inc row-num) *auto-sizing-threshold*)
(autosize-columns! sheet)))))))
(finish! [_ {:keys [row_count]}]
(let [{:keys [pivot-options rows cell-style-data]} @pivot-data!]
(let [{:keys [pivot-options rows cell-style-data]} @pivot-data!
pivot-grouping-key @pivot-grouping-idx]
(if pivot-options
(let [header (vec (m/remove-nth (:pivot-grouping-key pivot-options) (:column-titles pivot-options)))
(let [header (vec (m/remove-nth pivot-grouping-key (:column-titles pivot-options)))
wb (native-pivot (concat [header] rows) pivot-options cell-style-data)]
(try
(spreadsheet/save-workbook-into-stream! os wb)
......
......@@ -42,12 +42,20 @@
(->> (spreadsheet/cell-seq r)
(mapv read-cell-with-formatting)))))))
(defn- tabulate-maps
[result]
(let [ks (keys (first result))]
(cons
(mapv name ks)
(map #(mapv % ks) result))))
(defn- process-results
[export-format results]
(when (seq results)
(case export-format
:csv (csv/read-csv results)
:xlsx (read-xlsx results))))
:xlsx (read-xlsx results)
:json (tabulate-maps results))))
(defn- card-download
[{:keys [id] :as _card} export-format format-rows?]
......@@ -533,12 +541,12 @@
[:field (mt/id :products :created_at) {:base-type :type/DateTime :temporal-unit :month}]]}}}]
(let [result (->> (mt/user-http-request :crowberto :post 200 (format "card/%d/query/csv?format_rows=false" pivot-card-id))
csv/read-csv)]
(is (= [["Category" "Created At" "pivot-grouping" "Sum of Price"]
["Doohickey" "2016-05-01T00:00:00Z" "0" "144.12"]
["Doohickey" "2016-06-01T00:00:00Z" "0" "82.92"]
["Doohickey" "2016-07-01T00:00:00Z" "0" "78.22"]
["Doohickey" "2016-08-01T00:00:00Z" "0" "71.09"]
["Doohickey" "2016-09-01T00:00:00Z" "0" "45.65"]]
(is (= [["Category" "Created At" "Sum of Price"]
["Doohickey" "2016-05-01T00:00:00Z" "144.12"]
["Doohickey" "2016-06-01T00:00:00Z" "82.92"]
["Doohickey" "2016-07-01T00:00:00Z" "78.22"]
["Doohickey" "2016-08-01T00:00:00Z" "71.09"]
["Doohickey" "2016-09-01T00:00:00Z" "45.65"]]
(take 6 result)))))))
(testing "for xlsx"
(mt/dataset test-data
......@@ -562,12 +570,11 @@
(mapv (fn [row] (->> (spreadsheet/cell-seq row)
(mapv spreadsheet/read-cell)))))]
data))]
(is (= [["Category" "pivot-grouping" "Sum of Price"]
["Doohickey" 0.0 2185.89]
["Gadget" 0.0 3019.2]
["Gizmo" 0.0 2834.88]
["Widget" 0.0 3109.31]
[nil 1.0 11149.28]]
(is (= [["Category" "Sum of Price"]
["Doohickey" 2185.89]
["Gadget" 3019.2]
["Gizmo" 2834.88]
["Widget" 3109.31]]
(take 6 data)))))))))
(deftest ^:parallel dashcard-viz-settings-downloads-test
......@@ -783,3 +790,27 @@
(is (true? (str/includes? results-string "Test Exception")))
(testing (format "String \"%s\" is not in the error message." illegal)
(is (false? (str/includes? results-string illegal)))))))))))))
(deftest unpivoted-pivot-results-do-not-include-pivot-grouping
(testing "If a pivot question is downloaded or exported unpivoted, the results do not include 'pivot-grouping' column"
(doseq [export-format ["csv" "xlsx" "json"]]
(testing (format "for %s" export-format)
(mt/dataset test-data
(mt/with-temp [:model/Card {pivot-card-id :id}
{:display :pivot
:visualization_settings {:pivot_table.column_split
{:rows []
:columns [[:field (mt/id :products :category) {:base-type :type/Text}]]
:values [[:aggregation 0]]}}
:dataset_query {:database (mt/id)
:type :query
:query
{:source-table (mt/id :products)
:aggregation [[:sum [:field (mt/id :products :price) {:base-type :type/Float}]]]
:breakout [[:field (mt/id :products :category) {:base-type :type/Text}]]}}}]
(let [result (mt/user-http-request :crowberto :post 200
(format "card/%d/query/%s?format_rows=false" pivot-card-id export-format)
{})
data (process-results (keyword export-format) result)]
(is (= ["Category" "Sum of Price"]
(first data))))))))))
......@@ -974,3 +974,28 @@
(map vector
(range)
(map :content (take 20 card-row-els)))))))))))))
(deftest table-renders-excludes-pivot-grouping
(testing "Rendered Tables respect the provided viz-settings on the dashcard."
(mt/dataset test-data
(mt/with-temp [:model/Card {card-id :id}
{:display :pivot
:visualization_settings {:pivot_table.column_split
{:rows [[:field (mt/id :products :category) {:base-type :type/Text}]]
:columns [[:field (mt/id :products :created_at) {:base-type :type/DateTime :temporal-unit :year}]]
:values [[:aggregation 0]]}
:column_settings
{"[\"name\",\"sum\"]" {:number_style "currency"
:currency_in_header false}}}
:dataset_query {:database (mt/id)
:type :query
:query
{:source-table (mt/id :products)
:aggregation [[:sum [:field (mt/id :products :price) {:base-type :type/Float}]]]
:breakout [[:field (mt/id :products :category) {:base-type :type/Text}]
[:field (mt/id :products :created_at) {:base-type :type/DateTime :temporal-unit :year}]]}}}]
(mt/with-current-user (mt/user->id :rasta)
(let [card-doc (render.tu/render-pivot-card-as-hickory! card-id)
card-header-els (hik.s/select (hik.s/tag :th) card-doc)]
(is (= ["Category" "Created At" "Sum of Price"]
(mapv (comp first :content) card-header-els)))))))))
......@@ -15,6 +15,7 @@
[metabase.pulse.util :as pu]
[metabase.query-processor :as qp]
[metabase.query-processor.card :as qp.card]
[metabase.query-processor.pivot :as qp.pivot]
[toucan2.core :as t2])
(:import
(org.apache.batik.anim.dom SVGOMDocument AbstractElement$ExtendedNamedNodeHashMap)
......@@ -152,6 +153,26 @@
hik/parse
hik/as-hickory)))))
(defn render-pivot-card-as-hickory!
"Render the card with `card-id` using the pivot qp and the static-viz rendering pipeline as a hickory data structure.
Redefines some internal rendering functions to keep svg from being rendered into a png. Functions from `hickory.select`
can be used on the output of this function and are particularly useful for writing test assertions."
[card-id]
(let [{:keys [visualization_settings] :as card} (t2/select-one :model/Card :id card-id)
query (qp.card/query-for-card card [] nil {:process-viz-settings? true} nil)
results (qp.pivot/run-pivot-query (assoc query :viz-settings visualization_settings))]
(with-redefs [js-svg/svg-string->bytes identity
image-bundle/make-image-bundle (fn [_ s]
{:image-src s
:render-type :inline})]
(let [content (-> (render/render-pulse-card :inline "UTC" card nil results)
:content)]
(-> content
(edit-nodes img-node-with-svg? img-node->svg-node) ;; replace the :img tag with its parsed SVG.
hiccup/html
hik/parse
hik/as-hickory)))))
(defn render-dashcard-as-hickory!
"Render the dashcard with `dashcard-id` using the static-viz rendering pipeline as a hickory data structure. Redefines
some internal rendering functions to keep svg from being rendered into a png. Functions from `hickory.select` can be
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment