Skip to content
Snippets Groups Projects
Unverified Commit fd8b5544 authored by Cam Saul's avatar Cam Saul Committed by GitHub
Browse files

Fix exports not being in report timezone :calendar: (#15415)

* Bump a few dep versions and add missing exclusions

* Fix export timezones; fix Excel time values

* Test fixes & other improvements

* Test fix :wrench:

* Move some XLSX/CSV specific tests into appropriate test namespaces

* Fix SQLite parsing of date-only values

* address PR feedback
parent b5d88ecc
Branches
Tags
No related merge requests found
Showing
with 421 additions and 328 deletions
......@@ -137,7 +137,8 @@
attrs))
(defn- base64-decode [s]
(codecs/bytes->str (codec/base64-decode s)))
(when (u/base64-string? s)
(codecs/bytes->str (codec/base64-decode s))))
(defmethod sso/sso-post :saml
;; Does the verification of the IDP's response and 'logs the user in'. The attributes are available in the response:
......
......@@ -12,7 +12,6 @@
[metabase.driver.sql-jdbc.sync :as sql-jdbc.sync]
[metabase.driver.sql.parameters.substitution :as params.substitution]
[metabase.driver.sql.query-processor :as sql.qp]
[metabase.query-processor.timezone :as qp.timezone]
[metabase.util.date-2 :as u.date]
[metabase.util.honeysql-extensions :as hx]
[schema.core :as s])
......@@ -369,4 +368,4 @@
(t/local-date t))
(catch Throwable _
(when-let [s (.getString rs i)]
(u.date/parse s (qp.timezone/results-timezone-id)))))))
(u.date/parse s))))))
......@@ -66,7 +66,9 @@
[aleph "0.4.6" :exclusions [org.clojure/tools.logging]] ; Async HTTP library; WebSockets
[bigml/histogram "4.1.3"] ; Histogram data structure
[buddy/buddy-core "1.5.0" ; various cryptographic functions
:exclusions [commons-codec]]
:exclusions [commons-codec
org.bouncycastle/bcprov-jdk15on
org.bouncycastle/bcpkix-jdk15on]]
[buddy/buddy-sign "3.0.0"] ; JSON Web Tokens; High-Level message signing library
[cheshire "5.10.0"] ; fast JSON encoding (used by Ring JSON middleware)
[clj-http "3.10.3" ; HTTP client
......@@ -91,15 +93,16 @@
[com.google.guava/guava "28.2-jre"] ; dep for BigQuery, Spark, and GA. Require here rather than letting different dep versions stomp on each other — see comments on #9697
[com.h2database/h2 "1.4.197"] ; embedded SQL database
[com.taoensso/nippy "2.14.0"] ; Fast serialization (i.e., GZIP) library for Clojure
[commons-codec/commons-codec "1.14"] ; Apache Commons -- useful codec util fns
[commons-io/commons-io "2.6"] ; Apache Commons -- useful IO util fns
[commons-codec/commons-codec "1.15"] ; Apache Commons -- useful codec util fns
[commons-io/commons-io "2.8.0"] ; Apache Commons -- useful IO util fns
[commons-validator/commons-validator "1.6" ; Apache Commons -- useful validation util fns
:exclusions [commons-beanutils
commons-digester
commons-logging]]
[compojure "1.6.1" :exclusions [ring/ring-codec]] ; HTTP Routing library built on Ring
[crypto-random "1.2.0"] ; library for generating cryptographically secure random bytes and strings
[dk.ative/docjure "1.13.0"] ; Excel export
[dk.ative/docjure "1.14.0" :exclusions [org.apache.poi/poi
org.apache.poi/poi-ooxml]] ; excel export
[environ "1.2.0"] ; easy environment management
[hiccup "1.0.5"] ; HTML templating
[honeysql "0.9.5" :exclusions [org.clojure/clojurescript]] ; Transform Clojure data structures to SQL
......@@ -110,7 +113,8 @@
org.yaml/snakeyaml]]
[javax.xml.bind/jaxb-api "2.4.0-b180830.0359"] ; add the `javax.xml.bind` classes which we're still using but were removed in Java 11
[kixi/stats "0.4.4" :exclusions [org.clojure/data.avl]] ; Various statistic measures implemented as transducers
[me.raynes/fs "1.4.6"] ; Filesystem tools
[me.raynes/fs "1.4.6" ; Filesystem tools
:exclusions [org.apache.commons/commons-compress]]
[medley "1.3.0"] ; lightweight lib of useful functions
[metabase/connection-pool "1.1.1"] ; simple wrapper around C3P0. JDBC connection pools
[metabase/saml20-clj "2.0.0"] ; EE SAML integration
......@@ -118,6 +122,7 @@
[net.redhogs.cronparser/cron-parser-core "3.4" ; describe Cron schedule in human-readable language
:exclusions [org.slf4j/slf4j-api joda-time]] ; exclude joda time 2.3 which has outdated timezone information
[net.sf.cssbox/cssbox "4.12" :exclusions [org.slf4j/slf4j-api]] ; HTML / CSS rendering
[org.apache.commons/commons-compress "1.20"] ; compression utils
[org.apache.commons/commons-lang3 "3.10"] ; helper methods for working with java.lang stuff
[org.apache.logging.log4j/log4j-api "2.13.3"] ; apache logging framework
[org.apache.logging.log4j/log4j-1.2-api "2.13.3"] ; add compatibility with log4j 1.2
......@@ -125,8 +130,13 @@
[org.apache.logging.log4j/log4j-jcl "2.13.3"] ; allows the commons-logging API to work with log4j 2
[org.apache.logging.log4j/log4j-liquibase "2.13.3"] ; liquibase logging via log4j 2
[org.apache.logging.log4j/log4j-slf4j-impl "2.13.3"] ; allows the slf4j API to work with log4j 2
[org.apache.poi/poi "5.0.0"] ; Work with Office documents (e.g. Excel spreadsheets) -- newer version than one specified by Docjure
[org.apache.poi/poi-ooxml "5.0.0"
:exclusions [org.bouncycastle/bcprov-jdk15on
org.bouncycastle/bcpkix-jdk15on]]
[org.apache.sshd/sshd-core "2.4.0"] ; ssh tunneling and test server
[org.bouncycastle/bcprov-jdk15on "1.65"] ; Bouncy Castle crypto library -- explicit version of BC specified to resolve illegal reflective access errors
[org.bouncycastle/bcprov-jdk15on "1.68"] ; Bouncy Castle crypto library -- explicit version of BC specified to resolve illegal reflective access errors
[org.bouncycastle/bcpkix-jdk15on "1.68"]
[org.clojars.pntblnk/clj-ldap "0.0.16"] ; LDAP client
[org.eclipse.jetty/jetty-server "9.4.32.v20200930"] ; We require JDK 8 which allows us to run Jetty 9.4, ring-jetty-adapter runs on 1.7 which forces an older version
[org.flatland/ordered "1.5.9"] ; ordered maps & sets
......@@ -143,7 +153,7 @@
[prismatic/schema "1.1.11"] ; Data schema declaration and validation library
[redux "0.1.4"] ; Utility functions for building and composing transducers
[riddley "0.2.0"] ; code walking lib -- used interally by Potemkin, manifold, etc.
[ring/ring-core "1.8.0"]
[ring/ring-core "1.8.1"]
[ring/ring-jetty-adapter "1.8.1"] ; Ring adapter using Jetty webserver (used to run a Ring server for unit tests)
[ring/ring-json "0.5.0"] ; Ring middleware for reading/writing JSON automatically
[stencil "0.5.0"] ; Mustache templates for Clojure
......
......@@ -59,7 +59,7 @@
"Execute a query and retrieve the results in the usual format."
[:as {{:keys [database], query-type :type, :as query} :body}]
{database (s/maybe s/Int)}
(run-query-async (update query :middleware assoc :js-int-to-string? true)))
(run-query-async (update-in query [:middleware :js-int-to-string?] (fnil identity true))))
;;; ----------------------------------- Downloading Query Results in Other Formats -----------------------------------
......
......@@ -32,15 +32,15 @@
;; so, short of turning all `:type/Integer` derived values into strings, this is the best approximation
;; of a fix that can be accomplished.
(let [field-indexes (keep-indexed
(fn [idx val]
(let [field-id (mbql.u/field-clause->id-or-literal val)]
(when-let [field (when (number? field-id)
(Field field-id))]
(when (and (or (isa? (:special_type field) :type/PK)
(isa? (:special_type field) :type/FK))
(isa? (:base_type field) :type/Integer))
idx))))
(:fields (:query query)))]
(fn [idx val]
(let [field-id (mbql.u/field-clause->id-or-literal val)]
(when-let [field (when (number? field-id)
(Field field-id))]
(when (and (or (isa? (:special_type field) :type/PK)
(isa? (:special_type field) :type/FK))
(isa? (:base_type field) :type/Integer))
idx))))
(:fields (:query query)))]
(qp query (if (and js-int-to-string? (seq field-indexes))
#(result-int->string field-indexes (rff %))
rff)
......
(ns metabase.query-processor.streaming.common
"Shared util fns for various export (download) streaming formats."
(:require [java-time :as t]
[metabase.util.date-2 :as u.date]))
[metabase.query-processor.store :as qp.store]
[metabase.query-processor.timezone :as qp.timezone]
[metabase.util.date-2 :as u.date])
(:import [java.time LocalDate LocalDateTime LocalTime OffsetDateTime OffsetTime ZonedDateTime]))
(defn in-result-time-zone
  "Set the time zone of a temporal value `t` to the results timezone without changing the actual moment in time. e.g.

    ;; if result timezone is `US/Pacific`
    (in-result-time-zone #t \"2021-03-30T20:06:00Z\") -> #t \"2021-03-30T13:06:00-07:00\""
  [t]
  (u.date/with-time-zone-same-instant
   t
   ;; cache the parsed ZoneId in the QP store so it is only computed once per query execution
   (qp.store/cached ::results-timezone (t/zone-id (qp.timezone/results-timezone-id)))))
(defprotocol FormatValue
"Protocol for specifying how objects of various classes in QP result rows should be formatted in various download
......@@ -16,24 +29,28 @@
Object
(format-value [this] this)
java.time.temporal.Temporal
(format-value [this]
(u.date/format this))
LocalDate
(format-value [t]
(u.date/format t))
java.time.LocalDateTime
(format-value [this]
(u.date/format
(if (= (t/local-time this) (t/local-time 0))
(t/local-date this)
this)))
LocalDateTime
(format-value [t]
(if (= (t/local-time t) (t/local-time 0))
(format-value (t/local-date t))
(u.date/format t)))
java.time.OffsetDateTime
(format-value [this]
(u.date/format
(if (= (t/local-time this) (t/local-time 0))
(t/local-date this)
this)))
LocalTime
(format-value [t]
(u.date/format t))
java.time.ZonedDateTime
(format-value [this]
(format-value (t/offset-date-time this))))
OffsetTime
(format-value [t]
(u.date/format (in-result-time-zone t)))
OffsetDateTime
(format-value [t]
(u.date/format (in-result-time-zone t)))
ZonedDateTime
(format-value [t]
(format-value (t/offset-date-time t))))
......@@ -22,6 +22,8 @@
col-names (volatile! nil)]
(reify i/StreamingResultsWriter
(begin! [_ {{:keys [cols]} :data}]
;; TODO -- wouldn't it make more sense if the JSON downloads used `:name` preferentially? Seeing how JSON is
;; probably going to be parsed programmatically
(vreset! col-names (mapv (some-fn :display_name :name) cols))
(.write writer "[\n"))
......
......@@ -2,17 +2,16 @@
(:require [cheshire.core :as json]
[dk.ative.docjure.spreadsheet :as spreadsheet]
[java-time :as t]
[metabase.plugins.classloader :as classloader]
[metabase.query-processor.error-type :as qp.error-type]
[metabase.query-processor.streaming.common :as common]
[metabase.query-processor.streaming.interface :as i]
[metabase.util.date-2 :as u.date]
[metabase.util.i18n :refer [tru]])
(:import java.io.OutputStream
[java.time LocalDate LocalDateTime OffsetDateTime ZonedDateTime]
[org.apache.poi.ss.usermodel Cell CellType Workbook]
[java.time LocalDate LocalDateTime LocalTime OffsetDateTime OffsetTime ZonedDateTime]
[org.apache.poi.ss.usermodel BuiltinFormats Cell CellType DateUtil Workbook]
org.apache.poi.xssf.streaming.SXSSFWorkbook))
(def ^:private ^:dynamic *results-timezone-id* "UTC")
(defmethod i/stream-options :xlsx
[_]
{:content-type "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
......@@ -21,77 +20,91 @@
:headers {"Content-Disposition" (format "attachment; filename=\"query_result_%s.xlsx\""
(u.date/format (t/zoned-date-time)))}})
;; docjure has an open issue about its formatting issues: https://github.com/mjul/docjure/issues/75
;; since this hasn't been addressed at the library level, we have to do some workarounds
;; this method is private in the docjure library, but we need to able to call it from here
;; also, the version in Docjure doesn't keep track of created cell styles, which is bad
;; these need to be memoized per Workbook
(def ^:dynamic *cell-styles*
  "Holds the CellStyle values used within a spreadsheet so that they can be reused. Excel has a limit
  of 64,000 cell styles in a single workbook. Bound to a map of style name -> delay (see `cell-style-delays`);
  deref a delay (via `cell-style`) to lazily create the actual CellStyle for the workbook."
  nil)
(defn- create-or-get-date-format [^Workbook workbook ^String format-string]
(when-not (contains? @*cell-styles* format-string)
(let [new-val (let [format-helper (.getCreationHelper workbook)
date-style (.createCellStyle workbook)]
(assoc
@*cell-styles*
format-string
(doto date-style
(.setDataFormat (.. format-helper createDataFormat (getFormat format-string))))))]
(swap! *cell-styles* (constantly new-val))))
(get @*cell-styles* format-string))
;; the docjure library does not handle the difference between a date and date+time column
;; as a result, we'll add overrides that can do it
(intern 'dk.ative.docjure.spreadsheet 'create-date-format create-or-get-date-format)
(def ^:const date-format
"Standard date format for :type/Date objects"
"m/d/yy")
(def ^:const datetime-format
"Standard date/time format for any of the :type/Date variants with a Time"
"m/d/yy HH:MM:ss")
(defn- set-cell! [^Cell cell format-string date]
(when (= (.getCellType cell) CellType/FORMULA) (.setCellType cell CellType/NUMERIC))
(.setCellValue cell ^java.util.Date date)
(.setCellStyle cell (create-or-get-date-format (.. cell getSheet getWorkbook) format-string)))
(defn- apply-timezone [t]
(u.date/with-time-zone-same-instant t (t/zone-id *results-timezone-id*)))
;; Since we can only have a limited number of styles we'll enumerate the possible styles we'll want to use. Then we'll
;; create a bunch of delays for them that will create them if needed and bind this to `*cell-styles*`; we'll use them
;; as needed in various `set-cell!` method impls.
(def ^:private ^{:arglists '(^String [style-name])} cell-style->format-string
  "Predefined cell format strings. These are used to get the corresponding predefined cell format number using
  `BuiltinFormats/getBuiltinFormat`. See `(BuiltinFormats/getAll)` for all predefined formats"
  ;; NOTE(review): these strings must exactly match one of Excel's built-in formats for `cell-format` to resolve them
  {:date     "m/d/yy"
   :datetime "m/d/yy h:mm"
   :time     "h:mm:ss"})
(defn- cell-format
  "Fetch the predefined (built-in) Excel cell format integer associated with `style-name` (one of the keys of
  `cell-style->format-string`). Throws an ex-info with the known formats if `style-name` cannot be resolved."
  ^Integer [style-name]
  ;; NOTE(review): POI's `BuiltinFormats/getBuiltinFormat` returns -1 (truthy in Clojure) for a format string it does
  ;; not recognize, so the `throw` below likely only fires when `style-name` is missing from the map -- confirm.
  (or (some-> style-name cell-style->format-string BuiltinFormats/getBuiltinFormat)
      (throw (ex-info (tru "Invalid cell format")
                      {:type          qp.error-type/qp
                       :style-name    style-name
                       :known-formats cell-style->format-string
                       :all-formats   (BuiltinFormats/getAll)}))))
(defn- cell-style-delays
  "Create a map of style name -> delay. This is bound to `*cell-styles*` by `streaming-results-writer`. Dereffing the
  delay will create the style and add it to the workbook if needed.

  Wrapping each style in a delay means a style is only created (and added to the workbook) the first time it is
  actually used, and at most once per workbook -- Excel limits the number of cell styles per workbook."
  [^Workbook workbook]
  (into {} (for [style-name (keys cell-style->format-string)]
             [style-name (delay
                          (doto (.createCellStyle workbook)
                            (.setDataFormat (cell-format style-name))))])))
(defn- cell-style
  "Get the cell style associated with `style-name` by dereffing the delay in `*cell-styles*`. Returns `nil` when
  `style-name` is `nil` or has no registered style."
  ^org.apache.poi.ss.usermodel.CellStyle [style-name]
  (when style-name
    (when-let [style-delay (*cell-styles* style-name)]
      (deref style-delay))))
;; Temporal values in Excel are just NUMERIC cells that are stored in a floating-point format and have some cell
;; styles applied that dictate how to format them
(defmethod spreadsheet/set-cell! LocalDate
[^Cell cell t]
(set-cell! cell date-format (t/java-date (apply-timezone t))))
[^Cell cell ^LocalDate t]
(.setCellValue cell t)
(.setCellType cell CellType/NUMERIC)
(.setCellStyle cell (cell-style :date)))
(defmethod spreadsheet/set-cell! LocalDateTime
[^Cell cell t]
(spreadsheet/set-cell! cell (t/java-date (apply-timezone t))))
[^Cell cell ^LocalDateTime t]
(.setCellValue cell t)
(.setCellType cell CellType/NUMERIC)
(.setCellStyle cell (cell-style :datetime)))
(defmethod spreadsheet/set-cell! ZonedDateTime
(defmethod spreadsheet/set-cell! LocalTime
[^Cell cell t]
(set-cell! cell datetime-format (t/java-date (apply-timezone t))))
;; there's no `.setCellValue` for a `LocalTime` -- but all the built-in impls for `LocalDate` and `LocalDateTime` do
;; anyway is convert the date(time) to an Excel datetime floating-point number and then set that.
;;
;; `DateUtil/convertTime` will convert a *time* string to an Excel number; after that we can set the numeric value
;; directly.
;;
;; See https://poi.apache.org/apidocs/4.1/org/apache/poi/ss/usermodel/DateUtil.html#convertTime-java.lang.String-
(.setCellValue cell (DateUtil/convertTime (u.date/format "HH:mm:ss" t)))
(.setCellType cell CellType/NUMERIC)
(.setCellStyle cell (cell-style :time)))
(defmethod spreadsheet/set-cell! OffsetTime
  [cell t]
  ;; shift the time to the results timezone first, then drop the offset and write it via the `LocalTime` impl
  (spreadsheet/set-cell! cell (t/local-time (common/in-result-time-zone t))))
(defmethod spreadsheet/set-cell! OffsetDateTime
[^Cell cell t]
(set-cell! cell datetime-format (t/java-date (apply-timezone t))))
[cell t]
(spreadsheet/set-cell! cell (t/local-date-time (common/in-result-time-zone t))))
;; overrides the default implementation from docjure, so that a plain Date object
;; carries its time too
(defmethod spreadsheet/set-cell! java.util.Date
[^Cell cell val]
(set-cell! cell datetime-format val))
(defmethod spreadsheet/set-cell! ZonedDateTime
[cell t]
(spreadsheet/set-cell! cell (t/offset-date-time t)))
;; add a generic implementation for the method that writes values to XLSX cells that just piggybacks off the
;; implementations we've already defined for encoding things as JSON. These implementations live in
;; `metabase.server.middleware`.
(defmethod spreadsheet/set-cell! Object
[^Cell cell, value]
[^Cell cell value]
(when (= (.getCellType cell) CellType/FORMULA)
(.setCellType cell CellType/STRING))
;; stick the object in a JSON map and encode it, which will force conversion to a string. Then unparse that JSON and
......@@ -103,20 +116,15 @@
(defmethod i/streaming-results-writer :xlsx
[_ ^OutputStream os]
(let [workbook (SXSSFWorkbook.)
cell-styles (atom {})
sheet (spreadsheet/add-sheet! workbook (tru "Query result"))
results-timezone (atom nil)]
(let [workbook (SXSSFWorkbook.)
cell-styles (cell-style-delays workbook)
sheet (spreadsheet/add-sheet! workbook (tru "Query result"))]
(reify i/StreamingResultsWriter
(begin! [_ {{:keys [cols]} :data}]
;; avoid circular refs
(classloader/require 'metabase.query-processor.timezone)
(reset! results-timezone ((resolve 'metabase.query-processor.timezone/results-timezone-id)))
(spreadsheet/add-row! sheet (map (some-fn :display_name :name) cols)))
(write-row! [_ row _]
(binding [*cell-styles* cell-styles
*results-timezone-id* @results-timezone]
(binding [*cell-styles* cell-styles]
(spreadsheet/add-row! sheet row)))
(finish! [_ _]
......
......@@ -408,7 +408,7 @@
(p.types/defprotocol+ WithTimeZoneSameInstant
"Protocol for converting a temporal value to an equivalent one in a given timezone."
(with-time-zone-same-instant [t ^java.time.ZoneId zone-id]
(^{:style/indent 0} with-time-zone-same-instant [t ^java.time.ZoneId zone-id]
"Convert a temporal value to an equivalent one in a given timezone. For local temporal values, this simply
converts it to the corresponding offset/zoned type; for offset/zoned types, this applies an appropriate timezone
shift."))
......
(ns metabase.api.dataset-test
"Unit tests for /api/dataset endpoints."
"Unit tests for /api/dataset endpoints. There are additional tests for downloading XLSX/CSV/JSON results generally in
`metabase.query-processor.streaming-test` and specifically for each format in
`metabase.query-processor.streaming.csv-test` etc."
(:require [cheshire.core :as json]
[cheshire.generate :as generate]
[clojure.data.csv :as csv]
[clojure.string :as str]
[clojure.test :refer :all]
[dk.ative.docjure.spreadsheet :as spreadsheet]
[medley.core :as m]
[metabase.api.pivots :as pivots]
[metabase.http-client :as http-client]
......@@ -18,13 +18,11 @@
[metabase.query-processor.middleware.constraints :as constraints]
[metabase.query-processor.util :as qp-util]
[metabase.test :as mt]
[metabase.test.data.dataset-definitions :as defs]
[metabase.test.data.users :as test-users]
[metabase.test.fixtures :as fixtures]
[metabase.util :as u]
[schema.core :as s]
[toucan.db :as db])
(:import com.fasterxml.jackson.core.JsonGenerator))
[toucan.db :as db]))
(use-fixtures :once (fixtures/initialize :db))
......@@ -158,48 +156,6 @@
:dashboard_id nil}
(check-error-message (format-response (most-recent-query-execution-for-query query))))))))))
;;; Make sure that we're piggybacking off of the JSON encoding logic when encoding strange values in XLSX (#5145,
;;; #5220, #5459)
(defrecord ^:private SampleNastyClass [^String v])
(generate/add-encoder
SampleNastyClass
(fn [obj, ^JsonGenerator json-generator]
(.writeString json-generator (str (:v obj)))))
(defrecord ^:private AnotherNastyClass [^String v])
(deftest export-spreadsheet
(is (= [{"Values" "values"}
{"Values" "Hello XLSX World!"} ; should use the JSON encoding implementation for object
{"Values" "{:v \"No Encoder\"}"} ; fall back to the implementation of `str` for an object if no JSON encoder exists rather than barfing
{"Values" "ABC"}]
(->> (spreadsheet/create-workbook "Results" [["values"]
[(SampleNastyClass. "Hello XLSX World!")]
[(AnotherNastyClass. "No Encoder")]
["ABC"]])
(spreadsheet/select-sheet "Results")
(spreadsheet/select-columns {:A "Values"})))))
(defn- parse-and-sort-csv [response]
(assert (some? response))
(sort-by
;; ID in CSV is a string, parse it and sort it to get the first 5
(comp #(Integer/parseInt %) first)
;; First row is the header
(rest (csv/read-csv response))))
(deftest date-columns-should-be-emitted-without-time
(is (= [["1" "2014-04-07" "5" "12"]
["2" "2014-09-18" "1" "31"]
["3" "2014-09-15" "8" "56"]
["4" "2014-03-11" "5" "4"]
["5" "2013-05-05" "3" "49"]]
(let [result (mt/user-http-request :rasta :post 200 "dataset/csv" :query
(json/generate-string (mt/mbql-query checkins)))]
(take 5 (parse-and-sort-csv result))))))
(deftest download-response-headers-test
(testing "Make sure CSV/etc. download requests come back with the correct headers"
(is (= {"Cache-Control" "max-age=0, no-cache, must-revalidate, proxy-revalidate"
......@@ -215,39 +171,6 @@
(update "Content-Disposition" #(some-> % (str/replace #"query_result_.+(\.\w+)"
"query_result_<timestamp>$1"))))))))
(deftest check-an-empty-date-column
(mt/dataset defs/test-data-with-null-date-checkins
(let [result (mt/user-http-request :rasta :post 200 "dataset/csv" :query
(json/generate-string (mt/mbql-query checkins)))]
(is (= [["1" "2014-04-07" "" "5" "12"]
["2" "2014-09-18" "" "1" "31"]
["3" "2014-09-15" "" "8" "56"]
["4" "2014-03-11" "" "5" "4"]
["5" "2013-05-05" "" "3" "49"]]
(take 5 (parse-and-sort-csv result)))))))
(deftest sqlite-datetime-test
(mt/test-driver :sqlite
(testing "SQLite doesn't return proper date objects but strings, they just pass through the qp untouched"
(let [result (mt/user-http-request :rasta :post 200 "dataset/csv" :query
(json/generate-string (mt/mbql-query checkins {:order-by [[:asc $id]], :limit 5})))]
(is (= [["1" "2014-04-07" "5" "12"]
["2" "2014-09-18" "1" "31"]
["3" "2014-09-15" "8" "56"]
["4" "2014-03-11" "5" "4"]
["5" "2013-05-05" "3" "49"]]
(parse-and-sort-csv result)))))))
(deftest datetime-fields-are-untouched-when-exported
(let [result (mt/user-http-request :rasta :post 200 "dataset/csv" :query
(json/generate-string (mt/mbql-query users {:order-by [[:asc $id]], :limit 5})))]
(is (= [["1" "Plato Yeshua" "2014-04-01T08:30:00"]
["2" "Felipinho Asklepios" "2014-12-05T15:15:00"]
["3" "Kaneonuskatew Eiran" "2014-11-06T16:15:00"]
["4" "Simcha Yan" "2014-01-01T08:30:00"]
["5" "Quentin Sören" "2014-10-03T17:30:00"]]
(parse-and-sort-csv result)))))
(deftest check-that-we-can-export-the-results-of-a-nested-query
(mt/with-temp-copy-of-db
(mt/with-temp Card [card {:dataset_query {:database (mt/id)
......@@ -269,35 +192,36 @@
(perms/revoke-permissions! (group/all-users) (mt/db))
(do-test))))))
;; POST /api/dataset/:format
(deftest formatted-results-ignore-query-constraints
(testing "Downloading CSV/JSON/XLSX results shouldn't be subject to the default query constraints (#9831)"
;; even if the query comes in with `add-default-userland-constraints` (as will be the case if the query gets saved
(with-redefs [constraints/default-query-constraints {:max-results 10, :max-results-bare-rows 10}]
(let [result (mt/user-http-request :rasta :post 200 "dataset/csv"
:query (json/generate-string
{:database (mt/id)
:type :query
:query {:source-table (mt/id :venues)}
:middleware
{:add-default-userland-constraints? true
:userland-query? true}}))]
(is (some? result))
(when (some? result)
(is (= 101
(count (csv/read-csv result)))))))))
(testing "POST /api/dataset/:format"
(testing "Downloading CSV/JSON/XLSX results shouldn't be subject to the default query constraints (#9831)"
;; even if the query comes in with `add-default-userland-constraints` (as will be the case if the query gets saved
(with-redefs [constraints/default-query-constraints {:max-results 10, :max-results-bare-rows 10}]
(let [result (mt/user-http-request :rasta :post 200 "dataset/csv"
:query (json/generate-string
{:database (mt/id)
:type :query
:query {:source-table (mt/id :venues)}
:middleware
{:add-default-userland-constraints? true
:userland-query? true}}))]
(is (some? result))
(when (some? result)
(is (= 101
(count (csv/read-csv result))))))))))
;; non-"download" queries should still get the default constraints
;; (this also is a sanity check to make sure the `with-redefs` in the test above actually works)
(deftest non--download--queries-should-still-get-the-default-constraints
(with-redefs [constraints/default-query-constraints {:max-results 10, :max-results-bare-rows 10}]
(let [{row-count :row_count, :as result}
(mt/user-http-request :rasta :post 202 "dataset"
{:database (mt/id)
:type :query
:query {:source-table (mt/id :venues)}})]
(is (= 10
(or row-count result))))))
(testing (str "non-\"download\" queries should still get the default constraints "
"(this also is a sanitiy check to make sure the `with-redefs` in the test above actually works)")
(with-redefs [constraints/default-query-constraints {:max-results 10, :max-results-bare-rows 10}]
(let [{row-count :row_count, :as result}
(mt/user-http-request :rasta :post 202 "dataset"
{:database (mt/id)
:type :query
:query {:source-table (mt/id :venues)}})]
(is (= 10
(or row-count result)))))))
(deftest check-permissions-test
(testing "make sure `POST /dataset` calls check user permissions"
......
(ns metabase.query-processor.streaming.csv-test
(:require [cheshire.core :as json]
[clojure.data.csv :as csv]
[clojure.test :refer :all]
[metabase.test :as mt]
[metabase.test.data.dataset-definitions :as defs]))
(defn- parse-and-sort-csv
  "Parse a CSV `response` string, drop the header row, and return the data rows sorted numerically by the ID in the
  first column."
  [response]
  (assert (some? response))
  ;; first row is the header; the ID column comes back as a string, so parse it for a numeric sort
  (let [[_header & rows] (csv/read-csv response)]
    (sort-by (fn [row] (Integer/parseInt (first row))) rows)))
(deftest date-columns-should-be-emitted-without-time
  ;; date-only columns should export as plain `yyyy-MM-dd` values with no spurious midnight time component
  (is (= [["1" "2014-04-07" "5" "12"]
          ["2" "2014-09-18" "1" "31"]
          ["3" "2014-09-15" "8" "56"]
          ["4" "2014-03-11" "5" "4"]
          ["5" "2013-05-05" "3" "49"]]
         (let [result (mt/user-http-request :rasta :post 200 "dataset/csv" :query
                                            (json/generate-string (mt/mbql-query checkins)))]
           (take 5 (parse-and-sort-csv result))))))
(deftest check-an-empty-date-column
  (testing "NULL values should be written correctly"
    ;; NULL dates should come back as empty strings in the CSV, not crash the export or be rendered as some
    ;; placeholder value
    (mt/dataset defs/test-data-with-null-date-checkins
      (let [result (mt/user-http-request :rasta :post 200 "dataset/csv" :query
                                         (json/generate-string (mt/mbql-query checkins {:order-by [[:asc $id]], :limit 5})))]
        (is (= [["1" "2014-04-07" "" "5" "12"]
                ["2" "2014-09-18" "" "1" "31"]
                ["3" "2014-09-15" "" "8" "56"]
                ["4" "2014-03-11" "" "5" "4"]
                ["5" "2013-05-05" "" "3" "49"]]
               (parse-and-sort-csv result)))))))
(deftest sqlite-datetime-test
  ;; SQLite doesn't return real date objects, just strings -- those should pass through the QP and into the CSV
  ;; untouched
  (mt/test-driver :sqlite
    (let [result (mt/user-http-request :rasta :post 200 "dataset/csv" :query
                                       (json/generate-string (mt/mbql-query checkins {:order-by [[:asc $id]], :limit 5})))]
      (is (= [["1" "2014-04-07" "5" "12"]
              ["2" "2014-09-18" "1" "31"]
              ["3" "2014-09-15" "8" "56"]
              ["4" "2014-03-11" "5" "4"]
              ["5" "2013-05-05" "3" "49"]]
             (parse-and-sort-csv result))))))
(deftest datetime-fields-are-untouched-when-exported
  ;; datetime values should be exported as-is (local ISO-8601 strings with no timezone shift applied)
  (let [result (mt/user-http-request :rasta :post 200 "dataset/csv" :query
                                     (json/generate-string (mt/mbql-query users {:order-by [[:asc $id]], :limit 5})))]
    (is (= [["1" "Plato Yeshua" "2014-04-01T08:30:00"]
            ["2" "Felipinho Asklepios" "2014-12-05T15:15:00"]
            ["3" "Kaneonuskatew Eiran" "2014-11-06T16:15:00"]
            ["4" "Simcha Yan" "2014-01-01T08:30:00"]
            ["5" "Quentin Sören" "2014-10-03T17:30:00"]]
           (parse-and-sort-csv result)))))
(ns metabase.query-processor.streaming.xlsx-test
"Additional tests for downloads (including XLSX) are in places like `metabase.query-processor.streaming-test`."
(:require [clojure.test :refer :all]
[dk.ative.docjure.spreadsheet :as spreadsheet]
[metabase.driver :as driver]
[metabase.query-processor :as qp]
[metabase.query-processor.streaming :as qp.streaming]
[metabase.query-processor.timezone :as qp.timezone]
[metabase.test :as mt]
[metabase.util.files :as u.files]))
(:require [cheshire.generate :as generate]
[clojure.test :refer :all]
[dk.ative.docjure.spreadsheet :as spreadsheet])
(:import com.fasterxml.jackson.core.JsonGenerator))
(defn- timestamps-in-xlsx-results
  "Run a query against the `attempted-murders` test dataset, stream the results to a temporary XLSX file, then load the
  file back and return the temporal column values of the first data row (`second`, i.e. the row after the header)."
  []
  (mt/dataset attempted-murders
    (let [filename (str (u.files/get-path (System/getProperty "java.io.tmpdir") "out.xlsx"))
          query    (mt/mbql-query attempts
                     {:fields   [$date $datetime $datetime_ltz $datetime_tz $datetime_tz_id $time $time_ltz $time_tz]
                      :order-by [[:asc $id]]
                      :limit    1})]
      (with-open [os (java.io.FileOutputStream. filename)]
        (is (= :completed
               (:status (qp/process-query query
                                          (qp.streaming/streaming-context :xlsx os))))))
      (->> (spreadsheet/load-workbook filename)
           (spreadsheet/select-sheet "Query result")
           (spreadsheet/select-columns {:A :date
                                        :B :datetime
                                        :C :datetime-ltz
                                        :D :datetime-tz
                                        :E :datetime-tz-id
                                        :F :time
                                        :G :time-ltz
                                        :H :time-tz})
           second))))
;; NOTE(review): this defrecord is only used by `encode-strange-classes-test` below
(defrecord ^:private SampleNastyClass [^String v])

(deftest report-timezone-test
  ;; H2 has limited timezone support so we run these tests with both H2 and Postgres
  (mt/test-drivers #{:h2 :postgres}
    (testing "XLSX downloads should format stuff with the report timezone rather than UTC (#13677)\n"
      (testing "Should be in UTC by default"
        (is (= (merge
                {:date           "2019-11-01T00:00:00Z"
                 :datetime       "2019-11-01T00:23:18.331Z"
                 :datetime-ltz   "2019-11-01T07:23:18.331Z"
                 :datetime-tz    "2019-11-01T07:23:18.331Z"
                 :datetime-tz-id "2019-11-01T07:23:18.331Z"}
                ;; the two drivers return time values with different precision, so expected values differ per driver
                (case driver/*driver*
                  :postgres {:time     "00:23:18.331Z"
                             :time-ltz "07:23:18.331Z"
                             :time-tz  "07:23:18.331Z"}
                  :h2       {:time     "00:23:18Z"
                             :time-ltz "07:23:18Z"
                             :time-tz  "07:23:18Z"}))
               (timestamps-in-xlsx-results))))
      (testing "If report timezone is set, results should be in that timezone"
        ;; by overriding this we can force the code that applies timezone shifts to act like H2 has full timezone
        ;; support
        (binding [qp.timezone/*results-timezone-id-override* "US/Pacific"]
          (is (= (merge
                  {:date           "2019-11-01T00:00:00-07:00"
                   :datetime       "2019-11-01T00:23:18.331-07:00"
                   :datetime-ltz   "2019-11-01T00:23:18.331-07:00"
                   :datetime-tz    "2019-11-01T00:23:18.331-07:00"
                   :datetime-tz-id "2019-11-01T00:23:18.331-07:00"}
                  (case driver/*driver*
                    :postgres {:time     "00:23:18.331-08:00"
                               :time-ltz "23:23:18.331-08:00"
                               :time-tz  "23:23:18.331-08:00"}
                    ;; H2 doesn't have a TIME WITH TIME ZONE type so these columns are actually all LocalTimes.
                    :h2       {:time     "00:23:18-08:00"
                               :time-ltz "07:23:18-08:00"
                               :time-tz  "07:23:18-08:00"}))
                 (timestamps-in-xlsx-results))))))))
;; register a Cheshire JSON encoder for `SampleNastyClass` that writes its `:v` field out as a plain string
(generate/add-encoder
 SampleNastyClass
 (fn [obj, ^JsonGenerator json-generator]
   (.writeString json-generator (str (:v obj)))))

;; unlike `SampleNastyClass`, this record deliberately has NO JSON encoder registered
(defrecord ^:private AnotherNastyClass [^String v])

(deftest encode-strange-classes-test
  (testing (str "Make sure that we're piggybacking off of the JSON encoding logic when encoding strange values in "
                "XLSX (#5145, #5220, #5459)")
    (is (= [{"Values" "values"}
            ;; should use the JSON encoding implementation for object
            {"Values" "Hello XLSX World!"}
            ;; fall back to the implementation of `str` for an object if no JSON encoder exists rather than barfing
            {"Values" "{:v \"No Encoder\"}"}
            {"Values" "ABC"}]
           (->> (spreadsheet/create-workbook "Results" [["values"]
                                                        [(SampleNastyClass. "Hello XLSX World!")]
                                                        [(AnotherNastyClass. "No Encoder")]
                                                        ["ABC"]])
                (spreadsheet/select-sheet "Results")
                (spreadsheet/select-columns {:A "Values"}))))))
(ns metabase.query-processor.streaming-test
(:require [cheshire.core :as json]
[clojure.core.async :as a]
[clojure.data.csv :as csv]
[clojure.test :refer :all]
[dk.ative.docjure.spreadsheet :as spreadsheet]
[medley.core :as m]
[metabase.async.streaming-response :as streaming-response]
[metabase.query-processor :as qp]
[metabase.query-processor.streaming :as qp.streaming]
[metabase.test :as mt]
[metabase.test.util :as tu]
[metabase.util :as u]
[toucan.db :as db])
(:import [java.io BufferedInputStream BufferedOutputStream ByteArrayInputStream ByteArrayOutputStream InputStream InputStreamReader]
javax.servlet.AsyncContext))
(:import [java.io BufferedInputStream BufferedOutputStream ByteArrayInputStream ByteArrayOutputStream InputStream
InputStreamReader]))
(defmulti ^:private parse-result
  "Parse streamed export results from `input-stream` according to `export-format`."
  {:arglists '([export-format ^InputStream input-stream])}
  (fn [fmt _input-stream] (keyword fmt)))
(defmulti ^:private parse-result*
  "Parse streamed export results from `input-stream` according to `export-format`,
  keying rows by `column-names` where applicable."
  {:arglists '([export-format ^InputStream input-stream column-names])}
  (fn [fmt _input-stream _column-names] (keyword fmt)))
(defmethod parse-result :api
[_ ^InputStream is]
(defmethod parse-result* :api
[_ ^InputStream is _]
(with-open [reader (InputStreamReader. is)]
(json/parse-stream reader true)))
(let [response (json/parse-stream reader true)]
(cond-> response
(map? response) (dissoc :database_id :started_at :json_query :average_execution_time :context :running_time)))))
(defmethod parse-result :json
  [export-format is]
  ;; JSON responses share the shape of API responses, so reuse the `:api` parser.
  (let [api-parse (get-method parse-result :api)]
    (api-parse export-format is)))
(defmethod parse-result* :json
  [export-format is column-names]
  ;; JSON export responses have the same shape as API responses, so delegate
  ;; to the `:api` implementation.
  (let [api-parse (get-method parse-result* :api)]
    (api-parse export-format is column-names)))
(defmethod parse-result :csv
[_ ^InputStream is]
(defmethod parse-result* :csv
  [_export-format ^InputStream is _column-names]
  ;; fully realize the lazy CSV seq before `with-open` closes the reader
  (with-open [rdr (InputStreamReader. is)]
    (doall (csv/read-csv rdr))))
(defmethod parse-result :xlsx
[_ ^InputStream is]
(defmethod parse-result* :xlsx
[_ ^InputStream is column-names]
(->> (spreadsheet/load-workbook-from-stream is)
(spreadsheet/select-sheet "Query result")
(spreadsheet/select-columns {:A "ID", :B "Name", :C "Category ID", :D "Latitude", :E "Longitude", :F "Price"})
(spreadsheet/select-columns (zipmap (map (comp keyword str char)
(range (int \A) (inc (int \Z))))
column-names))
rest))
(defn- process-query-basic-streaming [export-format query]
(defn parse-result
  "Parse streamed export results, delegating to [[parse-result*]]. When `column-names`
  is not supplied, defaults to the column names of the `venues` test table."
  ([export-format input-stream]
   (let [default-columns ["ID" "Name" "Category ID" "Latitude" "Longitude" "Price"]]
     (parse-result export-format input-stream default-columns)))
  ([export-format input-stream column-names]
   (parse-result* export-format input-stream column-names)))
(defn process-query-basic-streaming
"Process `query` and export it as `export-format` (in-memory), then parse the results."
{:arglists '([export-format query] [export-format query column-names])}
[export-format query & args]
(with-open [bos (ByteArrayOutputStream.)
os (BufferedOutputStream. bos)]
(qp/process-query query (assoc (qp.streaming/streaming-context export-format os)
:timeout 15000))
(is (= :completed
(:status (qp/process-query query (assoc (qp.streaming/streaming-context export-format os)
:timeout 15000)))))
(.flush os)
(let [bytea (.toByteArray bos)]
(with-open [is (BufferedInputStream. (ByteArrayInputStream. bytea))]
(parse-result export-format is)))))
(apply parse-result export-format is args)))))
;; Run `query` through the async streaming-response machinery (the same code path a
;; real API response takes), writing the exported bytes to an in-memory buffer, then
;; parse the bytes back with `parse-result`.
(defn- process-query-api-response-streaming [export-format query]
  (with-open [bos (ByteArrayOutputStream.)
              os  (BufferedOutputStream. bos)]
    (mt/with-open-channels [canceled-chan (a/promise-chan)]
      (let [streaming-response (qp.streaming/streaming-response [context export-format]
                                 (qp/process-query-async query (assoc context :timeout 5000)))]
        ;; invoke the private `do-f-async` via its Var, passing a no-op AsyncContext
        ;; since there is no real servlet container in the test
        (#'streaming-response/do-f-async (proxy [AsyncContext] []
                                           (complete []))
                                         (.f streaming-response)
                                         os
                                         (.donechan streaming-response)
                                         canceled-chan)
        ;; block until the streaming response reports it has finished (up to 1000 ms)
        (mt/wait-for-result (streaming-response/finished-chan streaming-response) 1000)))
    ;; at this point everything has been written to `bos`; parse the bytes back
    (let [bytea (.toByteArray bos)]
      (with-open [is (BufferedInputStream. (ByteArrayInputStream. bytea))]
        (parse-result export-format is)))))
(defn process-query-api-response-streaming
  "Process `query` as an API request, exporting it as `export-format` (in-memory), then parse the results."
  {:arglists '([export-format query] [export-format query column-names])}
  [export-format query & args]
  ;; `:api` hits the plain `dataset` endpoint; every other export format hits its
  ;; format-specific `dataset/<format>` endpoint with the query passed as a JSON param.
  (let [response-bytes (case export-format
                         :api (mt/user-http-request :crowberto :post "dataset"
                                                    {:request-options {:as :byte-array}}
                                                    (assoc-in query [:middleware :js-int-to-string?] false))
                         (mt/user-http-request :crowberto :post (format "dataset/%s" (name export-format))
                                               {:request-options {:as :byte-array}}
                                               :query (json/generate-string query)))]
    (with-open [input-stream (ByteArrayInputStream. response-bytes)]
      (apply parse-result export-format input-stream args))))
(defmulti ^:private expected-results
{:arglists '([export-format normal-results])}
......@@ -72,11 +82,11 @@
(defmethod expected-results :api
[_ normal-results]
(tu/obj->json->obj normal-results))
(mt/obj->json->obj normal-results))
(defmethod expected-results :json
[_ normal-results]
(let [{{:keys [cols rows]} :data} (tu/obj->json->obj normal-results)]
(let [{{:keys [cols rows]} :data} (mt/obj->json->obj normal-results)]
(for [row rows]
(zipmap (map (comp keyword :display_name) cols)
row))))
......@@ -117,7 +127,7 @@
{:order-by [[:asc $id]]
:limit 5})]
(doseq [export-format (qp.streaming/export-formats)]
(testing export-format
(testing (u/colorize :yellow export-format)
(is (= (expected-results* export-format query)
(basic-actual-results* export-format query))))))))
......@@ -131,13 +141,13 @@
(deftest streaming-response-test
(testing "Test that the actual results going thru the same steps as an API response are correct."
(doseq [export-format (qp.streaming/export-formats)]
(testing export-format
(testing (u/colorize :yellow export-format)
(compare-results export-format (mt/mbql-query venues {:limit 5}))))))
(deftest utf8-test
;; UTF-8 isn't currently working for XLSX -- fix me
(doseq [export-format (disj (qp.streaming/export-formats) :xlsx)]
(testing export-format
(testing (u/colorize :yellow export-format)
(testing "Make sure our various streaming formats properly write values as UTF-8."
(testing "A query that will have a little → in its name"
(compare-results export-format (mt/mbql-query venues
......@@ -148,3 +158,112 @@
(let [[sql & args] (db/honeysql->sql {:select [["Cam 𝌆 Saul 💩" :cam]]})]
(compare-results export-format (mt/native-query {:query sql
:params args}))))))))
(defmulti ^:private first-row-map
  "Return the first row in `results` as a map with `col-names` as the keys."
  {:arglists '([export-format results col-names])}
  (fn [fmt _results _col-names] fmt))
(defmethod first-row-map :default
  [_export-format results _col-names]
  ;; formats without a specific implementation pass results through unchanged
  results)
(defmethod first-row-map :api
  [_export-format results col-names]
  ;; API responses carry full QP results; pull out the first data row and key it.
  (let [row (mt/first-row results)]
    (zipmap col-names row)))
(defmethod first-row-map :xlsx
  [_export-format results _col-names]
  ;; the XLSX parser already returns rows as maps, so just take the first one
  (first results))
(defmethod first-row-map :csv
  [_export-format results col-names]
  ;; CSV results include a header row; the first data row is the second element
  (zipmap col-names (second results)))
(defmethod first-row-map :json
  [_export-format results col-names]
  ;; NOTE: this relies on the row map being small enough to still be an array map,
  ;; which preserves the original column order for `vals`
  (zipmap col-names (vals (first results))))
;; see also `metabase.query-processor.streaming.xlsx-test/report-timezone-test`
(deftest report-timezone-test
  (testing "Export downloads should format stuff with the report timezone rather than UTC (#13677)\n"
    (mt/test-driver :postgres
      ;; fetch a single row of the `attempted-murders` dataset, which has a column for
      ;; every flavor of temporal type (date, datetime, datetime w/ offset, datetime
      ;; w/ zone ID, time, time w/ offset, ...)
      (let [query     (mt/dataset attempted-murders
                        (mt/mbql-query attempts
                          {:fields   [$date $datetime $datetime_ltz $datetime_tz $datetime_tz_id $time $time_ltz $time_tz]
                           :order-by [[:asc $id]]
                           :limit    1}))
            col-names [:date :datetime :datetime-ltz :datetime-tz :datetime-tz-id :time :time-ltz :time-tz]]
        (doseq [export-format (qp.streaming/export-formats)]
          ;; `test-results` runs the query thru the API-response streaming code path and
          ;; compares the first row (keyed by `col-names`) against `expected`
          (letfn [(test-results [expected]
                    (testing (u/colorize :yellow export-format)
                      (is (= expected
                             (as-> (process-query-api-response-streaming export-format query col-names) results
                               (first-row-map export-format results col-names))))))]
            ;; with no report timezone set, everything should come back in UTC
            (testing "UTC results"
              (test-results
               (case export-format
                 (:csv :json)
                 {:date            "2019-11-01"
                  :datetime        "2019-11-01T00:23:18.331"
                  :datetime-ltz    "2019-11-01T07:23:18.331Z"
                  :datetime-tz     "2019-11-01T07:23:18.331Z"
                  :datetime-tz-id  "2019-11-01T07:23:18.331Z"
                  :time            "00:23:18.331"
                  :time-ltz        "07:23:18.331Z"
                  :time-tz         "07:23:18.331Z"}

                 :api
                 {:date            "2019-11-01T00:00:00Z"
                  :datetime        "2019-11-01T00:23:18.331Z"
                  :datetime-ltz    "2019-11-01T07:23:18.331Z"
                  :datetime-tz     "2019-11-01T07:23:18.331Z"
                  :datetime-tz-id  "2019-11-01T07:23:18.331Z"
                  :time            "00:23:18.331Z"
                  :time-ltz        "07:23:18.331Z"
                  :time-tz         "07:23:18.331Z"}

                 :xlsx
                 {:date            #inst "2019-11-01T00:00:00.000-00:00"
                  :datetime        #inst "2019-11-01T00:23:18.331-00:00"
                  :datetime-ltz    #inst "2019-11-01T07:23:18.331-00:00"
                  :datetime-tz     #inst "2019-11-01T07:23:18.331-00:00"
                  :datetime-tz-id  #inst "2019-11-01T07:23:18.331-00:00"
                  ;; Excel actually displays these without the date info (which is zero), but since Docjure returns
                  ;; java.util.Dates by default when parsing an XLSX doc, they have the date info here.
                  :time            #inst "1899-12-31T00:23:18.000-00:00"
                  :time-ltz        #inst "1899-12-31T07:23:18.000-00:00"
                  :time-tz         #inst "1899-12-31T07:23:18.000-00:00"})))
            ;; with a report timezone of US/Pacific, zoned values should come back with a
            ;; -07:00 offset (-08:00 for the time columns) instead of UTC
            (mt/with-temporary-setting-values [report-timezone "US/Pacific"]
              (test-results
               (case export-format
                 (:csv :json)
                 {:date            "2019-11-01"
                  :datetime        "2019-11-01T00:23:18.331"
                  :datetime-ltz    "2019-11-01T00:23:18.331-07:00"
                  :datetime-tz     "2019-11-01T00:23:18.331-07:00"
                  :datetime-tz-id  "2019-11-01T00:23:18.331-07:00"
                  :time            "00:23:18.331"
                  :time-ltz        "23:23:18.331-08:00"
                  :time-tz         "23:23:18.331-08:00"}

                 :api
                 {:date            "2019-11-01T00:00:00-07:00"
                  :datetime        "2019-11-01T00:23:18.331-07:00"
                  :datetime-ltz    "2019-11-01T00:23:18.331-07:00"
                  :datetime-tz     "2019-11-01T00:23:18.331-07:00"
                  :datetime-tz-id  "2019-11-01T00:23:18.331-07:00"
                  :time            "00:23:18.331-08:00"
                  :time-ltz        "23:23:18.331-08:00"
                  :time-tz         "23:23:18.331-08:00"}

                 :xlsx
                 {:date            #inst "2019-11-01T00:00:00.000-00:00"
                  :datetime        #inst "2019-11-01T00:23:18.331-00:00"
                  :datetime-ltz    #inst "2019-11-01T00:23:18.331-00:00"
                  :datetime-tz     #inst "2019-11-01T00:23:18.331-00:00"
                  :datetime-tz-id  #inst "2019-11-01T00:23:18.331-00:00"
                  :time            #inst "1899-12-31T00:23:18.000-00:00"
                  :time-ltz        #inst "1899-12-31T23:23:18.000-00:00"
                  :time-tz         #inst "1899-12-31T23:23:18.000-00:00"})))))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment