Skip to content
Snippets Groups Projects
Unverified Commit fb79343b authored by Tim Macdonald's avatar Tim Macdonald Committed by GitHub
Browse files

Handle BOMs in uploaded CSVs (#30392)

parent d67f22d1
Branches
Tags
No related merge requests found
......@@ -16,6 +16,7 @@
buddy/buddy-sign {:mvn/version "3.4.333"} ; JSON Web Tokens; High-Level message signing library
camel-snake-kebab/camel-snake-kebab {:mvn/version "0.4.3"} ; util functions for converting between camel, snake, and kebab case
cheshire/cheshire {:mvn/version "5.11.0"} ; fast JSON encoding (used by Ring JSON middleware)
clj-bom/clj-bom {:mvn/version "0.1.2"} ; handle BOMs in imported CSVs
clj-commons/iapetos {:mvn/version "0.1.13"} ; prometheus metrics
clj-http/clj-http {:mvn/version "3.12.3" ; HTTP client
:exclusions [commons-codec/commons-codec
......
(ns metabase.upload
(:require
[clj-bom.core :as bom]
[clojure.data.csv :as csv]
[clojure.java.io :as io]
[clojure.set :as set]
......@@ -194,7 +195,7 @@
(str truncated-name-without-time
(t/format time-format (t/local-date-time)))))
(def max-sample-rows "Maximum number of values to use for detecting a column's type" 1000)
(def ^:private max-sample-rows "Maximum number of values to use for detecting a column's type" 1000)
(defn- sample-rows
"Returns an improper subset of the rows no longer than [[max-sample-rows]]. Takes an evenly-distributed sample (not
......@@ -216,7 +217,7 @@
A column that is completely blank is assumed to be of type ::text."
[csv-file]
(with-open [reader (io/reader csv-file)]
(with-open [reader (bom/bom-reader csv-file)]
(let [[header & rows] (csv/read-csv reader)]
(rows->schema header (sample-rows rows)))))
......
(ns metabase.upload-test
(:require
[clj-bom.core :as bom]
[clojure.java.io :as io]
[clojure.string :as str]
[clojure.test :refer :all]
[metabase.driver :as driver]
......@@ -86,10 +88,13 @@
([rows]
(csv-file-with rows "test"))
([rows filename]
(csv-file-with rows filename io/writer))
([rows filename writer-fn]
(let [contents (str/join "\n" rows)
csv-file (doto (File/createTempFile filename ".csv")
(.deleteOnExit))]
(spit csv-file contents)
(with-open [^java.io.Writer w (writer-fn csv-file)]
(.write w contents))
csv-file)))
(deftest detect-schema-test
......@@ -402,3 +407,24 @@
(testing "Check that the table isn't created if the upload fails"
(sync/sync-database! (mt/db))
(is (nil? (t2/select-one Table :db_id (mt/id))))))))
(deftest load-from-csv-BOM-test
  ;; End-to-end check: for every driver that supports :uploads, load a CSV whose
  ;; file was produced through `bom/bom-writer`, sync, and verify the resulting
  ;; table's column names match the CSV header exactly.
  (testing "Upload a CSV file with a byte-order mark (BOM)"
    (mt/test-drivers (mt/normal-drivers-with-feature :uploads)
      (mt/with-empty-db
        (let [csv-lines       ["id,ship,captain"
                               "1,Serenity,Malcolm Reynolds"
                               "2,Millennium Falcon, Han Solo"]
              ;; NOTE(review): presumably this writer emits a UTF-8 BOM before the
              ;; contents -- confirm against the clj-bom documentation.
              open-bom-writer (partial bom/bom-writer "UTF-8")]
          (upload/load-from-csv
           driver/*driver*
           (mt/id)
           "upload_test"
           (csv-file-with csv-lines "star-wars" open-bom-writer)))
        (testing "Table and Fields exist after sync"
          (sync/sync-database! (mt/db))
          (let [uploaded-table (t2/select-one Table :db_id (mt/id))]
            ;; Table name is matched case-insensitively.
            (is (=? {:name #"(?i)upload_test"} uploaded-table))
            (testing "Check the data was uploaded into the table correctly"
              ;; The first column must be exactly "id"; presumably an unhandled BOM
              ;; would show up as extra characters in that name.
              (is (= ["id" "ship" "captain"]
                     (column-names-for-table uploaded-table))))))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment