Skip to content
Snippets Groups Projects
Unverified Commit 8824bd9a authored by dpsutton's avatar dpsutton Committed by GitHub
Browse files

Handle errors when inferring separator of csv (#44073)

* Handle errors when inferring separator of csv

This file

```
"c1","c2"
"a,b,c","d"
```

was failing to upload for a silly reason: when we were checking for
which separator it used, we had a parse error when using a semi-colon
and that blew up the whole pipeline.

* Use cal's helpful test suggestions
parent e2144d14
No related branches found
No related tags found
No related merge requests found
......@@ -191,12 +191,12 @@
(let [count-columns (fn [s]
;; Create a separate reader per separator, as the line-breaking behaviour depends on the parser.
(with-open [reader (bom/bom-reader file)]
(->> (csv/read-csv reader :separator s)
;; we only consider the header row and the first data row
(take 2)
(map count)
;; realize the list before the reader closes
doall)))]
(try (into []
;; take first two rows and count the number of columns in each to compare headers
;; vs data rows.
(comp (take 2) (map count))
(csv/read-csv reader :separator s))
(catch Exception _e nil))))]
(->> (map (juxt identity count-columns) separators)
;; We cannot have more data columns than header columns
;; We currently support files without any data rows, and these get a free pass.
......
......@@ -459,6 +459,39 @@
(is (= 2
(count (rows-for-table table)))))))))))
(deftest infer-separator-catch-exception-test
(testing "errors in [[upload/infer-separator]] should not prevent the upload (#44034)"
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
(with-mysql-local-infile-on-and-off
(with-upload-table!
[table (create-from-csv-and-sync-with-defaults!
:file (csv-file-with ["\"c1\",\"c2\""
"\"a,b,c\",\"d\""]))]
(testing "Check the data was uploaded into the table correctly"
(is (= (header-with-auto-pk ["c1", "c2"])
(column-names-for-table table)))
(is (= (rows-with-auto-pk [["a,b,c" "d"]])
(rows-for-table table)))))))))
(deftest infer-separator-test
(testing "doesn't error when checking alternative separators (#44034)"
(let [file (csv-file-with ["\"c1\",\"c2\""
"\"a,b,c\",\"d\""])]
(is (= \, (#'upload/infer-separator file)))))
(doseq [[separator lines] example-files]
(testing (str "inferring " separator)
(let [f (csv-file-with lines)
s ({:tab \tab :semi-colon \; :comma \,} separator)]
(is (= s (#'upload/infer-separator f))))))
;; it's actually decently hard to make it not stumble on comma or semicolon. The strategy here is that the data
;; column count is greater than the header column count regardless of the separators we choose
(let [lines [","
",,,;;;\t\t"]]
(testing "throws an error if there's no clear winner"
(let [f (csv-file-with lines)]
(is (thrown-with-msg? clojure.lang.ExceptionInfo #"Unable to recognise file separator"
(#'upload/infer-separator f)))))))
(deftest create-from-csv-date-test
(testing "Upload a CSV file with a datetime column"
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment