From 8824bd9a19e6596754a62eb861a4e3838408ea86 Mon Sep 17 00:00:00 2001
From: dpsutton <dan@dpsutton.com>
Date: Wed, 12 Jun 2024 17:21:52 -0500
Subject: [PATCH] Handle errors when inferring separator of csv (#44073)

* Handle errors when inferring separator of csv

This file

```
"c1","c2"
"a,b,c","d"
```

was failing to upload for a silly reason: when we were checking for
which separator it used, we had a parse error when using a semi-colon
and that blew up the whole pipeline.

* Use cal's helpful test suggestions
---
 src/metabase/upload.clj       | 12 ++++++------
 test/metabase/upload_test.clj | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/src/metabase/upload.clj b/src/metabase/upload.clj
index b7ca0b15d3b..6abd636469b 100644
--- a/src/metabase/upload.clj
+++ b/src/metabase/upload.clj
@@ -191,12 +191,12 @@
   (let [count-columns (fn [s]
                         ;; Create a separate reader per separator, as the line-breaking behaviour depends on the parser.
                         (with-open [reader (bom/bom-reader file)]
-                          (->> (csv/read-csv reader :separator s)
-                               ;; we only consider the header row and the first data row
-                               (take 2)
-                               (map count)
-                               ;; realize the list before the reader closes
-                               doall)))]
+                          (try (into []
+                                     ;; take first two rows and count the number of columns in each to compare headers
+                                     ;; vs data rows.
+                                     (comp (take 2) (map count))
+                                     (csv/read-csv reader :separator s))
+                               (catch Exception _e nil))))]
     (->> (map (juxt identity count-columns) separators)
          ;; We cannot have more data columns than header columns
          ;; We currently support files without any data rows, and these get a free pass.
diff --git a/test/metabase/upload_test.clj b/test/metabase/upload_test.clj
index 573a5e4dfab..b537548669b 100644
--- a/test/metabase/upload_test.clj
+++ b/test/metabase/upload_test.clj
@@ -459,6 +459,39 @@
               (is (= 2
                      (count (rows-for-table table)))))))))))
 
+(deftest infer-separator-catch-exception-test
+  (testing "errors in [[upload/infer-separator]] should not prevent the upload (#44034)"
+    (mt/test-drivers (mt/normal-drivers-with-feature :uploads)
+      (with-mysql-local-infile-on-and-off
+        (with-upload-table!
+          [table (create-from-csv-and-sync-with-defaults!
+                  :file (csv-file-with ["\"c1\",\"c2\""
+                                        "\"a,b,c\",\"d\""]))]
+          (testing "Check the data was uploaded into the table correctly"
+            (is (= (header-with-auto-pk ["c1", "c2"])
+                   (column-names-for-table table)))
+            (is (= (rows-with-auto-pk [["a,b,c" "d"]])
+                   (rows-for-table table)))))))))
+
+(deftest infer-separator-test
+  (testing "doesn't error when checking alternative separators (#44034)"
+    (let [file (csv-file-with ["\"c1\",\"c2\""
+                               "\"a,b,c\",\"d\""])]
+      (is (= \, (#'upload/infer-separator file)))))
+  (doseq [[separator lines] example-files]
+    (testing (str "inferring " separator)
+      (let [f (csv-file-with lines)
+            s ({:tab \tab :semi-colon \; :comma \,} separator)]
+        (is (= s (#'upload/infer-separator f))))))
+  ;; it's actually decently hard to make it not stumble on comma or semicolon. The strategy here is that the data
+  ;; column count is greater than the header column count regardless of the separators we choose
+  (let [lines [","
+               ",,,;;;\t\t"]]
+    (testing "throws an error if there's no clear winner"
+      (let [f (csv-file-with lines)]
+        (is (thrown-with-msg? clojure.lang.ExceptionInfo #"Unable to recognise file separator"
+                              (#'upload/infer-separator f)))))))
+
 (deftest create-from-csv-date-test
   (testing "Upload a CSV file with a datetime column"
     (mt/test-drivers (mt/normal-drivers-with-feature :uploads)
-- 
GitLab