Skip to content
Snippets Groups Projects
Unverified commit 5830d8dc, authored by Chris Truter, committed by GitHub
Browse files

Tidy up CSV encoding detection (#47809)

parent 56d21640
No related branches found
No related tags found
No related merge requests found
......@@ -36,6 +36,7 @@
:exclusions [it.unimi.dsi/fastutil
org.slf4j/slf4j-api]}
com.draines/postal {:mvn/version "2.0.5"} ; SMTP library
; Detect the charset in uploaded CSV files
com.github.albfernandez/juniversalchardet {:mvn/version "2.5.0"}
com.github.seancorfield/honeysql {:mvn/version "2.6.1126"} ; Honey SQL 2. SQL generation from Clojure data maps
com.github.seancorfield/next.jdbc {:mvn/version "1.3.925"} ; Talk to JDBC DBs
......
......@@ -291,16 +291,14 @@
(if (pos? bytes-read)
(do
(.handleData detector buffer 0 bytes-read)
(if (.isDone detector)
(.getDetectedCharset detector)
(when-not (.isDone detector)
(recur)))
(do
(.dataEnd detector)
(.getDetectedCharset detector)))))))
(.dataEnd detector)))))
(.getDetectedCharset detector))
(catch Exception _)))
(defn- ->reader ^Reader [^File file]
  ;; Open `file` as a character reader. The encoding is whatever
  ;; `detect-charset` reports for the file; when it reports nothing we fall
  ;; back to UTF-8 and just live with any unrecognized characters.
  (let [encoding (or (detect-charset file) "UTF-8")
        stream   (bom/bom-input-stream file)]
    (InputStreamReader. stream encoding)))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment