Skip to content
Snippets Groups Projects
Unverified Commit 0d2b4b06 authored by Chris Truter's avatar Chris Truter Committed by GitHub
Browse files

Support duplicate prefixes in long column names for CSV uploads (#44885)

parent 1175cafb
No related branches found
No related tags found
No related merge requests found
......@@ -16,6 +16,7 @@
[metabase.events :as events]
[metabase.legacy-mbql.util :as mbql.u]
[metabase.lib.core :as lib]
[metabase.lib.util :as lib.util]
[metabase.models :refer [Database]]
[metabase.models.card :as card]
[metabase.models.collection :as collection]
......@@ -264,10 +265,17 @@
[driver db]
(driver.u/supports? driver :upload-with-auto-pk db))
(defn- unique-alias-fn [driver]
(let [max-length (max-column-bytes driver)]
(fn [base suffix]
(as-> (str base "_" suffix) %
(driver/escape-alias driver %)
(lib.util/truncate-alias % max-length)))))
(defn- derive-column-names [driver header]
(let [normalized-header (for [h header] (normalize-column-name driver h))
unique-header (mbql.u/uniquify-names normalized-header)]
(map keyword unique-header)))
(let [generator-fn (mbql.u/unique-name-generator :unique-alias-fn (unique-alias-fn driver))]
(mapv (comp keyword generator-fn)
(for [h header] (normalize-column-name driver h)))))
(defn- create-from-csv!
"Creates a table from a CSV file. If the table already exists, it will throw an error.
......
......@@ -2035,25 +2035,28 @@
(testing "Upload a CSV file with unique column names that get sanitized to the same string"
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
(with-mysql-local-infile-on-and-off
(let [long-string (str (str/join (repeat 1000 "really_")) "long")]
;; See https://github.com/metabase/metabase/issues/44725#issuecomment-2195780743 for context on why we have
;; modified the test for now.
(let [long-string (str (str/join (repeat 1000 "really_")) "long")
header (str (str "a_" long-string ",")
(str "b_" long-string ",")
(str "b_" long-string "_with_a"))]
(with-upload-table!
[table (create-from-csv-and-sync-with-defaults!
:file (csv-file-with [(str (str "a_" long-string ",")
#_(str "b_" long-string ",")
(str "b_" long-string "_with_a"))
#_"a,b1,b2"
"a,b"]))]
:file (csv-file-with [header
"a,b1,b2"]))]
(testing "Table and Fields exist after sync"
(testing "Check the data was uploaded into the table correctly"
(let [column-names (column-names-for-table table)]
(is (= @#'upload/auto-pk-column-name (first column-names)))
(is (= #_4 3 (count (distinct column-names))))
(is (= 1 (count (filter #(str/starts-with? % "a_") column-names))))
(is (= #_2 1 (count (filter #(str/starts-with? % "b_") column-names)))))))))))))
(deftest append-with-really-long-names
(testing "We preserve names where possible"
(let [header-names (->> (str/split header #",")
(map (partial #'upload/normalize-column-name driver/*driver*)))]
(is (every? (set column-names) header-names))))
(testing "We preserve prefixes where_possible"
(is (= {"_mb_row_" 1
"a_really" 1
"b_really" 2}
(frequencies (map #(subs % 0 8) column-names))))))))))))))
(deftest append-with-really-long-names-test
(testing "Upload a CSV file with unique column names that get sanitized to the same string"
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
(with-mysql-local-infile-on-and-off
......@@ -2076,9 +2079,7 @@
(map rest (rows-for-table table)))))
(io/delete-file file))))))))
;; For now we have chosen not to support this edge case,
;; See https://github.com/metabase/metabase/issues/44725#issuecomment-2195780743 for more context
#_(deftest append-with-really-long-names-that-duplicate-test
(deftest append-with-really-long-names-that-duplicate-test
(testing "Upload a CSV file with unique column names that get sanitized to the same string"
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
(with-mysql-local-infile-on-and-off
......@@ -2102,28 +2103,10 @@
(map rest (rows-for-table table)))))
(io/delete-file file))))))))
;; See https://github.com/metabase/metabase/issues/44725#issuecomment-2195780743 for more context.
(deftest table-with-really-long-names-that-duplicate-fail-somehow-test
(testing "Upload a CSV file with unique column names that get sanitized to the same string"
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
(with-mysql-local-infile-on-and-off
(let [long-string (str (str/join (repeat 1000 "really_")) "long")
header (str (str "a_" long-string ",")
(str "b_" long-string ",")
(str "b_" long-string "_with_a"))
original-row "a,b1,b2"]
(try
(with-upload-table!
[table (create-from-csv-and-sync-with-defaults!
:file (csv-file-with [header original-row]))]
(testing "A table is created"
(is (seq (t2/select :model/Table :id (:id table)))))
(testing "But it is broken"
(is (thrown-with-msg? clojure.lang.ExceptionInfo
#"No fields found for table"
(column-names-for-table table)))))
(catch Exception _
(testing "Or, for some databases it (thankfully) just fails"
(is true)))))))))
(driver/register! ::short-column-test-driver)
(defmethod driver/column-name-length-limit ::short-column-test-driver [_] 10)
(deftest unique-long-column-names-test
(let [original ["αbcdεf" "αbcdεfg" "αbc_2_etc" "αbc_3_xyz"]
expected [:%CE%B1bcd% :%_852c229f :%CE%B1bc_2 :%CE%B1bc_3]]
(is (= expected (#'upload/derive-column-names ::short-column-test-driver original)))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment