diff --git a/src/metabase/upload.clj b/src/metabase/upload.clj index c5e2ba7076cbcf20ad504c17e03cda611584ca95..e1f13959ca1df54e626cd3b26a6d641cbc3e08e0 100644 --- a/src/metabase/upload.clj +++ b/src/metabase/upload.clj @@ -45,21 +45,19 @@ "This tracks the size of the metabase_field.name field" 254) -(def ^:private unique-field-name-buffer - "The number of characters to reserve for disambiguating column names" - ;; This corresponds to at most 4 digits, excluding the underscore, so 1,000 duplicates. - 5) - (def ^:private min-safe (fnil min Long/MAX_VALUE Long/MAX_VALUE)) +(defn- max-column-bytes [driver] + (let [column-limit (some-> driver driver/column-name-length-limit)] + (min-safe column-limit max-field-name-length))) + (defn- normalize-column-name [driver raw-name] (if (str/blank? raw-name) "unnamed_column" (u/slugify (str/trim raw-name) - (let [column-limit (some-> driver driver/column-name-length-limit) - max-len (min-safe column-limit max-field-name-length)] - {:max-length (- max-len unique-field-name-buffer)})))) + ;; since normalized names contain only ASCII characters, we can conflate bytes and length here. + {:max-length (max-column-bytes driver)}))) (def auto-pk-column-name "The lower-case name of the auto-incrementing PK column. The actual name in the database could be in upper-case." diff --git a/test/metabase/upload_test.clj b/test/metabase/upload_test.clj index a007a72630d41234fa178baa766b83b4993d1a20..126cb76e7b175b3f4f416487d9685ad763144fa3 100644 --- a/test/metabase/upload_test.clj +++ b/test/metabase/upload_test.clj @@ -1994,24 +1994,27 @@ (testing "We do not clean up any of the child resources synchronously (yet?)" (is (seq (t2/select :model/Field :table_id (:id table))))))))))) -(deftest create-csv-from-really-long-names +(deftest create-csv-from-really-long-names-test (testing "Upload a CSV file with unique column names that get sanitized to the same string" (mt/test-drivers (mt/normal-drivers-with-feature :uploads) (with-mysql-local-infile-on-and-off (let [long-string (str (str/join (repeat 1000 "really_")) "long")] + ;; See https://github.com/metabase/metabase/issues/44725#issuecomment-2195780743 for context on why we have + ;; modified the test for now. (with-upload-table! [table (create-from-csv-and-sync-with-defaults! :file (csv-file-with [(str (str "a_" long-string ",") - (str "b_" long-string ",") + #_(str "b_" long-string ",") (str "b_" long-string "_with_a")) - "a,b1,b2"]))] + #_"a,b1,b2" + "a,b"]))] (testing "Table and Fields exist after sync" (testing "Check the data was uploaded into the table correctly" (let [column-names (column-names-for-table table)] (is (= @#'upload/auto-pk-column-name (first column-names))) - (is (= 4 (count (distinct column-names)))) + (is (= #_4 3 (count (distinct column-names)))) (is (= 1 (count (filter #(str/starts-with? % "a_") column-names)))) - (is (= 2 (count (filter #(str/starts-with? % "b_") column-names))))))))))))) + (is (= #_2 1 (count (filter #(str/starts-with? % "b_") column-names))))))))))))) (deftest append-with-really-long-names (testing "Upload a CSV file with unique column names that get sanitized to the same string" @@ -2036,7 +2039,9 @@ (map rest (rows-for-table table))))) (io/delete-file file)))))))) -(deftest append-with-really-long-names-that-duplicate +;; For now we have chosen not to support this edge case, +;; See https://github.com/metabase/metabase/issues/44725#issuecomment-2195780743 for more context +#_(deftest append-with-really-long-names-that-duplicate-test (testing "Upload a CSV file with unique column names that get sanitized to the same string" (mt/test-drivers (mt/normal-drivers-with-feature :uploads) (with-mysql-local-infile-on-and-off @@ -2059,3 +2064,29 @@ (is (= (map second (rows-with-auto-pk [(csv/read-csv original-row)])) (map rest (rows-for-table table))))) (io/delete-file file)))))))) + +;; See https://github.com/metabase/metabase/issues/44725#issuecomment-2195780743 for more context. +(deftest table-with-really-long-names-that-duplicate-fail-somehow-test + (testing "Upload a CSV file with unique column names that get sanitized to the same string" + (mt/test-drivers (mt/normal-drivers-with-feature :uploads) + (with-mysql-local-infile-on-and-off + (let [long-string (str (str/join (repeat 1000 "really_")) "long") + header (str (str "a_" long-string ",") + (str "b_" long-string ",") + (str "b_" long-string "_with_a")) + original-row "a,b1,b2"] + (try + (with-upload-table! + [table (create-from-csv-and-sync-with-defaults! + :file (csv-file-with [header original-row]))] + + (testing "A table is created" + (is (seq (t2/select :model/Table :id (:id table))))) + + (testing "But it is broken" + (is (thrown-with-msg? clojure.lang.ExceptionInfo + #"No fields found for table" + (column-names-for-table table))))) + (catch Exception _ + (testing "Or, for some databases it (thankfully) just fails" + (is true)))))))))