Unverified commit 9e09a6d1 authored by Chris Truter, committed by GitHub

Fix appending to models with existing non-ascii columns (#48237)

parent 24232c67
@@ -99,9 +99,10 @@
[driver raw-name]
(if (str/blank? raw-name)
"unnamed_column"
(u/slugify (str/trim raw-name)
;; since slugified names contain only ASCII characters, we can conflate bytes and length here.
{:max-length (max-column-bytes driver)})))
(u/lower-case-en
(u/slugify (str/trim raw-name)
;; since slugified names contain only ASCII characters, we can conflate bytes and length here.
{:max-length (max-column-bytes driver)}))))
(def auto-pk-column-name
"The lower-case name of the auto-incrementing PK column. The actual name in the database could be in upper-case."
@@ -756,18 +757,17 @@
[header & rows] (cond-> (parse reader)
auto-pk?
without-auto-pk-columns)
normed-name->field (m/index-by #(normalize-column-name driver (:name %))
(t2/select :model/Field :table_id (:id table) :active true))
name->field (m/index-by :name (t2/select :model/Field :table_id (:id table) :active true))
column-names (for [h header] (normalize-column-name driver h))
display-names (for [h header] (normalize-display-name h))
create-auto-pk? (and
auto-pk?
(driver/create-auto-pk-with-append-csv? driver)
(not (contains? normed-name->field auto-pk-column-name)))
normed-name->field (cond-> normed-name->field auto-pk? (dissoc auto-pk-column-name))
_ (check-schema normed-name->field column-names)
(not (contains? name->field auto-pk-column-name)))
name->field (cond-> name->field auto-pk? (dissoc auto-pk-column-name))
_ (check-schema name->field column-names)
settings (upload-parsing/get-settings)
old-types (map (comp upload-types/base-type->upload-type :base_type normed-name->field) column-names)
old-types (map (comp upload-types/base-type->upload-type :base_type name->field) column-names)
;; in the happy, and most common, case all the values will match the existing types
;; for now we just plan for the worst and perform a fairly expensive operation to detect any type changes
;; we can come back and optimize this to an optimistic-with-fallback approach later.
......
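A sketch (not from the diff) of what the change in this hunk buys: existing fields are now indexed by their stored :name, so the normalized CSV header names can be matched against them directly instead of being re-normalized. The "%ce%b1" value is an assumption for how "α" normalizes, consistent with the test expectations below; medley's index-by is the same helper the production code uses.

(require '[medley.core :as m])

(let [existing-fields [{:name "%ce%b1" :base_type :type/Text}] ; field created by the original upload of column "α"
      name->field     (m/index-by :name existing-fields)       ; key on the stored name, no second normalization
      column-names    ["%ce%b1"]]                              ; normalize-column-name applied to the incoming CSV header "α"
  ;; every normalized header name resolves to an existing field, so the schema check can line them up
  (every? name->field column-names))
;; => true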
@@ -1337,13 +1337,21 @@
(merge (ordered-map/ordered-map
upload/auto-pk-column-keyword ::upload-types/auto-incrementing-int-pk)))
rows [["Obi-Wan Kenobi"]]}}]
(let [driver driver/*driver*
db-id (mt/id)
table-name (ddl.i/format-name driver table-name)
schema-name (ddl.i/format-name driver schema-name)
(let [driver driver/*driver*
db-id (mt/id)
table-name (ddl.i/format-name driver table-name)
schema-name (ddl.i/format-name driver schema-name)
schema+table-name (#'upload/table-identifier {:schema schema-name :name table-name})
insert-col-names (remove #{upload/auto-pk-column-keyword} (keys col->upload-type))
col-definitions (#'upload/column-definitions driver col->upload-type)]
->normalized-col (comp keyword (partial #'upload/normalize-column-name driver) name)
names (->> (keys col->upload-type)
(map name)
(remove #{upload/auto-pk-column-name}))
normal-names (for [n names] (#'upload/normalize-column-name driver n))
display-names (#'upload/derive-display-names driver names)
name->display (zipmap normal-names display-names)
col->upload-type (update-keys col->upload-type ->normalized-col)
insert-col-names (remove #{upload/auto-pk-column-keyword} (keys col->upload-type))
col-definitions (#'upload/column-definitions driver col->upload-type)]
(driver/create-table! driver/*driver*
db-id
schema+table-name
@@ -1357,6 +1365,9 @@
(t2/update! :model/Field
{:table_id (:id table), :name upload/auto-pk-column-name}
{:display_name upload/auto-pk-column-name})
;; and preserve the other display names from the CSV
(doseq [[nm dn] name->display]
(t2/update! :model/Field {:table_id (:id table), :name nm} {:display_name dn}))
table)))
(defn catch-ex-info* [f]
@@ -1982,6 +1993,32 @@
(set (rows-for-table table)))))
(io/delete-file file)))))))))
(deftest update-new-non-ascii-column-onto-non-ascii-test
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
(doseq [action (actions-to-test driver/*driver*)]
(testing (action-testing-str action)
(with-uploads-enabled!
(testing "Append should handle new non-ascii columns being added in the latest CSV"
(with-upload-table! [table (create-upload-table!
:col->upload-type (columns-with-auto-pk {"α" ::upload-types/varchar-255}))]
;; We can't type a literal uppercase Alpha, as our whitespace linter will complain.
(is (= (header-with-auto-pk [(u/upper-case-en "α")])
(column-display-names-for-table table)))
;; Reorder as well for good measure
(let [csv-rows ["α,ϐ"
"omega,Everything"]
file (csv-file-with csv-rows)]
(testing "The new row is inserted with the values correctly reordered"
(is (= {:row-count 1} (update-csv! action {:file file, :table-id (:id table)})))
;; It's a historic quirk that added columns don't get humanized display names
(is (= (header-with-auto-pk [(u/upper-case-en "α") "ϐ"])
(column-display-names-for-table table)))
(is (= (set (updated-contents action
[["Obi-Wan Kenobi" nil]]
[["omega" "Everything"]]))
(set (rows-for-table table)))))
(io/delete-file file)))))))))
(deftest update-type-mismatch-test
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
(doseq [action (actions-to-test driver/*driver*)]
@@ -2110,11 +2147,8 @@
(defn- round-floats
"Round all floats to have n digits of precision."
[digits-precision rows]
(let [factor (Math/pow 10 digits-precision)]
(mapv (partial mapv #(if (float? %)
(/ (Math/round (* factor %)) factor)
%))
rows)))
(let [round-if-float #(if (float? %) (u/round-to-decimals digits-precision %) %)]
(mapv (partial mapv round-if-float) rows)))
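A worked example (an assumption: u/round-to-decimals rounds the same way as the arithmetic it replaces) of the rounding described in the ClickHouse comments below: 2.1 stored as a 32-bit float reads back as 2.0999999046325684, and rounding to 6 decimal places recovers 2.1 for the comparison.

(round-floats 6 [[2.0999999046325684 "unchanged"]])
;; => [[2.1 "unchanged"]]   ; non-float values pass through untouched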
(deftest update-type-coercion-test
(mt/test-drivers (mt/normal-drivers-with-feature :uploads)
@@ -2153,7 +2187,8 @@
(is (= {:row-count 1}
(update!))))
(is (= (rows-with-auto-pk [[coerced]])
;; Deal with 32 bit floats for Clickhouse
;; Clickhouse uses 32-bit floats, so we must account for that loss in precision.
;; In this case, 2.1 ⇒ 2.0999999046325684
(round-floats 6 (rows-for-table table)))))
(testing (format "\nUploading %s into a column of type %s should fail to coerce"
uncoerced (name upload-type))
@@ -2189,8 +2224,8 @@
(is (= {:row-count 1}
(update!))))
(is (= (rows-with-auto-pk [[coerced coerced]])
;; Clickhouse uses 32bit floats, so we must account for that loss in precision.
;; In this case 2.1 => 2.0999999046325684
;; Clickhouse uses 32-bit floats, so we must account for that loss in precision.
;; In this case, 2.1 ⇒ 2.0999999046325684
(round-floats 6 (rows-for-table table)))))))))))))))
(deftest create-from-csv-int-and-float-test
@@ -2219,8 +2254,8 @@
(testing "Check the data was uploaded into the table correctly"
(is (= (rows-with-auto-pk [[1.0 1.1]
[1.1 1.0]])
;; Clickhouse uses 32 bit floats,
;; so 1.1 is approximated by 1.10000002384185791015625
;; Clickhouse uses 32-bit floats, so we must account for that loss in precision.
;; In this case, 1.1 ⇒ 1.10000002384185791015625
(round-floats 7 (rows-for-table table))))))))))
(deftest update-from-csv-int-and-float-test
@@ -2391,7 +2426,7 @@
(deftest unique-long-column-names-test
(let [original ["αbcdεf_αbcdεf" "αbcdεfg_αbcdεf" "αbc_2_etc_αbcdεf" "αbc_3_xyz_αbcdεf"]
expected [:%CE%B1bcd% :%_852c229f :%CE%B1bc_2 :%CE%B1bc_3]
expected [:%ce%b1bcd% :%_b59bccce :%ce%b1bc_2 :%ce%b1bc_3]
displays ["αbcdεf" "αbcdεfg" "αbc 2 etc" "αbc 3 xyz"]]
(is (= expected (#'upload/derive-column-names ::short-column-test-driver original)))
(mt/with-dynamic-redefs [upload/max-bytes (constantly 10)]
......