From b28aa417bea88cbfea9ce3b8174ebba8becfd3d1 Mon Sep 17 00:00:00 2001 From: Chris Truter <crisptrutski@users.noreply.github.com> Date: Tue, 1 Oct 2024 12:18:41 +0200 Subject: [PATCH] Upload test fixes for drivers without auto mb-id (#48194) --- test/metabase/upload_test.clj | 263 +++++++++++++++++++--------------- 1 file changed, 146 insertions(+), 117 deletions(-) diff --git a/test/metabase/upload_test.clj b/test/metabase/upload_test.clj index 52136126bbb..674e02a7352 100644 --- a/test/metabase/upload_test.clj +++ b/test/metabase/upload_test.clj @@ -519,19 +519,22 @@ "2\t || a |true |1.1\t |2022-01-01|2022-01-01T00:00:00" "\" 3\"|| b|false|\"$ 1,000.1\"|2022-02-01|2022-02-01T00:00:00"]}) +(defn- auto-pk-column? [] + (#'upload/auto-pk-column? driver/*driver* (mt/db))) + (defn- columns-with-auto-pk [columns] (cond-> columns - (driver.u/supports? driver/*driver* :upload-with-auto-pk (mt/db)) + (auto-pk-column?) (#'upload/columns-with-auto-pk))) (defn- header-with-auto-pk [header] (cond->> header - (driver.u/supports? driver/*driver* :upload-with-auto-pk (mt/db)) + (auto-pk-column?) (cons @#'upload/auto-pk-column-name))) (defn- rows-with-auto-pk [rows] (cond->> rows - (driver.u/supports? driver/*driver* :upload-with-auto-pk (mt/db)) + (auto-pk-column?) (map-indexed (fn [i row] (cons (inc i) row))))) (defn- column-position [table column-name] @@ -677,30 +680,31 @@ (deftest create-from-csv-offset-datetime-test (testing "Upload a CSV file with an offset datetime column" (mt/test-drivers (mt/normal-drivers-with-feature :uploads) - (with-mysql-local-infile-on-and-off - (with-redefs [driver/db-default-timezone (constantly "Z") - upload/current-database (constantly (mt/db))] - (let [transpose (fn [m] (apply mapv vector m)) - [csv-strs expected] (transpose [["2022-01-01T12:00:00-07" "2022-01-01T19:00:00Z"] - ["2022-01-01T12:00:00-07:00" "2022-01-01T19:00:00Z"] - ["2022-01-01T12:00:00-07:30" "2022-01-01T19:30:00Z"] - ["2022-01-01T12:00:00Z" "2022-01-01T12:00:00Z"] - ["2022-01-01T12:00:00-00:00" "2022-01-01T12:00:00Z"] - ["2022-01-01T12:00:00+07" "2022-01-01T05:00:00Z"] - ["2022-01-01T12:00:00+07:00" "2022-01-01T05:00:00Z"] - ["2022-01-01T12:00:00+07:30" "2022-01-01T04:30:00Z"] - ["2022-01-01T12:00:00+0730" "2022-01-01T04:30:00Z"]])] - (testing "Fields exists after sync" - (with-upload-table! - [table (create-from-csv-and-sync-with-defaults! - :file (csv-file-with (into ["offset_datetime"] csv-strs)))] - (testing "Check the offset datetime column the correct base_type" - (is (=? :type/DateTimeWithLocalTZ - (t2/select-one-fn :base_type Field :%lower.name "offset_datetime" :table_id (:id table))))) - (let [position (column-position table "offset_datetime") - values (map #(nth % position) (rows-for-table table))] - (is (= expected - values))))))))))) + (when (driver/upload-type->database-type driver/*driver* :metabase.upload/offset-datetime) + (with-mysql-local-infile-on-and-off + (with-redefs [driver/db-default-timezone (constantly "Z") + upload/current-database (constantly (mt/db))] + (let [transpose (fn [m] (apply mapv vector m)) + [csv-strs expected] (transpose [["2022-01-01T12:00:00-07" "2022-01-01T19:00:00Z"] + ["2022-01-01T12:00:00-07:00" "2022-01-01T19:00:00Z"] + ["2022-01-01T12:00:00-07:30" "2022-01-01T19:30:00Z"] + ["2022-01-01T12:00:00Z" "2022-01-01T12:00:00Z"] + ["2022-01-01T12:00:00-00:00" "2022-01-01T12:00:00Z"] + ["2022-01-01T12:00:00+07" "2022-01-01T05:00:00Z"] + ["2022-01-01T12:00:00+07:00" "2022-01-01T05:00:00Z"] + ["2022-01-01T12:00:00+07:30" "2022-01-01T04:30:00Z"] + ["2022-01-01T12:00:00+0730" "2022-01-01T04:30:00Z"]])] + (testing "Fields exists after sync" + (with-upload-table! + [table (create-from-csv-and-sync-with-defaults! + :file (csv-file-with (into ["offset_datetime"] csv-strs)))] + (testing "Check the offset datetime column the correct base_type" + (is (=? :type/DateTimeWithLocalTZ + (t2/select-one-fn :base_type Field :%lower.name "offset_datetime" :table_id (:id table))))) + (let [position (column-position table "offset_datetime") + values (map #(nth % position) (rows-for-table table))] + (is (= expected + values)))))))))))) (deftest create-from-csv-boolean-test (testing "Upload a CSV file" @@ -827,7 +831,7 @@ (testing "Check the data was uploaded into the table correctly" (is (= (header-with-auto-pk ["Cost $" "Cost %" "Cost #"]) (column-display-names-for-table table))) - (is (= [@#'upload/auto-pk-column-name "cost__" "cost___2" "cost___3"] + (is (= (header-with-auto-pk ["cost__" "cost___2" "cost___3"]) (column-names-for-table table)))))))))) (deftest create-from-csv-bool-and-int-test @@ -863,13 +867,14 @@ "9000000000,Razor Crest,Din Djarin,Spear"]) :auxiliary-sync-steps :synchronous))] (testing "Check the data was uploaded into the table correctly" - (is (= [@#'upload/auto-pk-column-name "id" "ship" "name" "weapon"] + (is (= (header-with-auto-pk ["id" "ship" "name" "weapon"]) (column-names-for-table table))) (is (=? {:name #"(?i)id" :semantic_type :type/PK :base_type :type/BigInteger :database_is_auto_increment false} - (t2/select-one Field :database_position 1 :table_id (:id table)))))))))) + (let [pos (if (auto-pk-column?) 1 0)] + (t2/select-one Field :database_position pos :table_id (:id table))))))))))) (deftest create-from-csv-auto-pk-column-test (mt/test-drivers (mt/normal-drivers-with-feature :uploads :upload-with-auto-pk) @@ -1175,7 +1180,7 @@ "size_mb" 3.910064697265625E-5 "num_columns" 2 "num_rows" 2 - "generated_columns" 1 + "generated_columns" (if (auto-pk-column?) 1 0) "upload_seconds" pos?} :user-id (str (mt/user->id :rasta))} (last (snowplow-test/pop-event-data-and-user-id!))))) @@ -1211,7 +1216,7 @@ :model-id pos? :stats {:num-rows 2 :num-columns 2 - :generated-columns 1 + :generated-columns (if (auto-pk-column?) 1 0) :size-mb 3.910064697265625E-5 :upload-seconds pos?}}} (last-audit-event :upload-create))))))))) @@ -1328,7 +1333,7 @@ :or {table-name (mt/random-name) schema-name (sql.tx/session-schema driver/*driver*) col->upload-type (cond->> (ordered-map/ordered-map :name ::upload-types/varchar-255) - (#'upload/auto-pk-column? driver/*driver* (mt/db)) + (auto-pk-column?) (merge (ordered-map/ordered-map upload/auto-pk-column-keyword ::upload-types/auto-incrementing-int-pk))) rows [["Obi-Wan Kenobi"]]}}] @@ -1347,7 +1352,12 @@ {:primary-key [upload/auto-pk-column-keyword]} {})) (driver/insert-into! driver db-id schema+table-name insert-col-names rows) - (sync-upload-test-table! :database (mt/db) :table-name table-name :schema-name schema-name))) + (let [table (sync-upload-test-table! :database (mt/db) :table-name table-name :schema-name schema-name)] + ;; ensure we have the same display name for the auto-pk-column that a real upload would have + (t2/update! :model/Field + {:table_id (:id table), :name upload/auto-pk-column-name} + {:display_name upload/auto-pk-column-name}) + table))) (defn catch-ex-info* [f] (try @@ -1481,6 +1491,8 @@ ["_mb_row_id,id,extra 1, extra 2,name"] nil ["extra 1, extra 2"] + ;; TODO note that the order of the fields is reversed + ;; It would be better if they were alphabetical, or matched the order in the database / file. (trim-lines "The CSV file is missing columns that are in the table: - id - name @@ -1500,8 +1512,8 @@ - name There are new columns in the CSV file that are not in the table: - - _mb_row_id - - extra_2"))}] + - extra_2 + - _mb_row_id"))}] (with-upload-table! [table (create-upload-table! {:col->upload-type (ordered-map/ordered-map @@ -1604,53 +1616,54 @@ (deftest update-mb-row-id-csv-only-test (mt/test-drivers (mt/normal-drivers-with-feature :uploads) - (doseq [action (actions-to-test driver/*driver*)] - (testing (action-testing-str action) - (testing "If the table doesn't have _mb_row_id but the CSV does, ignore the CSV _mb_row_id but create the column anyway" - (with-upload-table! - [table (create-upload-table! {:col->upload-type (ordered-map/ordered-map - :name vchar-type) - :rows [["Obi-Wan Kenobi"]]})] - (let [csv-rows ["_MB-row ID,name" "1000,Luke Skywalker"] - file (csv-file-with csv-rows)] - (is (= {:row-count 1} - (update-csv! action {:file file, :table-id (:id table)}))) - ;; Only create auto-pk columns for drivers that supported uploads before auto-pk columns - ;; were introduced by metabase#36249. Otherwise we can assume that the table was created - ;; with an auto-pk column. - (if (driver/create-auto-pk-with-append-csv? driver/*driver*) - (do - (testing "Check a _mb_row_id column was created" - (is (= ["name" "_mb_row_id"] - (column-names-for-table table)))) - (testing "Check a _mb_row_id column was sync'd" - (is (=? {:semantic_type :type/PK - :base_type :type/BigInteger - :name "_mb_row_id" - :display_name "_mb_row_id"} - (t2/select-one :model/Field :table_id (:id table) :name upload/auto-pk-column-name)))) - (testing "Check the data was uploaded into the table, but the _mb_row_id column values were ignored" + (when (driver.u/supports? driver/*driver* :upload-with-auto-pk (mt/db)) + (doseq [action (actions-to-test driver/*driver*)] + (testing (action-testing-str action) + (testing "If the table doesn't have _mb_row_id but the CSV does, ignore the CSV _mb_row_id but create the column anyway" + (with-upload-table! + [table (create-upload-table! {:col->upload-type (ordered-map/ordered-map + :name vchar-type) + :rows [["Obi-Wan Kenobi"]]})] + (let [csv-rows ["_MB-row ID,name" "1000,Luke Skywalker"] + file (csv-file-with csv-rows)] + (is (= {:row-count 1} + (update-csv! action {:file file, :table-id (:id table)}))) + ;; Only create auto-pk columns for drivers that supported uploads before auto-pk columns + ;; were introduced by metabase#36249. Otherwise we can assume that the table was created + ;; with an auto-pk column. + (if (driver/create-auto-pk-with-append-csv? driver/*driver*) + (do + (testing "Check a _mb_row_id column was created" + (is (= ["name" "_mb_row_id"] + (column-names-for-table table)))) + (testing "Check a _mb_row_id column was sync'd" + (is (=? {:semantic_type :type/PK + :base_type :type/BigInteger + :name "_mb_row_id" + :display_name "_mb_row_id"} + (t2/select-one :model/Field :table_id (:id table) :name upload/auto-pk-column-name)))) + (testing "Check the data was uploaded into the table, but the _mb_row_id column values were ignored" + (case action + ::upload/append + (is (= [["Obi-Wan Kenobi" 1] + ["Luke Skywalker" 2]] + (rows-for-table table))) + ::upload/replace + (is (= [["Luke Skywalker" 1]] + (rows-for-table table)))))) + (do + (testing "Check a _mb_row_id column wasn't created" + (is (= ["name"] + (column-names-for-table table)))) (case action ::upload/append - (is (= [["Obi-Wan Kenobi" 1] - ["Luke Skywalker" 2]] + (is (= [["Obi-Wan Kenobi"] + ["Luke Skywalker"]] (rows-for-table table))) ::upload/replace - (is (= [["Luke Skywalker" 1]] + (is (= [["Luke Skywalker"]] (rows-for-table table)))))) - (do - (testing "Check a _mb_row_id column wasn't created" - (is (= ["name"] - (column-names-for-table table)))) - (case action - ::upload/append - (is (= [["Obi-Wan Kenobi"] - ["Luke Skywalker"]] - (rows-for-table table))) - ::upload/replace - (is (= [["Luke Skywalker"]] - (rows-for-table table)))))) - (io/delete-file file)))))))) + (io/delete-file file))))))))) (deftest update-no-mb-row-id-failure-test (mt/test-drivers (mt/normal-drivers-with-feature :uploads) @@ -1859,26 +1872,13 @@ (deftest update-mb-row-id-csv-and-table-test (mt/test-drivers (mt/normal-drivers-with-feature :uploads) - (doseq [action (actions-to-test driver/*driver*)] - (testing (action-testing-str action) - (testing "Append succeeds if the table has _mb_row_id and the CSV does too" - (with-upload-table! [table (create-upload-table!)] - (let [csv-rows ["_mb_row_id,name" "1000,Luke Skywalker"] - file (csv-file-with csv-rows (mt/random-name))] - (is (= {:row-count 1} - (update-csv! action {:file file, :table-id (:id table)}))) - (testing "Check the data was uploaded into the table, but the _mb_row_id was ignored" - (is (= (set (updated-contents action - [["Obi-Wan Kenobi"]] - [["Luke Skywalker"]])) - (set (rows-for-table table))))) - (io/delete-file file))) - - ;; TODO we can deduplicate a lot of code in this test - (testing "with duplicate normalized _mb_row_id columns in the CSV file" + (when (driver.u/supports? driver/*driver* :upload-with-auto-pk (mt/db)) + (doseq [action (actions-to-test driver/*driver*)] + (testing (action-testing-str action) + (testing "Append succeeds if the table has _mb_row_id and the CSV does too" (with-upload-table! [table (create-upload-table!)] - (let [csv-rows ["_mb_row_id,name,-MB-ROW-ID" "1000,Luke Skywalker,1001"] - file (csv-file-with csv-rows)] + (let [csv-rows ["_mb_row_id,name" "1000,Luke Skywalker"] + file (csv-file-with csv-rows (mt/random-name))] (is (= {:row-count 1} (update-csv! action {:file file, :table-id (:id table)}))) (testing "Check the data was uploaded into the table, but the _mb_row_id was ignored" @@ -1886,7 +1886,21 @@ [["Obi-Wan Kenobi"]] [["Luke Skywalker"]])) (set (rows-for-table table))))) - (io/delete-file file))))))))) + (io/delete-file file))) + + ;; TODO we can deduplicate a lot of code in this test + (testing "with duplicate normalized _mb_row_id columns in the CSV file" + (with-upload-table! [table (create-upload-table!)] + (let [csv-rows ["_mb_row_id,name,-MB-ROW-ID" "1000,Luke Skywalker,1001"] + file (csv-file-with csv-rows)] + (is (= {:row-count 1} + (update-csv! action {:file file, :table-id (:id table)}))) + (testing "Check the data was uploaded into the table, but the _mb_row_id was ignored" + (is (= (set (updated-contents action + [["Obi-Wan Kenobi"]] + [["Luke Skywalker"]])) + (set (rows-for-table table))))) + (io/delete-file file)))))))))) (deftest update-duplicate-header-csv-test (mt/test-drivers (mt/normal-drivers-with-feature :uploads) @@ -1959,8 +1973,8 @@ file (csv-file-with csv-rows)] (testing "The new row is inserted with the values correctly reordered" (is (= {:row-count 1} (update-csv! action {:file file, :table-id (:id table)}))) - (is (= ["name" "α"] - (rest (column-display-names-for-table table)))) + (is (= (header-with-auto-pk ["name" "α"]) + (column-display-names-for-table table))) (is (= (set (updated-contents action [["Obi-Wan Kenobi" nil]] [["Everything" "omega"]])) @@ -1976,7 +1990,7 @@ ;; for drivers that insert rows in chunks, we change the chunk size to 1 so that we can test that the ;; inserted rows are rolled back (binding [driver/*insert-chunk-rows* 1] - (doseq [auto-pk-column? (if (driver.u/supports? driver/*driver* :upload-with-auto-pk (mt/db)) + (doseq [auto-pk-column? (if (auto-pk-column?) [true false] [false])] (testing (str "\nFor a table that has " (if auto-pk-column? "an" " no") " automatically generated PK already") @@ -2138,6 +2152,15 @@ (update!))))) (io/delete-file file))))))))))) +(defn- round-floats + "Round all floats to have n digits of precision." + [digits-precision rows] + (let [factor (Math/pow 10 digits-precision)] + (mapv (partial mapv #(if (float? %) + (/ (Math/round (* factor %)) factor) + %)) + rows))) + (deftest update-promotion-multiple-columns-test (mt/test-drivers (disj (mt/normal-drivers-with-feature :uploads) :redshift) ; redshift doesn't support promotion (doseq [action (actions-to-test driver/*driver*)] @@ -2163,8 +2186,10 @@ (testing "\nAppend should succeed" (is (= {:row-count 1} (update!)))) - (is (= [[1 coerced coerced]] - (rows-for-table table)))))))))))))) + (is (= (rows-with-auto-pk [[coerced coerced]]) + ;; Clickhouse uses 32bit floats, so we must account for that loss in precision. + ;; In this case 2.1 => 2.0999999046325684 + (round-floats 6 (rows-for-table table))))))))))))))) (deftest create-from-csv-int-and-float-test (testing "Creation should handle a mix of int and float-or-int values in any order" @@ -2176,8 +2201,8 @@ "1, 1.0" "1.0, 1"]))] (testing "Check the data was uploaded into the table correctly" - (is (= [[1 1.0 1.0] - [2 1.0 1.0]] + (is (= (rows-with-auto-pk [[1.0 1.0] + [1.0 1.0]]) (rows-for-table table))))))))) (deftest create-from-csv-int-and-non-integral-float-test @@ -2190,9 +2215,11 @@ "1, 1.1" "1.1, 1"]))] (testing "Check the data was uploaded into the table correctly" - (is (= [[1 1.0 1.1] - [2 1.1 1.0]] - (rows-for-table table))))))))) + (is (= (rows-with-auto-pk [[1.0 1.1] + [1.1 1.0]]) + ;; Clickhouse uses 32 bit floats, + ;; so 1.1 is approximated by 1.10000002384185791015625 + (round-floats 7 (rows-for-table table)))))))))) (deftest update-from-csv-int-and-float-test (mt/test-drivers (mt/normal-drivers-with-feature :uploads) @@ -2278,10 +2305,10 @@ (map (partial #'upload/normalize-column-name driver/*driver*)))] (is (every? (set column-names) header-names)))) (testing "We preserve prefixes where_possible" - (is (= {"_mb_row_" 1 - "a_really" 1 + (is (= {"a_really" 1 "b_really" 2} - (frequencies (map #(subs % 0 8) column-names)))))))))))))) + (dissoc (frequencies (map #(subs % 0 8) column-names)) + "_mb_row_"))))))))))))) (deftest append-with-really-long-names-test (testing "Upload a CSV file with unique column names that get sanitized to the same string" @@ -2300,10 +2327,12 @@ (is (= {:row-count 1} (update-csv! ::upload/append {:file file, :table-id (:id table)}))) (testing "Check the data was appended into the table" - (is (= (map second (rows-with-auto-pk - [(csv/read-csv original-row) - (csv/read-csv appended-row)])) - (map rest (rows-for-table table))))) + (is (= (set + (rows-with-auto-pk + (concat + (csv/read-csv original-row) + (csv/read-csv appended-row)))) + (set (rows-for-table table))))) (io/delete-file file)))))))) (deftest append-with-preserved-display-name-test @@ -2351,8 +2380,8 @@ :data {:status-code 422}} (catch-ex-info (update-csv! ::upload/append {:file file, :table-id (:id table)})))) (testing "Check the data was not uploaded into the table" - (is (= (map second (rows-with-auto-pk [(csv/read-csv original-row)])) - (map rest (rows-for-table table))))) + (is (= (rows-with-auto-pk (csv/read-csv original-row)) + (rows-for-table table)))) (io/delete-file file)))))))) (driver/register! ::short-column-test-driver) -- GitLab