Unverified Commit b4610877 authored by Jeff Evans, committed by GitHub

Change all active TEXT columns in MySQL app DB to LONGTEXT (#18749)


* Change all active TEXT columns in MySQL app DB to LONGTEXT

Add a new text.type Liquibase property to dynamically select the correct text type per DB (LONGTEXT for MySQL), very similar to the existing blob.type property
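
For context, a rough sketch of how such property entries look once the changelog YAML is parsed into Clojure data (the values and dbms lists here are assumptions for illustration, not copied from the actual changelog):

;; hypothetical property entries, not taken from the real changelog
{:property {:name "text.type", :value "longtext", :dbms "mysql,mariadb"}}
{:property {:name "text.type", :value "text",     :dbms "postgresql,h2"}}

A column definition can then declare :type "${text.type}" and Liquibase substitutes the DB-specific value when the migrations run.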

Add new migrations that convert all existing app DB TEXT columns to LONGTEXT, for mariadb/mysql only
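
To illustrate the shape of such a change set (the id, table, and column below are placeholders, not values from this PR), a MySQL/MariaDB-only conversion would parse to something like:

;; hypothetical example; not an actual change set from this PR
{:changeSet {:id      "v42.00-0xx"
             :author  "someone"
             :dbms    "mysql,mariadb"
             :changes [{:modifyDataType {:tableName   "report_card"
                                         :columnName  "description"
                                         :newDataType "${text.type}"}}]}}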

Update the migrations linter to ensure no new bare text types are added, via a new predicate that searches change sets for such columns

Apply the same logic to "blob" (which should now be "${blob.type}"), and update the rule and test accordingly

Add a migrations test that checks all columns in scope for this PR, to ensure they have all been changed to their expected DB-specific type (LONGTEXT for MySQL, TEXT/CLOB for the others)

Update a couple of migrations to remove special casing that was originally done only for MySQL, making them universally "${text.type}", in order to unify behavior and reduce surprises going forward

Co-authored-by: Cam Saul <github@camsaul.com>
parent 92213a23
......@@ -15,7 +15,7 @@
(s/def ::new-style-id
(s/and string?
#(re-matches #"^v\d{2}\.\d{2}-\d{3}$" %)))
#(re-matches #"^v\d{2,}\.\d{2}-\d{3}$" %)))
(s/def ::id
(s/or
......
......@@ -4,6 +4,8 @@
[clojure.java.io :as io]
[clojure.pprint :as pprint]
[clojure.spec.alpha :as s]
[clojure.string :as str]
[clojure.walk :as walk]
[yaml.core :as yaml]))
(comment change-set.strict/keep-me
......@@ -51,10 +53,64 @@
(let [ids (change-set-ids change-log)]
(= ids (sort-by identity compare-ids ids))))
(defn- assert-no-types-in-change-set
  "Walks over x (a change-set map) to ensure it doesn't add any columns whose type is in `target-types` (a set of
  lower-case type name strings). `found-cols` is an atom containing a vector, into which any offending column
  definitions are conj'd. A partial application of this function is passed to postwalk below.

  TODO: add and conform to a spec instead?"
  [target-types found-cols x]
  {:pre [(set? target-types) (instance? clojure.lang.Atom found-cols)]}
  (if (map? x)
    (cond
      ;; a createTable or addColumn change; see if it adds a target-type col
      (or (:createTable x) (:addColumn x))
      (let [op   (cond (:createTable x) :createTable (:addColumn x) :addColumn)
            cols (filter (fn [col-def]
                           (contains? target-types
                                      (str/lower-case (or (get-in col-def [:column :type]) ""))))
                         (get-in x [op :columns]))]
        (doseq [col cols]
          (swap! found-cols conj col))
        x)

      ;; a modifyDataType change; see if it changes a column to a target-type
      (:modifyDataType x)
      (if (contains? target-types (str/lower-case (or (get-in x [:modifyDataType :newDataType]) "")))
        (do (swap! found-cols conj x)
            x)
        x)

      true ; some other kind of change; continue walking
      x)
    x))
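
For illustration, a hypothetical change set (values made up) run through the walker; offending column definitions accumulate in the atom:

;; hypothetical change set, for demonstration only
(let [found (atom [])]
  (walk/postwalk (partial assert-no-types-in-change-set #{"blob" "text"} found)
                 {:changeSet {:id      "v42.00-999"
                              :changes [{:addColumn {:tableName "example"
                                                     :columns   [{:column {:name "notes", :type "text"}}]}}]}})
  @found)
;; => [{:column {:name "notes", :type "text"}}]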
(defn no-bare-blob-or-text-types?
  "Ensures that no \"text\" or \"blob\" type columns are added in change sets with an id later than 320 (i.e. version
  0.42.0). From that point on, \"${text.type}\" should be used instead, so that MySQL can handle it correctly (by
  using `LONGTEXT`). And similarly, from an earlier point on, \"${blob.type}\" should be used instead of \"blob\"."
  [change-log]
  (let [problem-cols (atom [])
        walk-fn      (partial assert-no-types-in-change-set #{"blob" "text"} problem-cols)]
    (doseq [{{id :id} :changeSet :as change-set} change-log
            :when (and id
                       ;; only enforced in 42+ with new-style migration IDs.
                       (string? id)
                       (str/starts-with? id "v"))]
      (walk/postwalk walk-fn change-set))
    (empty? @problem-cols)))
;; TODO -- change sets must be distinct by ID.
(s/def ::databaseChangeLog
(s/and distinct-change-set-ids?
change-set-ids-in-order?
no-bare-blob-or-text-types?
(s/+ (s/alt :property (s/keys :req-un [::property])
:changeSet (s/keys :req-un [::changeSet])))))
......
......@@ -182,3 +182,20 @@
clojure.lang.ExceptionInfo
#"new-style-id"
(validate-id "42.01-001"))))))
(deftest prevent-text-types-test
  (testing "should allow \"${text.type}\" columns to be added"
    (is (= :ok
           (validate
            (mock-change-set
             :id "v42.00-001"
             :changes [(mock-add-column-changes :columns [(mock-column :type "${text.type}")])]))))
    (doseq [problem-type ["blob" "text"]]
      (testing (format "should prevent \"%s\" columns from being added after ID 320" problem-type)
        (is (thrown-with-msg?
             clojure.lang.ExceptionInfo
             #"(?s)^.*no-bare-blob-or-text-types\?.*$"
             (validate
              (mock-change-set
               :id "v42.00-001"
               :changes [(mock-add-column-changes :columns [(mock-column :type problem-type)])]))))))))
......@@ -22,7 +22,7 @@
what we need."
[& body]
`(schema-migrations-test.impl/with-temp-empty-app-db [conn# :h2]
(schema-migrations-test.impl/run-migrations-in-range! conn# [0 999999]) ; this should catch all migrations
(schema-migrations-test.impl/run-migrations-in-range! conn# [0 "v99.00-000"]) ; this should catch all migrations
;; since the actual group defs are not dynamic, we need with-redefs to change them here
(with-redefs [group/all-users (#'group/get-or-create-magic-group! group/all-users-group-name)
group/admin (#'group/get-or-create-magic-group! group/admin-group-name)
......
......@@ -8,14 +8,18 @@
5. verify that data looks like what we'd expect after running migration(s)
See `metabase.db.schema-migrations-test.impl` for the implementation of this functionality."
(:require [clojure.test :refer :all]
(:require [clojure.java.jdbc :as jdbc]
[clojure.string :as str]
[clojure.test :refer :all]
[metabase.db.schema-migrations-test.impl :as impl]
[metabase.driver :as driver]
[metabase.models :refer [Database Field Table]]
[metabase.models.user :refer [User]]
[metabase.test.util :as tu]
[metabase.util :as u]
[toucan.db :as db])
(:import java.util.UUID))
(:import java.sql.Connection
java.util.UUID))
(deftest database-position-test
(testing "Migration 165: add `database_position` to Field"
......@@ -158,3 +162,99 @@
(map #(select-keys % [:base_type :effective_type :coercion_strategy
:semantic_type :name]))
(db/select Field :table_id table-id)))))))))
(defn app-db-column-types
"Returns a map of all column names to their respective type names, for the given `table-name`, from the currently
bound app DB connection."
[^Connection conn table-name]
(with-open [rset (.getColumns (.getMetaData conn) nil nil table-name nil)]
(into {} (take-while some?)
(repeatedly
(fn []
(when (.next rset)
[(.getString rset "COLUMN_NAME") (.getString rset "TYPE_NAME")]))))))
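
As a rough usage sketch (the table and type names below are assumptions about an H2 app DB, not output from a real run):

;; hypothetical call and result against an H2 app DB
;; (app-db-column-types conn "SETTING")
;; => {"KEY" "VARCHAR", "VALUE" "CLOB"}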
(deftest convert-text-to-longtext-migration-test
(testing "all columns that were TEXT type in MySQL were changed to"
(impl/test-migrations ["v42.00-004" "v42.00-063"] [migrate!]
(migrate!) ; just run migrations immediately, then check the new types
(let [all-text-cols [["activity" "details"]
["collection" "description"]
["collection" "name"]
["computation_job" "context"]
["computation_job_result" "payload"]
["core_session" "anti_csrf_token"]
["core_user" "login_attributes"]
["group_table_access_policy" "attribute_remappings"]
["login_history" "device_description"]
["login_history" "ip_address"]
["metabase_database" "caveats"]
["metabase_database" "description"]
["metabase_database" "details"]
["metabase_database" "options"]
["metabase_database" "points_of_interest"]
["metabase_field" "caveats"]
["metabase_field" "database_type"]
["metabase_field" "description"]
["metabase_field" "fingerprint"]
["metabase_field" "has_field_values"]
["metabase_field" "points_of_interest"]
["metabase_field" "settings"]
["metabase_fieldvalues" "human_readable_values"]
["metabase_fieldvalues" "values"]
["metabase_table" "caveats"]
["metabase_table" "description"]
["metabase_table" "points_of_interest"]
["metric" "caveats"]
["metric" "definition"]
["metric" "description"]
["metric" "how_is_this_calculated"]
["metric" "points_of_interest"]
["moderation_review" "text"]
["native_query_snippet" "content"]
["native_query_snippet" "description"]
["pulse" "parameters"]
["pulse_channel" "details"]
["query" "query"]
["query_execution" "error"]
["report_card" "dataset_query"]
["report_card" "description"]
["report_card" "embedding_params"]
["report_card" "result_metadata"]
["report_card" "visualization_settings"]
["report_dashboard" "caveats"]
["report_dashboard" "description"]
["report_dashboard" "embedding_params"]
["report_dashboard" "parameters"]
["report_dashboard" "points_of_interest"]
["report_dashboardcard" "parameter_mappings"]
["report_dashboardcard" "visualization_settings"]
["revision" "message"]
["revision" "object"]
["segment" "caveats"]
["segment" "definition"]
["segment" "description"]
["segment" "points_of_interest"]
["setting" "value"]
["task_history" "task_details"]
["view_log" "metadata"]]]
(with-open [conn (jdbc/get-connection (db/connection))]
(doseq [[tbl-nm col-nms] (group-by first all-text-cols)]
(let [^String exp-type (case driver/*driver*
:mysql "longtext"
:h2 "CLOB"
"text")
name-fn (case driver/*driver*
:h2 str/upper-case
identity)
tbl-cols (app-db-column-types conn (name-fn tbl-nm))]
(doseq [col-nm (map last col-nms)]
(testing (format " %s in %s" exp-type driver/*driver*)
;; look up this column's reported type name in the map returned by app-db-column-types
(is (.equalsIgnoreCase exp-type (get tbl-cols (name-fn col-nm)))
(format "Using %s, type for %s.%s was supposed to be %s, but was %s"
driver/*driver*
tbl-nm
col-nm
exp-type
(get tbl-cols (name-fn col-nm)))))))))))))
......@@ -97,18 +97,23 @@
(defn- migration-id-in-range?
"Whether `id` should be considered to be between `start-id` and `end-id`, inclusive. Handles both legacy plain-integer
and new-style `vMM.mm-NNN` style IDs."
[start-id id end-id]
[start-id id end-id & [{:keys [inclusive-end?]
:or {inclusive-end? true}}]]
(let [start-id (migration-id->str start-id)
end-id (migration-id->str end-id)
id (migration-id->str id)]
(= (sort [start-id id end-id])
[start-id id end-id])))
(and (= (sort [start-id id end-id])
[start-id id end-id])
(or inclusive-end?
(not= id end-id)))))
(deftest migration-id-in-range?-test
(testing "legacy IDs"
(is (migration-id-in-range? 1 2 3))
(is (migration-id-in-range? 1 2 3 {:inclusive-end? false}))
(is (migration-id-in-range? 1 1 3))
(is (migration-id-in-range? 1 3 3))
(is (not (migration-id-in-range? 1 3 3 {:inclusive-end? false})))
(is (not (migration-id-in-range? 2 1 3)))
(is (not (migration-id-in-range? 2 4 3)))
(testing "strings"
......@@ -116,20 +121,24 @@
(is (not (migration-id-in-range? 1 "13" 3)))))
(testing "new-style IDs"
(is (migration-id-in-range? "v42.00-001" "v42.00-002" "v42.00-003"))
(is (migration-id-in-range? "v42.00-001" "v42.00-002" "v42.00-002"))
(is (not (migration-id-in-range? "v42.00-001" "v42.00-002" "v42.00-002" {:inclusive-end? false})))
(is (not (migration-id-in-range? "v42.00-001" "v42.00-004" "v42.00-003")))
(is (not (migration-id-in-range? "v42.00-002" "v42.00-001" "v42.00-003"))))
(testing "mixed"
(is (migration-id-in-range? 1 3 "v42.00-001"))
(is (migration-id-in-range? 1 "v42.00-001" "v42.00-002"))
(is (not (migration-id-in-range? "v42.00-002" 1000 "v42.00-003")))
(is (not (migration-id-in-range? "v42.00-002" 1000 "v42.00-003" {:inclusive-end? false})))
(is (not (migration-id-in-range? 1 "v42.00-001" 1000)))
(is (not (migration-id-in-range? 1 "v42.00-001" 1000)))))
(defn run-migrations-in-range!
"Run Liquibase migrations from our migrations YAML file in the range of `start-id` -> `end-id` (inclusive) against a
DB with `jdbc-spec`."
{:added "0.41.0"}
[^java.sql.Connection conn [start-id end-id]]
{:added "0.41.0", :arglists '([conn [start-id end-id]]
[conn [start-id end-id] {:keys [inclusive-end?], :or {inclusive-end? true}}])}
[^java.sql.Connection conn [start-id end-id] & [range-options]]
(liquibase/with-liquibase [liquibase conn]
(let [change-log (.getDatabaseChangeLog liquibase)
;; create a new change log that only has the subset of migrations we want to run.
......@@ -139,7 +148,7 @@
;; add the relevant migrations (change sets) to our subset change log
(doseq [^ChangeSet change-set (.getChangeSets change-log)
:let [id (.getId change-set)]
:when (migration-id-in-range? start-id id end-id)]
:when (migration-id-in-range? start-id id end-id range-options)]
(.addChangeSet subset-change-log change-set))
;; now create a new instance of Liquibase that will run just the subset change log
(let [subset-liquibase (Liquibase. subset-change-log (.getResourceAccessor liquibase) (.getDatabase liquibase))]
......@@ -159,7 +168,7 @@
(assert (zero? (count tables))
(str "'Empty' application DB is not actually empty. Found tables:\n"
(u/pprint-to-str tables))))))
(run-migrations-in-range! conn [1 (dec start-id)])
(run-migrations-in-range! conn [1 start-id] {:inclusive-end? false})
(f #(run-migrations-in-range! conn [start-id end-id])))
(log/debug (u/format-color 'green "Done testing migrations for driver %s." driver)))
......@@ -171,7 +180,7 @@
(let [[start-id end-id] (if (sequential? migration-range)
migration-range
[migration-range migration-range])]
(testing (format "Migrations %d-%d" start-id end-id)
(testing (format "Migrations %s thru %s" start-id end-id)
(mt/test-drivers #{:h2 :mysql :postgres}
(test-migrations-for-driver driver/*driver* [start-id end-id] f)))))
......