Commit 3b0e3418 authored by Octavian Geagla, committed by Cam Saul

Dump to h2 (#10877)

* [cmd] dump to h2 (#10843)

* Tests for load & dump commands

* Fixes

* test fixes

* Test fixes

* Test fixes
parent 091d6625
@@ -345,12 +345,12 @@ jobs:
- attach-workspace
- restore-be-deps-cache
- run:
name: Test migrating from H2 -> << parameters.db-type >>
name: Test migrating from H2 -> << parameters.db-type >> -> H2
environment:
MB_DB_TYPE: << parameters.db-type >>
MB_DB_HOST: localhost
command: >
lein run load-from-h2 ./frontend/test/__runner__/test_db_fixture.db
./bin/test-load-and-dump.sh
no_output_timeout: 5m
#! /usr/bin/env bash
set -euo pipefail
set -o xtrace
SOURCE_DB="$(pwd)/frontend/test/__runner__/test_db_fixture.db"
DEST_DB="$(pwd)/dump.db"
echo -e "\n********************************************************************************"
echo "Migrating $SOURCE_DB..."
echo -e "********************************************************************************\n"
MB_DB_TYPE=h2 MB_DB_FILE="$SOURCE_DB" lein run migrate up
echo -e "\n********************************************************************************"
echo "Loading data from H2 $SOURCE_DB into Postgres/MySQL..."
echo -e "********************************************************************************\n"
lein run load-from-h2 "$SOURCE_DB"
echo -e "\n********************************************************************************"
echo "Dumping data from Postgres/MySQL into H2 $DEST_DB..."
echo -e "********************************************************************************\n"
lein run dump-to-h2 "$DEST_DB"
echo -e "\n********************************************************************************"
echo "Comparing contents of $SOURCE_DB and $DEST_DB..."
echo -e "********************************************************************************\n"
lein compare-h2-dbs "$SOURCE_DB" "$DEST_DB"
@@ -17,7 +17,9 @@
"run" ["with-profile" "+run" "run"]
"ring" ["with-profile" "+ring" "ring"]
"test" ["with-profile" "+expectations" "expectations"]
"bikeshed" ["with-profile" "+bikeshed" "bikeshed" "--max-line-length" "205"]
"bikeshed" ["with-profile" "+bikeshed" "bikeshed"
"--max-line-length" "205"
"--exclude-profiles" "compare-h2-dbs"]
"check-namespace-decls" ["with-profile" "+check-namespace-decls" "check-namespace-decls"]
"eastwood" ["with-profile" "+eastwood" "eastwood"]
"check-reflection-warnings" ["with-profile" "+reflection-warnings" "check"]
@@ -25,7 +27,9 @@
;; `lein lint` will run all linters
"lint" ["do" ["eastwood"] ["bikeshed"] ["check-namespace-decls"] ["docstring-checker"]]
"repl" ["with-profile" "+repl" "repl"]
"strip-and-compress" ["with-profile" "+strip-and-compress" "run"]}
"strip-and-compress" ["with-profile" "+strip-and-compress" "run"]
"compare-h2-dbs" ["with-profile" "+compare-h2-dbs" "run"]}
;; !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
;; !! PLEASE KEEP THESE ORGANIZED ALPHABETICALLY !!
@@ -98,6 +102,7 @@
javax.jms/jms
com.sun.jdmk/jmxtools
com.sun.jmx/jmxri]]
[me.raynes/fs "1.4.6"] ; FS tools
[medley "1.2.0"] ; lightweight lib of useful functions
[metabase/connection-pool "1.0.2"] ; simple wrapper around C3P0. JDBC connection pools
[metabase/mbql "1.3.5"] ; MBQL language schema & util fns
@@ -132,7 +137,7 @@
;; TODO - WHAT DOES THIS DO?
:manifest
{"Liquibase-Package"
#=(eval
#= (eval
(str "liquibase.change,liquibase.changelog,liquibase.database,liquibase.parser,liquibase.precondition,"
"liquibase.datatype,liquibase.serializer,liquibase.sqlgenerator,liquibase.executor,"
"liquibase.snapshot,liquibase.logging,liquibase.diff,liquibase.structure,"
@@ -161,7 +166,7 @@
[ring/ring-mock "0.3.2"]]
:plugins
[[lein-environ "1.1.0"]] ; easy access to environment variables
:env {:mb-run-mode "dev"}
:jvm-opts ["-Dlogfile.path=target/log"]}
@@ -243,7 +248,8 @@
:bikeshed
[:include-all-drivers
{:plugins [[lein-bikeshed "0.4.1"]]}]
{:plugins
[[lein-bikeshed "0.4.1"]]}]
:eastwood
[:include-all-drivers
@@ -259,7 +265,7 @@
;; disabled (yet)
;;
;; For example see https://github.com/jonase/eastwood/issues/193
;
;;
;; It's still useful to re-enable them and run them every once in a while because they catch
;; a lot of actual errors too. Keep an eye on the issue above and re-enable them if we can
;; get them to work
@@ -314,11 +320,15 @@
;; Profile Metabase start time with `lein profile`
:profile
{:jvm-opts ["-XX:+CITime" ; print time spent in JIT compiler
"-XX:+PrintGC"]} ; print a message when garbage collection takes place
"-XX:+PrintGC"]} ; print a message when garbage collection takes place
;; get the H2 shell with 'lein h2'
:h2-shell
{:main org.h2.tools.Shell}
:generate-automagic-dashboards-pot
{:main metabase.automagic-dashboards.rules}})
{:main metabase.automagic-dashboards.rules}
:compare-h2-dbs
{:main ^:skip-aot metabase.cmd.compare-h2-dbs
:source-paths ["test"]}})
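;; The new `compare-h2-dbs` alias expands through the profile above, so (assuming standard
;; Leiningen alias/profile behavior, with example file paths):
;;
;;   lein compare-h2-dbs source.db dump.db
;;   ;; => lein with-profile +compare-h2-dbs run source.db dump.db
;;   ;; => calls metabase.cmd.compare-h2-dbs/-main with "source.db" "dump.db"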
@@ -37,6 +37,15 @@
(binding [mdb/*disable-data-migrations* true]
((resolve 'metabase.cmd.load-from-h2/load-from-h2!) h2-connection-string))))
(defn ^:command dump-to-h2
"Transfer data from existing database to newly created H2 DB."
[h2-filename]
(classloader/require 'metabase.cmd.dump-to-h2)
(binding [mdb/*disable-data-migrations* true]
(let [return-code ((resolve 'metabase.cmd.dump-to-h2/dump-to-h2!) h2-filename)]
(when (pos-int? return-code)
(System/exit return-code)))))
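;; A minimal usage sketch for the new command, assuming a source checkout or a built uberjar
;; (the output path is an example only):
;;
;;   lein run dump-to-h2 /path/to/metabase_dump.h2
;;   java -jar metabase.jar dump-to-h2 /path/to/metabase_dump.h2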
(defn ^:command profile
"Start Metabase the usual way and exit. Useful for profiling Metabase launch time."
[]
(ns metabase.cmd.dump-to-h2
"Commands for dumping data to an H2 file from app database.
Run this with `lein run dump-to-h2` or `java -jar metabase.jar dump-to-h2`.
Test this as follows:
```lein run dump-to-h2 \"/path/to/h2\"```
Validate with:
```lein run load-from-h2 \"/path/to/h2\"```
"
(:require [clojure.java
[io :as io]
[jdbc :as jdbc]]
[clojure.string :as str]
[colorize.core :as color]
[me.raynes.fs :as fs]
[metabase
[db :as mdb]
[util :as u]]
[metabase.db.migrations :refer [DataMigrations]]
[metabase.models
[activity :refer [Activity]]
[card :refer [Card]]
[card-favorite :refer [CardFavorite]]
[collection :refer [Collection]]
[collection-revision :refer [CollectionRevision]]
[dashboard :refer [Dashboard]]
[dashboard-card :refer [DashboardCard]]
[dashboard-card-series :refer [DashboardCardSeries]]
[dashboard-favorite :refer [DashboardFavorite]]
[database :refer [Database]]
[dependency :refer [Dependency]]
[dimension :refer [Dimension]]
[field :refer [Field]]
[field-values :refer [FieldValues]]
[metric :refer [Metric]]
[metric-important-field :refer [MetricImportantField]]
[permissions :refer [Permissions]]
[permissions-group :refer [PermissionsGroup]]
[permissions-group-membership :refer [PermissionsGroupMembership]]
[permissions-revision :refer [PermissionsRevision]]
[pulse :refer [Pulse]]
[pulse-card :refer [PulseCard]]
[pulse-channel :refer [PulseChannel]]
[pulse-channel-recipient :refer [PulseChannelRecipient]]
[revision :refer [Revision]]
[segment :refer [Segment]]
[session :refer [Session]]
[setting :refer [Setting]]
[table :refer [Table]]
[user :refer [User]]
[view-log :refer [ViewLog]]]
[metabase.util.i18n :refer [trs]]
[toucan.db :as db])
(:import java.sql.SQLException))
(defn- println-ok [] (println (color/green "[OK]")))
;;; ------------------------------------------ Models to Migrate (in order) ------------------------------------------
(def ^:private entities
"Entities in the order they should be serialized/deserialized. This is done so we make sure that we load load
instances of entities before others that might depend on them, e.g. `Databases` before `Tables` before `Fields`."
[Database
User
Setting
Dependency
Table
Field
FieldValues
Segment
Metric
MetricImportantField
Revision
ViewLog
Session
Dashboard
Card
CardFavorite
DashboardCard
DashboardCardSeries
Activity
Pulse
PulseCard
PulseChannel
PulseChannelRecipient
PermissionsGroup
PermissionsGroupMembership
Permissions
PermissionsRevision
Collection
CollectionRevision
DashboardFavorite
Dimension
;; migrate the list of finished DataMigrations as the very last thing (all models to copy over should be listed
;; above this line)
DataMigrations])
;;; --------------------------------------------- H2 Connection Options ----------------------------------------------
(defn- add-file-prefix-if-needed [connection-string-or-filename]
(if (str/starts-with? connection-string-or-filename "file:")
connection-string-or-filename
(str "file:" (.getAbsolutePath (io/file connection-string-or-filename)))))
(defn- h2-details [h2-connection-string-or-nil]
(let [h2-filename (add-file-prefix-if-needed h2-connection-string-or-nil)]
(mdb/jdbc-details {:type :h2, :db h2-filename})))
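;; A small sketch of the prefixing behavior above (the absolute path shown is hypothetical):
(comment
  (add-file-prefix-if-needed "dump.db")      ;=> "file:/home/metabase/dump.db"
  (add-file-prefix-if-needed "file:dump.db") ;=> "file:dump.db"
  )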
;;; ------------------------------------------- Fetching & Inserting Rows --------------------------------------------
(defn- objects->colums+values
"Given a sequence of objects/rows fetched from the H2 DB, return a the `columns` that should be used in the `INSERT`
statement, and a sequence of rows (as sequences)."
[objs]
;; 1) `:sizeX` and `:sizeY` come out of H2 as `:sizex` and `:sizey` because of automatic lowercasing; fix the names
;; of these before putting into the new DB
;;
;; 2) Need to wrap the column names in quotes because Postgres automatically lowercases unquoted identifiers
(let [source-keys (keys (first objs))
dest-keys (for [k source-keys]
((db/quote-fn) (name (case k
:sizex :sizeX
:sizey :sizeY
k))))]
{:cols dest-keys
:vals (for [row objs]
(map (comp u/jdbc-clob->str row) source-keys))}))
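;; A worked example of the renaming and quoting described above, assuming the ANSI double-quote
;; style used by Postgres/H2 and a hypothetical DashboardCard row:
(comment
  (objects->colums+values [{:id 1, :sizex 4, :sizey 3}])
  ;;=> {:cols ["\"id\"" "\"sizeX\"" "\"sizeY\""]
  ;;    :vals ([1 4 3])}
  )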
(def ^:private chunk-size 100)
(defn- insert-chunk! [target-db-conn table-name chunkk]
(print (color/blue \.))
(flush)
(try
(let [{:keys [cols vals]} (objects->colums+values chunkk)]
(jdbc/insert-multi! target-db-conn table-name (map str/upper-case cols) vals))
(catch SQLException e
(jdbc/print-sql-exception-chain e)
(throw e))))
(defn- insert-entity! [target-db-conn {table-name :table, entity-name :name} objs]
(print (u/format-color 'blue "Transferring %d instances of %s..." (count objs) entity-name))
(flush)
;; The connection closes prematurely on occasion when we're inserting thousands of rows at once. Break into
;; smaller chunks so connection stays alive
(doseq [chunk (partition-all chunk-size objs)]
(insert-chunk! target-db-conn table-name chunk))
(println-ok))
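;; The chunking above just splits the rows into groups of at most `chunk-size` before each INSERT,
;; e.g. for a hypothetical 250-row table:
(comment
  (map count (partition-all chunk-size (range 250))) ;=> (100 100 50)
  )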
(defn- load-data! [target-db-conn]
(println "Source db:" (mdb/jdbc-details))
(jdbc/with-db-connection [db-conn (mdb/jdbc-details)]
(doseq [{table-name :table, :as e} entities
:let [rows (jdbc/query db-conn [(str "SELECT * FROM " (name table-name))])]
:when (seq rows)]
(insert-entity! target-db-conn e rows))))
(defn- get-target-db-conn [h2-filename]
(h2-details h2-filename))
;;; --------------------------------------------------- Public Fns ---------------------------------------------------
(defn dump-to-h2!
"Transfer data from existing database specified by connection string
to the H2 DB specified by env vars. Intended as a tool for migrating
from one instance to another using H2 as serialization target.
Defaults to using `@metabase.db/db-file` as the connection string."
[h2-filename]
(let [h2-filename (or h2-filename "metabase_dump.h2")]
(println "Dumping to " h2-filename)
(doseq [filename [h2-filename
(str h2-filename ".mv.db")]]
(when (.exists (io/file filename))
(fs/delete filename)
(println (u/format-color 'red (trs "Output H2 database already exists: %s, removing.") filename))))
(println "Dumping from configured Metabase db to H2 file" h2-filename)
(mdb/setup-db!* (get-target-db-conn h2-filename) true)
(mdb/setup-db!)
(if (= :h2 (mdb/db-type))
(println (u/format-color 'yellow (trs "Don't need to migrate, just use the existing H2 file")))
(jdbc/with-db-transaction [target-db-conn (get-target-db-conn h2-filename)]
(println "Conn of target: " target-db-conn)
(println-ok)
(println (u/format-color 'blue "Loading data..."))
(load-data! target-db-conn)
(println-ok)
(jdbc/db-unset-rollback-only! target-db-conn)))
(println "Dump complete")))
@@ -31,6 +31,21 @@
;;; | DB FILE & CONNECTION DETAILS |
;;; +----------------------------------------------------------------------------------------------------------------+
(defn get-db-file
"Takes a filename and converts it to H2-compatible filename."
[db-file-name]
(let [
;; we need to enable MVCC for Quartz JDBC backend to work! Quartz depends on row-level locking, which
;; means without MVCC we "will experience dead-locks". MVCC is the default for everyone using the
;; MVStore engine anyway so this only affects people still with legacy PageStore databases
;;
;; Tell H2 to defrag when Metabase is shut down -- can reduce DB size by multiple GIGABYTES -- see #6510
options ";DB_CLOSE_DELAY=-1;MVCC=TRUE;DEFRAG_ALWAYS=TRUE"]
;; H2 wants file path to always be absolute
(str "file:"
(.getAbsolutePath (io/file db-file-name))
options)))
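;; A sketch of the resulting H2 connection string, assuming a relative filename and a hypothetical
;; working directory of /home/metabase:
(comment
  (get-db-file "metabase.db")
  ;;=> "file:/home/metabase/metabase.db;DB_CLOSE_DELAY=-1;MVCC=TRUE;DEFRAG_ALWAYS=TRUE"
  )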
(def db-file
"Path to our H2 DB file from env var or app config."
;; see https://h2database.com/html/features.html for explanation of options
@@ -40,36 +55,33 @@
;; DB_CLOSE_DELAY=-1 = don't close the Database until the JVM shuts down
"mem:metabase;DB_CLOSE_DELAY=-1"
;; File-based DB
(let [db-file-name (config/config-str :mb-db-file)
;; we need to enable MVCC for Quartz JDBC backend to work! Quartz depends on row-level locking, which
;; means without MVCC we "will experience dead-locks". MVCC is the default for everyone using the
;; MVStore engine anyway so this only affects people still with legacy PageStore databases
;;
;; Tell H2 to defrag when Metabase is shut down -- can reduce DB size by multiple GIGABYTES -- see #6510
options ";DB_CLOSE_DELAY=-1;MVCC=TRUE;DEFRAG_ALWAYS=TRUE"]
;; H2 wants file path to always be absolute
(str "file:"
(.getAbsolutePath (io/file db-file-name))
options)))))
(let [db-file-name (config/config-str :mb-db-file)]
(get-db-file db-file-name)))))
(def ^:private jdbc-connection-regex
#"^(jdbc:)?([^:/@]+)://(?:([^:/@]+)(?::([^:@]+))?@)?([^:@]+)(?::(\d+))?/([^/?]+)(?:\?(.*))?$")
(defn- parse-connection-string
;;TODO don't make this public
(defn parse-connection-string
"Parse a DB connection URI like
`postgres://cam@localhost.com:5432/cams_cool_db?ssl=true&sslfactory=org.postgresql.ssl.NonValidatingFactory` and
return a broken-out map."
[uri]
(when-let [[_ _ protocol user pass host port db query] (re-matches jdbc-connection-regex uri)]
(println "Parsed: " protocol user pass host port db query)
(u/prog1 (merge {:type (case (keyword protocol)
:postgres :postgres
:postgresql :postgres
:mysql :mysql)
:user user
:password pass
:host host
:port port
:dbname db}
:mysql :mysql
:h2 :h2)}
(case (keyword protocol)
:h2 {:db db}
{:user user
:password pass
:host host
:port port
:dbname db})
(some-> query
codec/form-decode
walk/keywordize-keys))
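;; Two hedged examples of the broken-out map: the Postgres URI from the docstring, and a
;; hypothetical H2-style URI (the H2 branch keeps only the :db portion):
(comment
  (parse-connection-string
   "postgres://cam@localhost.com:5432/cams_cool_db?ssl=true&sslfactory=org.postgresql.ssl.NonValidatingFactory")
  ;;=> {:type :postgres, :user "cam", :password nil, :host "localhost.com", :port "5432",
  ;;    :dbname "cams_cool_db", :ssl "true", :sslfactory "org.postgresql.ssl.NonValidatingFactory"}

  (parse-connection-string "h2://localhost/metabase_dump")
  ;;=> {:type :h2, :db "metabase_dump"}
  )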
@@ -265,10 +277,10 @@
see https://github.com/metabase/metabase/issues/3715"
[conn]
(let [liquibase-table-name (if (#{:h2 :mysql} (db-type))
(let [liquibase-table-name (if (#{:h2 :mysql} (:type conn))
"DATABASECHANGELOG"
"databasechangelog")
fresh-install? (jdbc/with-db-metadata [meta (jdbc-details)] ;; don't migrate on fresh install
fresh-install? (jdbc/with-db-metadata [meta (jdbc-details conn)] ;; don't migrate on fresh install
(empty? (jdbc/metadata-query
(.getTables meta nil nil liquibase-table-name (u/varargs String ["TABLE"])))))
statement (format "UPDATE %s SET FILENAME = ?" liquibase-table-name)]
@@ -460,22 +472,28 @@
((resolve 'metabase.db.migrations/run-all!))))
(defn- setup-db!* []
(defn setup-db!*
"Connects to db and runs migrations."
[db-details auto-migrate]
(du/profile (trs "Database setup")
(u/with-us-locale
(verify-db-connection db-details)
(run-schema-migrations! auto-migrate db-details)
(create-connection-pool! (jdbc-details db-details))
(run-data-migrations!)))
nil)
(defn- setup-db-from-env!* []
(let [db-details @db-connection-details
auto-migrate (config/config-bool :mb-db-automigrate)]
(du/profile (trs "Application database setup")
(u/with-us-locale
(verify-db-connection db-details)
(run-schema-migrations! auto-migrate db-details)
(create-connection-pool! (jdbc-details db-details))
(run-data-migrations!)
(reset! db-setup-finished? true))))
(setup-db!* db-details auto-migrate)
(reset! db-setup-finished? true))
nil)
(defonce ^{:arglists '([]), :doc "Do general preparation of database by validating that we can connect. Caller can
specify if we should run any pending database migrations. If DB is already set up, this function will no-op."}
setup-db!
(partial deref (delay (setup-db!*))))
setup-db!
(partial deref (delay (setup-db-from-env!*))))
;;; Various convenience fns (experiMENTAL)
(ns metabase.setup
(:require [metabase.models.setting :refer [defsetting Setting]]
[toucan.db :as db]))
(:require [environ.core :refer [env]]
[metabase.models.setting :refer [defsetting Setting]]
[toucan.db :as db])
(:import java.util.UUID))
(defsetting ^:private setup-token
"A token used to signify that an instance has permissions to create the initial User. This is created upon the first
@@ -24,8 +26,10 @@
[]
;; fetch the value directly from the DB; *do not* rely on cached value, in case a different instance came along and
;; already created it
(or (db/select-one-field :value Setting :key "setup-token")
(setup-token (str (java.util.UUID/randomUUID)))))
(let [mb-setup-token (env :mb-setup-token)]
(or (when mb-setup-token (setup-token mb-setup-token))
(db/select-one-field :value Setting :key "setup-token")
(setup-token (str (UUID/randomUUID))))))
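;; The lookup order above: an explicit MB_SETUP_TOKEN env var (environ exposes it as
;; :mb-setup-token), then any token already stored in the Setting table, and only then a freshly
;; generated UUID. A hypothetical invocation that pins the token:
;;
;;   MB_SETUP_TOKEN=00000000-0000-0000-0000-000000000000 java -jar metabase.jar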
(defn clear-token!
"Clear the setup token if it exists and reset it to `nil`."
(ns metabase.cmd.compare-h2-dbs
"Utility functions for comparing the contents of two H2 DBs, for testing the `load-from-h2 and `dump-to-h2` commands."
(:require [clojure
[data :as data]
[pprint :as pprint]
[string :as str]]
[clojure.java.jdbc :as jdbc]
[metabase.util :as u])
(:import org.h2.jdbc.JdbcClob))
(defn- jdbc-spec [db-file]
{:classname "org.h2.Driver"
:subprotocol "h2"
:subname (str "file:" db-file)
"IFEXISTS" "TRUE"
"ACCESS_MODE_DATA" "r"
;; close DB right away when done
"DB_CLOSE_DELAY" "0"})
(def ^:private ignored-table-names
"Set of Table names to skip diffing (e.g. because they're not ones we migrate.)"
#{"DATABASECHANGELOG"
"QRTZ_BLOB_TRIGGERS"
"QRTZ_CALENDARS"
"QRTZ_CRON_TRIGGERS"
"QRTZ_FIRED_TRIGGERS"
"QRTZ_JOB_DETAILS"
"QRTZ_LOCKS"
"QRTZ_PAUSED_TRIGGER_GRPS"
"QRTZ_SCHEDULER_STATE"
"QRTZ_SIMPLE_TRIGGERS"
"QRTZ_SIMPROP_TRIGGERS"
"QRTZ_TRIGGERS"
"QUERY"
"QUERY_QUERYEXECUTION"
"QUERY_CACHE"
"TASK_HISTORY"})
(defn- table-names
"Return a sorted collection of all non-system table names."
[spec]
(jdbc/with-db-metadata [metadata spec]
(let [result (jdbc/metadata-result
(.getTables metadata nil nil nil
(into-array String ["TABLE", "VIEW", "FOREIGN TABLE", "MATERIALIZED VIEW"])))]
(sort (remove ignored-table-names (map :table_name result))))))
(defmulti ^:private normalize-value
class)
(defmethod normalize-value :default
[v]
v)
(defmethod normalize-value JdbcClob
[v]
(u/jdbc-clob->str v))
(def ^:private ignored-keys
#{:created_at :updated_at :timestamp :last_login :date_joined :last_analyzed})
(defn- normalize-values [row]
(into {} (for [[k v] row
:when (not (ignored-keys (keyword (str/lower-case (name k)))))]
[k (normalize-value v)])))
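;; A hedged example of the normalization above, with a hypothetical row: timestamp-style columns
;; are dropped and H2 CLOBs are coerced to plain strings before diffing (`a-jdbc-clob` is a
;; placeholder for an org.h2.jdbc.JdbcClob value):
(comment
  (normalize-values {:ID 1, :NAME a-jdbc-clob, :CREATED_AT #inst "2019-01-01"})
  ;;=> {:ID 1, :NAME "clob contents as a string"}
  )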
(defn- sort-rows [rows]
(vec (sort-by (fn [row]
(or (:id row)
(vec (sort row))))
rows)))
(defn- rows
"Return a sorted collection of all rows for a Table."
[spec table-name]
(let [rows (jdbc/query spec (format "SELECT * FROM \"%s\";" table-name))]
(->> rows (mapv normalize-values) sort-rows)))
(defn- different-table-names?
"Diff the table names in two DBs. Returns a truthy value if there is a difference.
the same."
[conn-1 conn-2]
(let [[table-names-1 table-names-2] (map table-names [conn-1 conn-2])
_ (printf "Diffing %d/%d table names...\n" (count table-names-1) (count table-names-2))
[only-in-1 only-in-2] (data/diff table-names-1 table-names-2)]
(when (or (seq only-in-1) (seq only-in-2))
(println "Tables are different!")
(println "Only in first DB:")
(pprint/pprint only-in-1)
(println "Only in second DB:")
(pprint/pprint only-in-2)
:table-names-are-different)))
(defn- different-rows-for-table?
"Diff the rows belonging to a specific table for two DBs. Returns truthy value if there is a difference."
[conn-1 conn-2 table-name]
(let [rows-1 (rows conn-1 table-name)
rows-2 (rows conn-2 table-name)
_ (printf "Diffing %d/%d rows for table %s...\n" (count rows-1) (count rows-2) table-name)
[only-in-1 only-in-2] (data/diff rows-1 rows-2)]
(when (or (seq only-in-1) (seq only-in-2))
(printf "DBs have different sets of rows for Table %s\n" table-name)
(println "Only in first DB:")
(pprint/pprint only-in-1)
(println "Only in second DB:")
(pprint/pprint only-in-2)
:table-rows-are-different)))
(defn- different-rows?
"Diff rows for all tables in two DBs. Returns truthy if there are any differences."
[conn-1 conn-2]
(reduce
(fn [different? table-name]
(or different?
(different-rows-for-table? conn-1 conn-2 table-name)))
false
(distinct (sort (concat (table-names conn-1) (table-names conn-2))))))
(defn- different-contents?
"Diff contents of 2 DBs. Returns truthy if there is a difference, falsey if not."
[db-file-1 db-file-2]
(jdbc/with-db-connection [conn-1 (jdbc-spec db-file-1)]
(jdbc/with-db-connection [conn-2 (jdbc-spec db-file-2)]
(or (different-table-names? conn-1 conn-2)
(different-rows? conn-1 conn-2)))))
(defn -main
"Main entrypoint."
[db-file-1 db-file-2]
(when-let [difference (different-contents? db-file-1 db-file-2)]
(println "DB contents are different. Reason:" difference)
(System/exit 1))
(println "Success: DB contents match.")
(System/exit 0))
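;; A usage sketch tying this back to the new lein alias and CI script (paths are examples):
;;
;;   lein compare-h2-dbs /path/to/test_db_fixture.db /path/to/dump.db
;;
;; prints "Success: DB contents match." and exits 0 when the two files contain the same tables and
;; rows (modulo the ignored tables and columns above); otherwise it reports the reason and exits 1.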