From 3b0e34182a509f8db6129d62132e1f9f207d2a96 Mon Sep 17 00:00:00 2001
From: Octavian Geagla <ogeagla@gmail.com>
Date: Fri, 20 Sep 2019 13:03:42 -0600
Subject: [PATCH] Dump to h2 (#10877)

* [cmd] dump to h2 (#10843)

* Tests for load & dump commands

* Fixes

* test fixes

* Test fixes

* Test fixes
---
 .circleci/config.yml                 |   4 +-
 bin/test-load-and-dump.sh            |  30 ++++
 project.clj                          |  26 ++--
 src/metabase/cmd.clj                 |   9 ++
 src/metabase/cmd/dump_to_h2.clj      | 196 +++++++++++++++++++++++++++
 src/metabase/db.clj                  |  78 +++++++----
 src/metabase/setup.clj               |  12 +-
 test/metabase/cmd/compare_h2_dbs.clj | 134 ++++++++++++++++++
 8 files changed, 445 insertions(+), 44 deletions(-)
 create mode 100755 bin/test-load-and-dump.sh
 create mode 100644 src/metabase/cmd/dump_to_h2.clj
 create mode 100644 test/metabase/cmd/compare_h2_dbs.clj

diff --git a/.circleci/config.yml b/.circleci/config.yml
index ff4787dc3c2..e7568d5e76e 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -345,12 +345,12 @@ jobs:
       - attach-workspace
       - restore-be-deps-cache
       - run:
-          name: Test migrating from H2 -> << parameters.db-type >>
+          name: Test migrating from H2 -> << parameters.db-type >> -> H2
           environment:
             MB_DB_TYPE: << parameters.db-type >>
             MB_DB_HOST: localhost
           command: >
-            lein run load-from-h2 ./frontend/test/__runner__/test_db_fixture.db
+            ./bin/test-load-and-dump.sh
           no_output_timeout: 5m
 
diff --git a/bin/test-load-and-dump.sh b/bin/test-load-and-dump.sh
new file mode 100755
index 00000000000..e56785143fc
--- /dev/null
+++ b/bin/test-load-and-dump.sh
@@ -0,0 +1,30 @@
+#! /usr/bin/env bash
+
+set -euxo pipefail
+
+SOURCE_DB="$(pwd)/frontend/test/__runner__/test_db_fixture.db"
+DEST_DB="$(pwd)/dump.db"
+
+echo -e "\n********************************************************************************"
+echo "Migrating $SOURCE_DB..."
+echo -e "********************************************************************************\n"
+
+MB_DB_TYPE=h2 MB_DB_FILE="$SOURCE_DB" lein run migrate up
+
+echo -e "\n********************************************************************************"
+echo "Loading data from H2 $SOURCE_DB into Postgres/MySQL..."
+echo -e "********************************************************************************\n"
+
+lein run load-from-h2 "$SOURCE_DB"
+
+echo -e "\n********************************************************************************"
+echo "Dumping data from Postgres/MySQL into H2 $DEST_DB..."
+echo -e "********************************************************************************\n"
+
+lein run dump-to-h2 "$DEST_DB"
+
+echo -e "\n********************************************************************************"
+echo "Comparing contents of $SOURCE_DB and $DEST_DB..."
+echo -e "********************************************************************************\n" + +lein compare-h2-dbs "$SOURCE_DB" "$DEST_DB" diff --git a/project.clj b/project.clj index a62ae48e212..96874aad90b 100644 --- a/project.clj +++ b/project.clj @@ -17,7 +17,9 @@ "run" ["with-profile" "+run" "run"] "ring" ["with-profile" "+ring" "ring"] "test" ["with-profile" "+expectations" "expectations"] - "bikeshed" ["with-profile" "+bikeshed" "bikeshed" "--max-line-length" "205"] + "bikeshed" ["with-profile" "+bikeshed" "bikeshed" + "--max-line-length" "205" + "--exclude-profiles" "compare-h2-dbs"] "check-namespace-decls" ["with-profile" "+check-namespace-decls" "check-namespace-decls"] "eastwood" ["with-profile" "+eastwood" "eastwood"] "check-reflection-warnings" ["with-profile" "+reflection-warnings" "check"] @@ -25,7 +27,9 @@ ;; `lein lint` will run all linters "lint" ["do" ["eastwood"] ["bikeshed"] ["check-namespace-decls"] ["docstring-checker"]] "repl" ["with-profile" "+repl" "repl"] - "strip-and-compress" ["with-profile" "+strip-and-compress" "run"]} + "strip-and-compress" ["with-profile" "+strip-and-compress" "run"] + "compare-h2-dbs" ["with-profile" "+compare-h2-dbs" "run"]} + ;; !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ;; !! PLEASE KEEP THESE ORGANIZED ALPHABETICALLY !! @@ -98,6 +102,7 @@ javax.jms/jms com.sun.jdmk/jmxtools com.sun.jmx/jmxri]] + [me.raynes/fs "1.4.6"] ; FS tools [medley "1.2.0"] ; lightweight lib of useful functions [metabase/connection-pool "1.0.2"] ; simple wrapper around C3P0. JDBC connection pools [metabase/mbql "1.3.5"] ; MBQL language schema & util fns @@ -132,7 +137,7 @@ ;; TODO - WHAT DOES THIS DO? :manifest {"Liquibase-Package" - #=(eval + #= (eval (str "liquibase.change,liquibase.changelog,liquibase.database,liquibase.parser,liquibase.precondition," "liquibase.datatype,liquibase.serializer,liquibase.sqlgenerator,liquibase.executor," "liquibase.snapshot,liquibase.logging,liquibase.diff,liquibase.structure," @@ -161,7 +166,7 @@ [ring/ring-mock "0.3.2"]] :plugins - [[lein-environ "1.1.0"]] ; easy access to environment variables + [[lein-environ "1.1.0"]] ; easy access to environment variables :env {:mb-run-mode "dev"} :jvm-opts ["-Dlogfile.path=target/log"]} @@ -243,7 +248,8 @@ :bikeshed [:include-all-drivers - {:plugins [[lein-bikeshed "0.4.1"]]}] + {:plugins + [[lein-bikeshed "0.4.1"]]}] :eastwood [:include-all-drivers @@ -259,7 +265,7 @@ ;; disabled (yet) ;; ;; For example see https://github.com/jonase/eastwood/issues/193 - ; + ;; ;; It's still useful to re-enable them and run them every once in a while because they catch ;; a lot of actual errors too. 
  ;; get them to work
@@ -314,11 +320,15 @@
 
  ;; Profile Metabase start time with `lein profile`
  :profile
  {:jvm-opts ["-XX:+CITime"   ; print time spent in JIT compiler
-             "-XX:+PrintGC"]} ; print a message when garbage collection takes place
+             "-XX:+PrintGC"]}  ; print a message when garbage collection takes place
 
  ;; get the H2 shell with 'lein h2'
  :h2-shell {:main org.h2.tools.Shell}
 
  :generate-automagic-dashboards-pot
- {:main metabase.automagic-dashboards.rules}})
+ {:main metabase.automagic-dashboards.rules}
+
+ :compare-h2-dbs
+ {:main ^:skip-aot metabase.cmd.compare-h2-dbs
+  :source-paths ["test"]}})
diff --git a/src/metabase/cmd.clj b/src/metabase/cmd.clj
index d66e15f739f..34e87549cb4 100644
--- a/src/metabase/cmd.clj
+++ b/src/metabase/cmd.clj
@@ -37,6 +37,15 @@
     (binding [mdb/*disable-data-migrations* true]
       ((resolve 'metabase.cmd.load-from-h2/load-from-h2!) h2-connection-string))))
 
+(defn ^:command dump-to-h2
+  "Transfer data from the existing app database to a newly created H2 DB."
+  [h2-filename]
+  (classloader/require 'metabase.cmd.dump-to-h2)
+  (binding [mdb/*disable-data-migrations* true]
+    (let [return-code ((resolve 'metabase.cmd.dump-to-h2/dump-to-h2!) h2-filename)]
+      (when (pos-int? return-code)
+        (System/exit return-code)))))
+
 (defn ^:command profile
   "Start Metabase the usual way and exit. Useful for profiling Metabase launch time."
   []
diff --git a/src/metabase/cmd/dump_to_h2.clj b/src/metabase/cmd/dump_to_h2.clj
new file mode 100644
index 00000000000..c4bbc801188
--- /dev/null
+++ b/src/metabase/cmd/dump_to_h2.clj
@@ -0,0 +1,196 @@
+(ns metabase.cmd.dump-to-h2
+  "Commands for dumping data from the app database to an H2 file.
+  Run this with `lein run dump-to-h2` or `java -jar metabase.jar dump-to-h2`.
+
+  Test this as follows:
+  ```lein run dump-to-h2 \"/path/to/h2\"```
+  Validate with:
+  ```lein run load-from-h2 \"/path/to/h2\"```
+  "
+  (:require [clojure.java
+             [io :as io]
+             [jdbc :as jdbc]]
+            [clojure.string :as str]
+            [colorize.core :as color]
+            [me.raynes.fs :as fs]
+            [metabase
+             [db :as mdb]
+             [util :as u]]
+            [metabase.db.migrations :refer [DataMigrations]]
+            [metabase.models
+             [activity :refer [Activity]]
+             [card :refer [Card]]
+             [card-favorite :refer [CardFavorite]]
+             [collection :refer [Collection]]
+             [collection-revision :refer [CollectionRevision]]
+             [dashboard :refer [Dashboard]]
+             [dashboard-card :refer [DashboardCard]]
+             [dashboard-card-series :refer [DashboardCardSeries]]
+             [dashboard-favorite :refer [DashboardFavorite]]
+             [database :refer [Database]]
+             [dependency :refer [Dependency]]
+             [dimension :refer [Dimension]]
+             [field :refer [Field]]
+             [field-values :refer [FieldValues]]
+             [metric :refer [Metric]]
+             [metric-important-field :refer [MetricImportantField]]
+             [permissions :refer [Permissions]]
+             [permissions-group :refer [PermissionsGroup]]
+             [permissions-group-membership :refer [PermissionsGroupMembership]]
+             [permissions-revision :refer [PermissionsRevision]]
+             [pulse :refer [Pulse]]
+             [pulse-card :refer [PulseCard]]
+             [pulse-channel :refer [PulseChannel]]
+             [pulse-channel-recipient :refer [PulseChannelRecipient]]
+             [revision :refer [Revision]]
+             [segment :refer [Segment]]
+             [session :refer [Session]]
+             [setting :refer [Setting]]
+             [table :refer [Table]]
+             [user :refer [User]]
+             [view-log :refer [ViewLog]]]
+            [metabase.util.i18n :refer [trs]]
+            [toucan.db :as db])
+  (:import java.sql.SQLException))
+
+
+(defn- println-ok [] (println (color/green "[OK]")))
+
+;;; ------------------------------------------ Models to Migrate (in order) ------------------------------------------
+
+(def ^:private entities
+  "Entities in the order they should be serialized/deserialized. This is done so we make sure that we load
+  instances of entities before others that might depend on them, e.g. `Databases` before `Tables` before `Fields`."
+  [Database
+   User
+   Setting
+   Dependency
+   Table
+   Field
+   FieldValues
+   Segment
+   Metric
+   MetricImportantField
+   Revision
+   ViewLog
+   Session
+   Dashboard
+   Card
+   CardFavorite
+   DashboardCard
+   DashboardCardSeries
+   Activity
+   Pulse
+   PulseCard
+   PulseChannel
+   PulseChannelRecipient
+   PermissionsGroup
+   PermissionsGroupMembership
+   Permissions
+   PermissionsRevision
+   Collection
+   CollectionRevision
+   DashboardFavorite
+   Dimension
+   ;; migrate the list of finished DataMigrations as the very last thing (all models to copy over should be listed
+   ;; above this line)
+   DataMigrations])
+
+
+;;; --------------------------------------------- H2 Connection Options ----------------------------------------------
+
+(defn- add-file-prefix-if-needed [connection-string-or-filename]
+  (if (str/starts-with? connection-string-or-filename "file:")
+    connection-string-or-filename
+    (str "file:" (.getAbsolutePath (io/file connection-string-or-filename)))))
+
+(defn- h2-details [h2-connection-string-or-nil]
+  (let [h2-filename (add-file-prefix-if-needed h2-connection-string-or-nil)]
+    (mdb/jdbc-details {:type :h2, :db h2-filename})))
+
+
+;;; ------------------------------------------- Fetching & Inserting Rows --------------------------------------------
+
+(defn- objects->colums+values
+  "Given a sequence of objects/rows fetched from the source DB, return the `columns` that should be used in the
+  `INSERT` statement, and a sequence of rows (as sequences)."
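+  ;; A hypothetical example of the shapes involved (the row below is made up; the exact quoting
+  ;; characters come from `(db/quote-fn)` for the connected application DB, double quotes shown here):
+  ;;   (objects->colums+values [{:id 1, :sizex 4, :sizey 3}])
+  ;;   ;; => {:cols ("\"id\"" "\"sizeX\"" "\"sizeY\""), :vals ((1 4 3))}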
+  [objs]
+  ;; 1) `:sizeX` and `:sizeY` come back from the source DB as `:sizex` and `:sizey` because of automatic lowercasing;
+  ;;    fix the names of these before putting them into the new DB
+  ;;
+  ;; 2) Need to wrap the column names in quotes because Postgres automatically lowercases unquoted identifiers
+  (let [source-keys (keys (first objs))
+        dest-keys   (for [k source-keys]
+                      ((db/quote-fn) (name (case k
+                                             :sizex :sizeX
+                                             :sizey :sizeY
+                                             k))))]
+    {:cols dest-keys
+     :vals (for [row objs]
+             (map (comp u/jdbc-clob->str row) source-keys))}))
+
+(def ^:private chunk-size 100)
+
+(defn- insert-chunk! [target-db-conn table-name chunkk]
+  (print (color/blue \.))
+  (flush)
+  (try
+    (let [{:keys [cols vals]} (objects->colums+values chunkk)]
+      (jdbc/insert-multi! target-db-conn table-name (map str/upper-case cols) vals))
+    (catch SQLException e
+      (jdbc/print-sql-exception-chain e)
+      (throw e))))
+
+(defn- insert-entity! [target-db-conn {table-name :table, entity-name :name} objs]
+  (print (u/format-color 'blue "Transferring %d instances of %s..." (count objs) entity-name))
+  (flush)
+  ;; The connection closes prematurely on occasion when we're inserting thousands of rows at once. Break into
+  ;; smaller chunks so connection stays alive
+  (doseq [chunk (partition-all chunk-size objs)]
+    (insert-chunk! target-db-conn table-name chunk))
+  (println-ok))
+
+(defn- load-data! [target-db-conn]
+  (println "Source db:" (mdb/jdbc-details))
+  (jdbc/with-db-connection [db-conn (mdb/jdbc-details)]
+    (doseq [{table-name :table, :as e} entities
+            :let  [rows (jdbc/query db-conn [(str "SELECT * FROM " (name table-name))])]
+            :when (seq rows)]
+      (insert-entity! target-db-conn e rows))))
+
+(defn- get-target-db-conn [h2-filename]
+  (h2-details h2-filename))
+
+;;; --------------------------------------------------- Public Fns ---------------------------------------------------
+
+(defn dump-to-h2!
+  "Transfer data from the configured application database (the usual MB_DB_* env vars / app config)
+  to an H2 file at `h2-filename`. Intended as a tool for migrating from one instance to another,
+  using H2 as the serialization target.
+
+  `h2-filename` defaults to `metabase_dump.h2` when none is given."
+  [h2-filename]
+  (let [h2-filename (or h2-filename "metabase_dump.h2")]
+    (println "Dumping to " h2-filename)
+    (doseq [filename [h2-filename
+                      (str h2-filename ".mv.db")]]
+      (when (.exists (io/file filename))
+        (fs/delete filename)
+        (println (u/format-color 'red (trs "Output H2 database already exists: %s, removing.") filename))))
+
+    (println "Dumping from configured Metabase db to H2 file" h2-filename)
+
+    (mdb/setup-db!* (get-target-db-conn h2-filename) true)
+    (mdb/setup-db!)
+
+    (if (= :h2 (mdb/db-type))
+      (println (u/format-color 'yellow (trs "Don't need to migrate, just use the existing H2 file")))
+      (jdbc/with-db-transaction [target-db-conn (get-target-db-conn h2-filename)]
+        (println "Conn of target: " target-db-conn)
+        (println-ok)
+        (println (u/format-color 'blue "Loading data..."))
+        (load-data! target-db-conn)
+        (println-ok)
+        (jdbc/db-unset-rollback-only! target-db-conn)))
+
+    (println "Dump complete")))
diff --git a/src/metabase/db.clj b/src/metabase/db.clj
index c23ba475b78..88aaa11b786 100644
--- a/src/metabase/db.clj
+++ b/src/metabase/db.clj
@@ -31,6 +31,21 @@
 ;;; |                                        DB FILE & CONNECTION DETAILS                                             |
 ;;; +----------------------------------------------------------------------------------------------------------------+
 
+(defn get-db-file
+  "Takes a filename and converts it to an H2-compatible filename."
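+  ;; Illustrative example only (the absolute-path portion depends on the working directory):
+  ;;   (get-db-file "metabase.db")
+  ;;   ;; => "file:/home/user/metabase/metabase.db;DB_CLOSE_DELAY=-1;MVCC=TRUE;DEFRAG_ALWAYS=TRUE"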
+  [db-file-name]
+  (let [
+        ;; we need to enable MVCC for Quartz JDBC backend to work! Quartz depends on row-level locking, which
+        ;; means without MVCC we "will experience dead-locks". MVCC is the default for everyone using the
+        ;; MVStore engine anyway so this only affects people still with legacy PageStore databases
+        ;;
+        ;; Tell H2 to defrag when Metabase is shut down -- can reduce DB size by multiple GIGABYTES -- see #6510
+        options ";DB_CLOSE_DELAY=-1;MVCC=TRUE;DEFRAG_ALWAYS=TRUE"]
+    ;; H2 wants file path to always be absolute
+    (str "file:"
+         (.getAbsolutePath (io/file db-file-name))
+         options)))
+
 (def db-file
   "Path to our H2 DB file from env var or app config."
   ;; see https://h2database.com/html/features.html for explanation of options
@@ -40,36 +55,33 @@
     ;; DB_CLOSE_DELAY=-1 = don't close the Database until the JVM shuts down
     "mem:metabase;DB_CLOSE_DELAY=-1"
     ;; File-based DB
-    (let [db-file-name (config/config-str :mb-db-file)
-          ;; we need to enable MVCC for Quartz JDBC backend to work! Quartz depends on row-level locking, which
-          ;; means without MVCC we "will experience dead-locks". MVCC is the default for everyone using the
-          ;; MVStore engine anyway so this only affects people still with legacy PageStore databases
-          ;;
-          ;; Tell H2 to defrag when Metabase is shut down -- can reduce DB size by multiple GIGABYTES -- see #6510
-          options ";DB_CLOSE_DELAY=-1;MVCC=TRUE;DEFRAG_ALWAYS=TRUE"]
-      ;; H2 wants file path to always be absolute
-      (str "file:"
-           (.getAbsolutePath (io/file db-file-name))
-           options)))))
+    (let [db-file-name (config/config-str :mb-db-file)]
+      (get-db-file db-file-name)))))
 
 (def ^:private jdbc-connection-regex
   #"^(jdbc:)?([^:/@]+)://(?:([^:/@]+)(?::([^:@]+))?@)?([^:@]+)(?::(\d+))?/([^/?]+)(?:\?(.*))?$")
 
-(defn- parse-connection-string
+;;TODO don't make this public
+(defn parse-connection-string
   "Parse a DB connection URI like
   `postgres://cam@localhost.com:5432/cams_cool_db?ssl=true&sslfactory=org.postgresql.ssl.NonValidatingFactory`
   and return a broken-out map."
   [uri]
   (when-let [[_ _ protocol user pass host port db query] (re-matches jdbc-connection-regex uri)]
+    (println "Parsed: " protocol user pass host port db query)
     (u/prog1 (merge {:type (case (keyword protocol)
                              :postgres   :postgres
                              :postgresql :postgres
-                             :mysql      :mysql)
-                     :user     user
-                     :password pass
-                     :host     host
-                     :port     port
-                     :dbname   db}
+                             :mysql      :mysql
+                             :h2         :h2)}
+
+                    (case (keyword protocol)
+                      :h2 {:db db}
+                      {:user     user
+                       :password pass
+                       :host     host
+                       :port     port
+                       :dbname   db})
                     (some-> query
                             codec/form-decode
                             walk/keywordize-keys))
@@ -265,10 +277,10 @@
   see https://github.com/metabase/metabase/issues/3715"
   [conn]
-  (let [liquibase-table-name (if (#{:h2 :mysql} (db-type))
+  (let [liquibase-table-name (if (#{:h2 :mysql} (:type conn))
                                "DATABASECHANGELOG"
                                "databasechangelog")
-        fresh-install?       (jdbc/with-db-metadata [meta (jdbc-details)] ;; don't migrate on fresh install
+        fresh-install?       (jdbc/with-db-metadata [meta (jdbc-details conn)] ;; don't migrate on fresh install
                                (empty? (jdbc/metadata-query
                                         (.getTables meta nil nil liquibase-table-name
                                                     (u/varargs String ["TABLE"])))))
        statement             (format "UPDATE %s SET FILENAME = ?" liquibase-table-name)]
@@ -460,22 +472,28 @@
       ((resolve 'metabase.db.migrations/run-all!))))
 
 
-(defn- setup-db!* []
+(defn setup-db!*
+  "Connects to db and runs migrations."
+  [db-details auto-migrate]
+  (du/profile (trs "Database setup")
+    (u/with-us-locale
+      (verify-db-connection db-details)
+      (run-schema-migrations! auto-migrate db-details)
+      (create-connection-pool! (jdbc-details db-details))
+      (run-data-migrations!)))
+  nil)
+
+(defn- setup-db-from-env!* []
   (let [db-details   @db-connection-details
         auto-migrate (config/config-bool :mb-db-automigrate)]
-    (du/profile (trs "Application database setup")
-      (u/with-us-locale
-        (verify-db-connection db-details)
-        (run-schema-migrations! auto-migrate db-details)
-        (create-connection-pool! (jdbc-details db-details))
-        (run-data-migrations!)
-        (reset! db-setup-finished? true))))
+    (setup-db!* db-details auto-migrate)
+    (reset! db-setup-finished? true))
   nil)
 
 (defonce ^{:arglists '([]), :doc "Do general preparation of database by validating that we can connect.
   Caller can specify if we should run any pending database migrations. If DB is already set up, this function will no-op."}
-    setup-db!
-    (partial deref (delay (setup-db!*))))
+  setup-db!
+  (partial deref (delay (setup-db-from-env!*))))
 
 
 ;;; Various convenience fns (experiMENTAL)
diff --git a/src/metabase/setup.clj b/src/metabase/setup.clj
index a35b232e1e2..031b9d576f5 100644
--- a/src/metabase/setup.clj
+++ b/src/metabase/setup.clj
@@ -1,6 +1,8 @@
 (ns metabase.setup
-  (:require [metabase.models.setting :refer [defsetting Setting]]
-            [toucan.db :as db]))
+  (:require [environ.core :refer [env]]
+            [metabase.models.setting :refer [defsetting Setting]]
+            [toucan.db :as db])
+  (:import java.util.UUID))
 
 (defsetting ^:private setup-token
   "A token used to signify that an instance has permissions to create the initial User. This is created upon the first
@@ -24,8 +26,10 @@
   []
   ;; fetch the value directly from the DB; *do not* rely on cached value, in case a different instance came along and
   ;; already created it
-  (or (db/select-one-field :value Setting :key "setup-token")
-      (setup-token (str (java.util.UUID/randomUUID)))))
+  (let [mb-setup-token (env :mb-setup-token)]
+    (or (when mb-setup-token (setup-token mb-setup-token))
+        (db/select-one-field :value Setting :key "setup-token")
+        (setup-token (str (UUID/randomUUID))))))
 
 (defn clear-token!
   "Clear the setup token if it exists and reset it to `nil`."
diff --git a/test/metabase/cmd/compare_h2_dbs.clj b/test/metabase/cmd/compare_h2_dbs.clj
new file mode 100644
index 00000000000..f2fe0acb00c
--- /dev/null
+++ b/test/metabase/cmd/compare_h2_dbs.clj
@@ -0,0 +1,134 @@
+(ns metabase.cmd.compare-h2-dbs
+  "Utility functions for comparing the contents of two H2 DBs, for testing the `load-from-h2` and `dump-to-h2` commands."
+  (:require [clojure
+             [data :as data]
+             [pprint :as pprint]
+             [string :as str]]
+            [clojure.java.jdbc :as jdbc]
+            [metabase.util :as u])
+  (:import org.h2.jdbc.JdbcClob))
+
+(defn- jdbc-spec [db-file]
+  {:classname "org.h2.Driver"
+   :subprotocol "h2"
+   :subname (str "file:" db-file)
+   "IFEXISTS" "TRUE"
+   "ACCESS_MODE_DATA" "r"
+   ;; close DB right away when done
+   "DB_CLOSE_DELAY" "0"})
+
+(def ^:private ignored-table-names
+  "Set of Table names to skip diffing (e.g. because they're not ones we migrate)."
+  #{"DATABASECHANGELOG"
+    "QRTZ_BLOB_TRIGGERS"
+    "QRTZ_CALENDARS"
+    "QRTZ_CRON_TRIGGERS"
+    "QRTZ_FIRED_TRIGGERS"
+    "QRTZ_JOB_DETAILS"
+    "QRTZ_LOCKS"
+    "QRTZ_PAUSED_TRIGGER_GRPS"
+    "QRTZ_SCHEDULER_STATE"
+    "QRTZ_SIMPLE_TRIGGERS"
+    "QRTZ_SIMPROP_TRIGGERS"
+    "QRTZ_TRIGGERS"
+    "QUERY"
+    "QUERY_QUERYEXECUTION"
+    "QUERY_CACHE"
+    "TASK_HISTORY"})
+
+(defn- table-names
+  "Return a sorted collection of all non-system table names."
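+  ;; Illustrative usage (the path is made up):
+  ;;   (table-names (jdbc-spec "/path/to/dump.db"))
+  ;;   ;; => sorted table names from the file, minus `ignored-table-names`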
+  [spec]
+  (jdbc/with-db-metadata [metadata spec]
+    (let [result (jdbc/metadata-result
+                  (.getTables metadata nil nil nil
+                              (into-array String ["TABLE", "VIEW", "FOREIGN TABLE", "MATERIALIZED VIEW"])))]
+      (sort (remove ignored-table-names (map :table_name result))))))
+
+(defmulti ^:private normalize-value
+  class)
+
+(defmethod normalize-value :default
+  [v]
+  v)
+
+(defmethod normalize-value JdbcClob
+  [v]
+  (u/jdbc-clob->str v))
+
+(def ^:private ignored-keys
+  #{:created_at :updated_at :timestamp :last_login :date_joined :last_analyzed})
+
+(defn- normalize-values [row]
+  (into {} (for [[k v] row
+                 :when (not (ignored-keys (keyword (str/lower-case (name k)))))]
+             [k (normalize-value v)])))
+
+(defn- sort-rows [rows]
+  (vec (sort-by (fn [row]
+                  (or (:id row)
+                      (vec (sort row))))
+                rows)))
+
+(defn- rows
+  "Return a sorted collection of all rows for a Table."
+  [spec table-name]
+  (let [rows (jdbc/query spec (format "SELECT * FROM \"%s\";" table-name))]
+    (->> rows (mapv normalize-values) sort-rows)))
+
+(defn- different-table-names?
+  "Diff the table names in two DBs. Returns a truthy value if there is a difference, falsey if they are the same."
+  [conn-1 conn-2]
+  (let [[table-names-1 table-names-2] (map table-names [conn-1 conn-2])
+        _                             (printf "Diffing %d/%d table names...\n" (count table-names-1) (count table-names-2))
+        [only-in-1 only-in-2]         (data/diff table-names-1 table-names-2)]
+    (when (or (seq only-in-1) (seq only-in-2))
+      (println "Tables are different!")
+      (println "Only in first DB:")
+      (pprint/pprint only-in-1)
+      (println "Only in second DB:")
+      (pprint/pprint only-in-2)
+      :table-names-are-different)))
+
+(defn- different-rows-for-table?
+  "Diff the rows belonging to a specific table for two DBs. Returns a truthy value if there is a difference."
+  [conn-1 conn-2 table-name]
+  (let [rows-1                (rows conn-1 table-name)
+        rows-2                (rows conn-2 table-name)
+        _                     (printf "Diffing %d/%d rows for table %s...\n" (count rows-1) (count rows-2) table-name)
+        [only-in-1 only-in-2] (data/diff rows-1 rows-2)]
+    (when (or (seq only-in-1) (seq only-in-2))
+      (printf "DBs have different sets of rows for Table %s\n" table-name)
+      (println "Only in first DB:")
+      (pprint/pprint only-in-1)
+      (println "Only in second DB:")
+      (pprint/pprint only-in-2)
+      :table-rows-are-different)))
+
+(defn- different-rows?
+  "Diff rows for all tables in two DBs. Returns truthy if there are any differences."
+  [conn-1 conn-2]
+  (reduce
+   (fn [different? table-name]
+     (or different?
+         (different-rows-for-table? conn-1 conn-2 table-name)))
+   false
+   (distinct (sort (concat (table-names conn-1) (table-names conn-2))))))
+
+(defn- different-contents?
+  "Diff contents of 2 DBs. Returns truthy if there is a difference, falsey if not."
+  [db-file-1 db-file-2]
+  (jdbc/with-db-connection [conn-1 (jdbc-spec db-file-1)]
+    (jdbc/with-db-connection [conn-2 (jdbc-spec db-file-2)]
+      (or (different-table-names? conn-1 conn-2)
+          (different-rows? conn-1 conn-2)))))
+
+(defn -main
+  "Main entrypoint."
+  [db-file-1 db-file-2]
+  (when-let [difference (different-contents? db-file-1 db-file-2)]
+    (println "DB contents are different. Reason:" difference)
+    (System/exit 1))
+  (println "Success: DB contents match.")
+  (System/exit 0))
-- 
GitLab
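
Usage sketch (illustrative, not part of the patch): the dump-and-compare round trip that bin/test-load-and-dump.sh drives through `lein run` can also be exercised from a REPL. This assumes the application-database env vars point at the source DB you want to dump, and that `test/` is on the classpath (as the new `compare-h2-dbs` profile arranges) if you also want to load the comparison namespace; the file paths below are made up.

    (require '[metabase.cmd :as cmd])

    ;; Dump the configured application DB to an H2 file. Going through the `dump-to-h2`
    ;; command (rather than calling dump-to-h2! directly) keeps data migrations disabled
    ;; during the copy, the same way the CLI entry point does.
    (cmd/dump-to-h2 "/tmp/metabase_dump.h2")

    ;; The comparison entry point calls System/exit, so it is friendlier to run it as a
    ;; separate process via the new lein alias:
    ;;
    ;;   lein compare-h2-dbs /path/to/source.db /tmp/metabase_dump.h2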