From d522b91471a85d2e0daf6f9b55dda7eedd6cb702 Mon Sep 17 00:00:00 2001 From: bryan <bryan.maass@gmail.com> Date: Mon, 13 Nov 2023 13:39:40 -0600 Subject: [PATCH] Audit db sync cannot happen (#35621) * Audit db sync cannot happen - disable all job scheduling for audit db - make syncing/scanning auditdb a noop in prod (throws otherwise) * better name for job-data -> jobs-to-create * test no scheduling and no running audit db sync * sort ns * fix trivial check in test * use metabase.task/scheduler-info intead qrtz table * sort ns --- .../metabase_enterprise/audit_db_test.clj | 19 +++ src/metabase/task/sync_databases.clj | 120 +++++++++--------- 2 files changed, 79 insertions(+), 60 deletions(-) diff --git a/enterprise/backend/test/metabase_enterprise/audit_db_test.clj b/enterprise/backend/test/metabase_enterprise/audit_db_test.clj index 73be93741be..c351cce6d56 100644 --- a/enterprise/backend/test/metabase_enterprise/audit_db_test.clj +++ b/enterprise/backend/test/metabase_enterprise/audit_db_test.clj @@ -2,12 +2,14 @@ (:require [babashka.fs :as fs] [clojure.java.io :as io] + [clojure.string :as str] [clojure.test :refer [deftest is testing]] [metabase-enterprise.audit-db :as audit-db] [metabase.core :as mbc] [metabase.models.database :refer [Database]] [metabase.models.permissions :as perms] [metabase.task :as task] + [metabase.task.sync-databases :as task.sync-databases] [metabase.test :as mt] [toucan2.core :as t2])) @@ -68,3 +70,20 @@ (is (= #{"plugins/instance_analytics/collections" "plugins/instance_analytics/databases"} (set (map str (fs/list-dir "plugins/instance_analytics")))))) + +(defn- get-audit-db-trigger-keys [] + (let [trigger-keys (->> (task/scheduler-info) :jobs (mapcat :triggers) (map :key)) + audit-db? #(str/includes? % (str perms/audit-db-id))] + (filter audit-db? trigger-keys))) + +(deftest no-sync-tasks-for-audit-db + (with-audit-db-restoration + (audit-db/ensure-audit-db-installed!) + (is (= 0 (count (get-audit-db-trigger-keys))) "no sync scheduled after installation") + + (with-redefs [task.sync-databases/job-context->database-id (constantly perms/audit-db-id)] + (is (thrown-with-msg? + clojure.lang.ExceptionInfo + #"Cannot sync Database: It is the audit db." + (#'task.sync-databases/sync-and-analyze-database! "job-context")))) + (is (= 0 (count (get-audit-db-trigger-keys))) "no sync occured even when called directly for audit db."))) diff --git a/src/metabase/task/sync_databases.clj b/src/metabase/task/sync_databases.clj index 2c2b2929bae..bed269419a5 100644 --- a/src/metabase/task/sync_databases.clj +++ b/src/metabase/task/sync_databases.clj @@ -10,10 +10,12 @@ [clojurewerkz.quartzite.triggers :as triggers] [java-time.api :as t] [malli.core :as mc] + [metabase.config :as config] [metabase.db.query :as mdb.query] [metabase.lib.schema.id :as lib.schema.id] [metabase.models.database :as database :refer [Database]] [metabase.models.interface :as mi] + [metabase.models.permissions :as perms] [metabase.sync.analyze :as analyze] [metabase.sync.field-values :as field-values] [metabase.sync.schedules :as sync.schedules] @@ -28,7 +30,11 @@ [metabase.util.malli.schema :as ms] [toucan2.core :as t2]) (:import - (org.quartz CronTrigger JobDetail JobKey TriggerKey))) + (org.quartz + CronTrigger + JobDetail + JobKey + TriggerKey))) (set! *warn-on-reflection* true) @@ -70,17 +76,26 @@ "The sync and analyze database job, as a function that can be used in a test" [job-context] (when-let [database-id (job-context->database-id job-context)] - (log/info (trs "Starting sync task for Database {0}." database-id)) - (when-let [database (or (t2/select-one Database :id database-id) - (do - (unschedule-tasks-for-db! (mi/instance Database {:id database-id})) - (log/warn (trs "Cannot sync Database {0}: Database does not exist." database-id))))] - (sync-metadata/sync-db-metadata! database) - ;; only run analysis if this is a "full sync" database - (when (:is_full_sync database) - (let [results (analyze/analyze-db! database)] - (when (and (:refingerprint database) (should-refingerprint-fields? results)) - (analyze/refingerprint-db! database))))))) + (if (= perms/audit-db-id database-id) + (do + (log/warn (trs "Cannot sync Database: It is the audit db.")) + (when-not config/is-prod? + (throw (ex-info "Cannot sync Database: It is the audit db." + {:database-id database-id + :raw-job-context job-context + :job-context (pr-str job-context)})))) + (do + (log/info (trs "Starting sync task for Database {0}." database-id)) + (when-let [database (or (t2/select-one Database :id database-id) + (do + (unschedule-tasks-for-db! (mi/instance Database {:id database-id})) + (log/warn (trs "Cannot sync Database {0}: Database does not exist." database-id))))] + (sync-metadata/sync-db-metadata! database) + ;; only run analysis if this is a "full sync" database + (when (:is_full_sync database) + (let [results (analyze/analyze-db! database)] + (when (and (:refingerprint database) (should-refingerprint-fields? results)) + (analyze/refingerprint-db! database))))))))) (jobs/defjob ^{org.quartz.DisallowConcurrentExecution true :doc "Sync and analyze the database"} @@ -227,58 +242,43 @@ ;; See https://www.nurkiewicz.com/2012/04/quartz-scheduler-misfire-instructions.html for more info (cron/with-misfire-handling-instruction-do-nothing))))) -(defn is-audit-db-sync-job? - "Returns true when the job is a sync job, and the database is an audit database. - - This is in [[check-and-schedule-tasks-for-db!]] to skip the creation of the sync job test for audit databases." - [database {:keys [key db-schedule-column]}] - (and (:is_audit database) - (= key :sync-and-analyze) - (= db-schedule-column :metadata_sync_schedule))) - ;; called [[from metabase.models.database/schedule-tasks!]] from the post-insert and the pre-update (mu/defn check-and-schedule-tasks-for-db! "Schedule a new Quartz job for `database` and `task-info` if it doesn't already exist or is incorrect." [database :- (ms/InstanceOf Database)] - (let [sync-job (task/job-info (job-key sync-analyze-task-info)) - fv-job (task/job-info (job-key field-values-task-info)) - - sync-trigger (trigger database sync-analyze-task-info) - fv-trigger (trigger database field-values-task-info) - - existing-sync-trigger (some (fn [trigger] (when (= (:key trigger) (.. sync-trigger getKey getName)) - trigger)) - (:triggers sync-job)) - existing-fv-trigger (some (fn [trigger] (when (= (:key trigger) (.. fv-trigger getKey getName)) - trigger)) - (:triggers fv-job)) - job-data [{:existing-trigger existing-sync-trigger - :existing-schedule (:metadata_sync_schedule database) - :ti sync-analyze-task-info - :trigger sync-trigger - :description "sync/analyze"} - {:existing-trigger existing-fv-trigger - :existing-schedule (:cache_field_values_schedule database) - :ti field-values-task-info - :trigger fv-trigger - :description "field-values"}] - {:keys [jobs-to-create - jobs-to-skip]} (group-by (fn [{:keys [ti]}] - (if (is-audit-db-sync-job? database ti) - :jobs-to-skip - :jobs-to-create)) - job-data)] - (doseq [{:keys [description]} jobs-to-skip] - (log/info (u/format-color :red "Not scheduling %s for database %d" description (:id database)))) - (doseq [{:keys [existing-trigger existing-schedule ti trigger description]} jobs-to-create - :when (or (not existing-trigger) - (not= (:schedule existing-trigger) existing-schedule))] - (delete-task! database ti) - (log/info - (u/format-color :green "Scheduling %s for database %d: trigger: %s" - description (:id database) (.. ^org.quartz.Trigger trigger getKey getName))) - ;; now (re)schedule the task - (task/add-trigger! trigger)))) + (if (= perms/audit-db-id (:id database)) + (log/info (u/format-color :red "Not scheduling tasks for audit database")) + (let [sync-job (task/job-info (job-key sync-analyze-task-info)) + fv-job (task/job-info (job-key field-values-task-info)) + + sync-trigger (trigger database sync-analyze-task-info) + fv-trigger (trigger database field-values-task-info) + + existing-sync-trigger (some (fn [trigger] (when (= (:key trigger) (.. sync-trigger getKey getName)) + trigger)) + (:triggers sync-job)) + existing-fv-trigger (some (fn [trigger] (when (= (:key trigger) (.. fv-trigger getKey getName)) + trigger)) + (:triggers fv-job)) + jobs-to-create [{:existing-trigger existing-sync-trigger + :existing-schedule (:metadata_sync_schedule database) + :ti sync-analyze-task-info + :trigger sync-trigger + :description "sync/analyze"} + {:existing-trigger existing-fv-trigger + :existing-schedule (:cache_field_values_schedule database) + :ti field-values-task-info + :trigger fv-trigger + :description "field-values"}]] + (doseq [{:keys [existing-trigger existing-schedule ti trigger description]} jobs-to-create + :when (or (not existing-trigger) + (not= (:schedule existing-trigger) existing-schedule))] + (delete-task! database ti) + (log/info + (u/format-color :green "Scheduling %s for database %d: trigger: %s" + description (:id database) (.. ^org.quartz.Trigger trigger getKey getName))) + ;; now (re)schedule the task + (task/add-trigger! trigger))))) ;;; +----------------------------------------------------------------------------------------------------------------+ -- GitLab