From d522b91471a85d2e0daf6f9b55dda7eedd6cb702 Mon Sep 17 00:00:00 2001
From: bryan <bryan.maass@gmail.com>
Date: Mon, 13 Nov 2023 13:39:40 -0600
Subject: [PATCH] Audit db sync cannot happen (#35621)

* Audit db sync cannot happen

- disable all job scheduling for audit db
- make syncing/scanning auditdb a noop in prod (throws otherwise)

* better name for job-data -> jobs-to-create

* test no scheduling and no running audit db sync

* sort ns

* fix trivial check in test

* use metabase.task/scheduler-info intead qrtz table

* sort ns
---
 .../metabase_enterprise/audit_db_test.clj     |  19 +++
 src/metabase/task/sync_databases.clj          | 120 +++++++++---------
 2 files changed, 79 insertions(+), 60 deletions(-)

diff --git a/enterprise/backend/test/metabase_enterprise/audit_db_test.clj b/enterprise/backend/test/metabase_enterprise/audit_db_test.clj
index 73be93741be..c351cce6d56 100644
--- a/enterprise/backend/test/metabase_enterprise/audit_db_test.clj
+++ b/enterprise/backend/test/metabase_enterprise/audit_db_test.clj
@@ -2,12 +2,14 @@
   (:require
    [babashka.fs :as fs]
    [clojure.java.io :as io]
+   [clojure.string :as str]
    [clojure.test :refer [deftest is testing]]
    [metabase-enterprise.audit-db :as audit-db]
    [metabase.core :as mbc]
    [metabase.models.database :refer [Database]]
    [metabase.models.permissions :as perms]
    [metabase.task :as task]
+   [metabase.task.sync-databases :as task.sync-databases]
    [metabase.test :as mt]
    [toucan2.core :as t2]))
 
@@ -68,3 +70,20 @@
   (is (= #{"plugins/instance_analytics/collections"
            "plugins/instance_analytics/databases"}
          (set (map str (fs/list-dir "plugins/instance_analytics"))))))
+
+(defn- get-audit-db-trigger-keys []
+  (let [trigger-keys (->> (task/scheduler-info) :jobs (mapcat :triggers) (map :key))
+        audit-db? #(str/includes? % (str perms/audit-db-id))]
+    (filter audit-db? trigger-keys)))
+
+(deftest no-sync-tasks-for-audit-db
+  (with-audit-db-restoration
+    (audit-db/ensure-audit-db-installed!)
+    (is (= 0 (count (get-audit-db-trigger-keys))) "no sync scheduled after installation")
+
+    (with-redefs [task.sync-databases/job-context->database-id (constantly perms/audit-db-id)]
+      (is (thrown-with-msg?
+           clojure.lang.ExceptionInfo
+           #"Cannot sync Database: It is the audit db."
+           (#'task.sync-databases/sync-and-analyze-database! "job-context"))))
+    (is (= 0 (count (get-audit-db-trigger-keys))) "no sync occured even when called directly for audit db.")))
diff --git a/src/metabase/task/sync_databases.clj b/src/metabase/task/sync_databases.clj
index 2c2b2929bae..bed269419a5 100644
--- a/src/metabase/task/sync_databases.clj
+++ b/src/metabase/task/sync_databases.clj
@@ -10,10 +10,12 @@
    [clojurewerkz.quartzite.triggers :as triggers]
    [java-time.api :as t]
    [malli.core :as mc]
+   [metabase.config :as config]
    [metabase.db.query :as mdb.query]
    [metabase.lib.schema.id :as lib.schema.id]
    [metabase.models.database :as database :refer [Database]]
    [metabase.models.interface :as mi]
+   [metabase.models.permissions :as perms]
    [metabase.sync.analyze :as analyze]
    [metabase.sync.field-values :as field-values]
    [metabase.sync.schedules :as sync.schedules]
@@ -28,7 +30,11 @@
    [metabase.util.malli.schema :as ms]
    [toucan2.core :as t2])
   (:import
-   (org.quartz CronTrigger JobDetail JobKey TriggerKey)))
+   (org.quartz
+    CronTrigger
+    JobDetail
+    JobKey
+    TriggerKey)))
 
 (set! *warn-on-reflection* true)
 
@@ -70,17 +76,26 @@
   "The sync and analyze database job, as a function that can be used in a test"
   [job-context]
   (when-let [database-id (job-context->database-id job-context)]
-    (log/info (trs "Starting sync task for Database {0}." database-id))
-    (when-let [database (or (t2/select-one Database :id database-id)
-                            (do
-                              (unschedule-tasks-for-db! (mi/instance Database {:id database-id}))
-                              (log/warn (trs "Cannot sync Database {0}: Database does not exist." database-id))))]
-      (sync-metadata/sync-db-metadata! database)
-      ;; only run analysis if this is a "full sync" database
-      (when (:is_full_sync database)
-        (let [results (analyze/analyze-db! database)]
-          (when (and (:refingerprint database) (should-refingerprint-fields? results))
-            (analyze/refingerprint-db! database)))))))
+    (if (= perms/audit-db-id database-id)
+      (do
+        (log/warn (trs "Cannot sync Database: It is the audit db."))
+        (when-not config/is-prod?
+          (throw (ex-info "Cannot sync Database: It is the audit db."
+                          {:database-id database-id
+                           :raw-job-context job-context
+                           :job-context (pr-str job-context)}))))
+      (do
+        (log/info (trs "Starting sync task for Database {0}." database-id))
+        (when-let [database (or (t2/select-one Database :id database-id)
+                                (do
+                                  (unschedule-tasks-for-db! (mi/instance Database {:id database-id}))
+                                  (log/warn (trs "Cannot sync Database {0}: Database does not exist." database-id))))]
+          (sync-metadata/sync-db-metadata! database)
+          ;; only run analysis if this is a "full sync" database
+          (when (:is_full_sync database)
+            (let [results (analyze/analyze-db! database)]
+              (when (and (:refingerprint database) (should-refingerprint-fields? results))
+                (analyze/refingerprint-db! database)))))))))
 
 (jobs/defjob ^{org.quartz.DisallowConcurrentExecution true
                :doc "Sync and analyze the database"}
@@ -227,58 +242,43 @@
       ;; See https://www.nurkiewicz.com/2012/04/quartz-scheduler-misfire-instructions.html for more info
       (cron/with-misfire-handling-instruction-do-nothing)))))
 
-(defn is-audit-db-sync-job?
-  "Returns true when the job is a sync job, and the database is an audit database.
-
-  This is in [[check-and-schedule-tasks-for-db!]] to skip the creation of the sync job test for audit databases."
-  [database {:keys [key db-schedule-column]}]
-  (and (:is_audit database)
-       (= key :sync-and-analyze)
-       (= db-schedule-column :metadata_sync_schedule)))
-
 ;; called [[from metabase.models.database/schedule-tasks!]] from the post-insert and the pre-update
 (mu/defn check-and-schedule-tasks-for-db!
   "Schedule a new Quartz job for `database` and `task-info` if it doesn't already exist or is incorrect."
   [database :- (ms/InstanceOf Database)]
-  (let [sync-job (task/job-info (job-key sync-analyze-task-info))
-        fv-job   (task/job-info (job-key field-values-task-info))
-
-        sync-trigger (trigger database sync-analyze-task-info)
-        fv-trigger   (trigger database field-values-task-info)
-
-        existing-sync-trigger (some (fn [trigger] (when (= (:key trigger) (.. sync-trigger getKey getName))
-                                                    trigger))
-                                    (:triggers sync-job))
-        existing-fv-trigger   (some (fn [trigger] (when (= (:key trigger) (.. fv-trigger getKey getName))
-                                                    trigger))
-                                    (:triggers fv-job))
-        job-data [{:existing-trigger  existing-sync-trigger
-                   :existing-schedule (:metadata_sync_schedule database)
-                   :ti                sync-analyze-task-info
-                   :trigger           sync-trigger
-                   :description       "sync/analyze"}
-                  {:existing-trigger  existing-fv-trigger
-                   :existing-schedule (:cache_field_values_schedule database)
-                   :ti                field-values-task-info
-                   :trigger           fv-trigger
-                   :description       "field-values"}]
-        {:keys [jobs-to-create
-                jobs-to-skip]} (group-by (fn [{:keys [ti]}]
-                                           (if (is-audit-db-sync-job? database ti)
-                                             :jobs-to-skip
-                                             :jobs-to-create))
-                                         job-data)]
-    (doseq [{:keys [description]} jobs-to-skip]
-      (log/info (u/format-color :red "Not scheduling %s for database %d" description (:id database))))
-    (doseq [{:keys [existing-trigger existing-schedule ti trigger description]} jobs-to-create
-            :when (or (not existing-trigger)
-                      (not= (:schedule existing-trigger) existing-schedule))]
-      (delete-task! database ti)
-      (log/info
-       (u/format-color :green "Scheduling %s for database %d: trigger: %s"
-                       description (:id database) (.. ^org.quartz.Trigger trigger getKey getName)))
-      ;; now (re)schedule the task
-      (task/add-trigger! trigger))))
+  (if (= perms/audit-db-id (:id database))
+    (log/info (u/format-color :red "Not scheduling tasks for audit database"))
+    (let [sync-job (task/job-info (job-key sync-analyze-task-info))
+          fv-job   (task/job-info (job-key field-values-task-info))
+
+          sync-trigger (trigger database sync-analyze-task-info)
+          fv-trigger   (trigger database field-values-task-info)
+
+          existing-sync-trigger (some (fn [trigger] (when (= (:key trigger) (.. sync-trigger getKey getName))
+                                                      trigger))
+                                      (:triggers sync-job))
+          existing-fv-trigger   (some (fn [trigger] (when (= (:key trigger) (.. fv-trigger getKey getName))
+                                                      trigger))
+                                      (:triggers fv-job))
+          jobs-to-create [{:existing-trigger  existing-sync-trigger
+                           :existing-schedule (:metadata_sync_schedule database)
+                           :ti                sync-analyze-task-info
+                           :trigger           sync-trigger
+                           :description       "sync/analyze"}
+                          {:existing-trigger  existing-fv-trigger
+                           :existing-schedule (:cache_field_values_schedule database)
+                           :ti                field-values-task-info
+                           :trigger           fv-trigger
+                           :description       "field-values"}]]
+      (doseq [{:keys [existing-trigger existing-schedule ti trigger description]} jobs-to-create
+              :when (or (not existing-trigger)
+                        (not= (:schedule existing-trigger) existing-schedule))]
+        (delete-task! database ti)
+        (log/info
+         (u/format-color :green "Scheduling %s for database %d: trigger: %s"
+                         description (:id database) (.. ^org.quartz.Trigger trigger getKey getName)))
+        ;; now (re)schedule the task
+        (task/add-trigger! trigger)))))
 
 
 ;;; +----------------------------------------------------------------------------------------------------------------+
-- 
GitLab