Skip to content
Snippets Groups Projects
Unverified Commit 2f6f878b authored by github-automation-metabase's avatar github-automation-metabase Committed by GitHub
Browse files

Batch query_execution, audit_log and view_log truncations (#51490) (#51602)

parent 68b2f66d
No related branches found
No related tags found
No related merge requests found
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
[clojurewerkz.quartzite.triggers :as triggers] [clojurewerkz.quartzite.triggers :as triggers]
[java-time.api :as t] [java-time.api :as t]
[metabase.config :as config] [metabase.config :as config]
[metabase.db :as mdb]
[metabase.models.setting :as setting :refer [defsetting]] [metabase.models.setting :as setting :refer [defsetting]]
[metabase.models.task-history :as task-history] [metabase.models.task-history :as task-history]
[metabase.plugins.classloader :as classloader] [metabase.plugins.classloader :as classloader]
...@@ -45,7 +46,6 @@ ...@@ -45,7 +46,6 @@
:audit :never :audit :never
:getter (fn [] :getter (fn []
(let [env-var-value (setting/get-value-of-type :integer :audit-max-retention-days)] (let [env-var-value (setting/get-value-of-type :integer :audit-max-retention-days)]
(def env-var-value env-var-value)
(cond (cond
(nil? env-var-value) (nil? env-var-value)
default-retention-days default-retention-days
...@@ -70,23 +70,54 @@ ...@@ -70,23 +70,54 @@
Twice a day, Metabase will delete rows older than this threshold. The minimum value is 30 days (Metabase will treat entered values of 1 to 29 the same as 30). Twice a day, Metabase will delete rows older than this threshold. The minimum value is 30 days (Metabase will treat entered values of 1 to 29 the same as 30).
If set to 0, Metabase will keep all rows.") If set to 0, Metabase will keep all rows.")
(defsetting audit-table-truncation-batch-size
(deferred-tru "Batch size to use for deletion of old rows for audit-related tables (like query_execution). Can be only set as an environment variable.")
:visibility :internal
:setter :none
:type :integer
:default 50000
:audit :never
:export? false)
(defn- truncate-table-batched!
[table-name time-column]
(t2/query-one
(case (mdb/db-type)
(:postgres :h2)
{:delete-from (keyword table-name)
:where [:in
:id
{:select [:id]
:from (keyword table-name)
:where [:<=
(keyword time-column)
(t/minus (t/offset-date-time) (t/days (audit-max-retention-days)))]
:limit (audit-table-truncation-batch-size)}]}
(:mysql :mariadb)
{:delete-from (keyword table-name)
:where [:<=
(keyword time-column)
(t/minus (t/offset-date-time) (t/days (audit-max-retention-days)))]
:limit (audit-table-truncation-batch-size)})))
(defn- truncate-table! (defn- truncate-table!
"Given a model, deletes all rows older than the configured threshold" "Given a model, deletes all rows older than the configured threshold"
[model time-column] [model time-column]
(when-not (infinite? (audit-max-retention-days)) (when-not (infinite? (audit-max-retention-days))
(let [table-name (name (t2/table-name model))] (let [table-name (name (t2/table-name model))]
(task-history/with-task-history {:task "task-history-cleanup"} (try
(try (log/infof "Cleaning up %s table" table-name)
(log/infof "Cleaning up %s table" table-name) (loop [total-rows-deleted 0]
(let [rows-deleted (t2/delete! (let [batch-rows-deleted (truncate-table-batched! table-name time-column)]
model ;; Only try to delete another batch if the last batch was full
time-column (if (= batch-rows-deleted (audit-table-truncation-batch-size))
[:<= (t/minus (t/offset-date-time) (t/days (audit-max-retention-days)))])] (recur (+ total-rows-deleted (long batch-rows-deleted)))
(if (> rows-deleted 0) (if (not= total-rows-deleted 0)
(log/infof "%s cleanup successful, %d rows were deleted" table-name rows-deleted) (log/infof "%s cleanup successful, %d rows were deleted" table-name total-rows-deleted)
(log/infof "%s cleanup successful, no rows were deleted" table-name))) (log/infof "%s cleanup successful, no rows were deleted" table-name)))))
(catch Throwable e (catch Throwable e
(log/errorf e "%s cleanup failed" table-name))))))) (log/errorf e "%s cleanup failed" table-name))))))
(defenterprise audit-models-to-truncate (defenterprise audit-models-to-truncate
"List of models to truncate. OSS implementation only truncates `query_execution` table." "List of models to truncate. OSS implementation only truncates `query_execution` table."
...@@ -98,7 +129,8 @@ If set to 0, Metabase will keep all rows.") ...@@ -98,7 +129,8 @@ If set to 0, Metabase will keep all rows.")
[] []
(run! (run!
(fn [{:keys [model timestamp-col]}] (fn [{:keys [model timestamp-col]}]
(truncate-table! model timestamp-col)) (task-history/with-task-history {:task "task-history-cleanup"}
(truncate-table! model timestamp-col)))
(audit-models-to-truncate))) (audit-models-to-truncate)))
(jobs/defjob ^{:doc "Triggers the removal of `query_execution` rows older than the configured threshold."} TruncateAuditTables [_] (jobs/defjob ^{:doc "Triggers the removal of `query_execution` rows older than the configured threshold."} TruncateAuditTables [_]
......
...@@ -8,8 +8,7 @@ ...@@ -8,8 +8,7 @@
[metabase.task.truncate-audit-tables :as task.truncate-audit-tables] [metabase.task.truncate-audit-tables :as task.truncate-audit-tables]
[metabase.test :as mt] [metabase.test :as mt]
[metabase.test.fixtures :as fixtures] [metabase.test.fixtures :as fixtures]
[toucan2.core :as t2] [toucan2.core :as t2]))
[toucan2.tools.with-temp :as t2.with-temp]))
(use-fixtures :once (fixtures/initialize :db)) (use-fixtures :once (fixtures/initialize :db))
...@@ -45,7 +44,7 @@ ...@@ -45,7 +44,7 @@
(deftest truncate-table-test (deftest truncate-table-test
(testing "truncate-table accurately truncates a table according to the value of the audit-max-retention-days env var" (testing "truncate-table accurately truncates a table according to the value of the audit-max-retention-days env var"
(mt/with-premium-features #{} (mt/with-premium-features #{}
(t2.with-temp/with-temp (mt/with-temp
[:model/QueryExecution {qe1-id :id} (merge (query-execution-defaults) [:model/QueryExecution {qe1-id :id} (merge (query-execution-defaults)
{:started_at (t/offset-date-time)}) {:started_at (t/offset-date-time)})
;; 31 days ago ;; 31 days ago
...@@ -79,3 +78,19 @@ ...@@ -79,3 +78,19 @@
(#'task.truncate-audit-tables/truncate-audit-tables!) (#'task.truncate-audit-tables/truncate-audit-tables!)
(is (= #{qe1-id} (is (= #{qe1-id}
(t2/select-fn-set :id :model/QueryExecution {:where [:in :id [qe1-id qe2-id qe3-id]]}))))))))))) (t2/select-fn-set :id :model/QueryExecution {:where [:in :id [qe1-id qe2-id qe3-id]]})))))))))))
(deftest batched-deletion-test
(testing "Deletion is batched into multiple queries"
(mt/with-temporary-setting-values [audit-table-truncation-batch-size 2]
(mt/with-temp
[:model/QueryExecution {qe1-id :id} (merge (query-execution-defaults)
{:started_at (t/minus (t/offset-date-time) (t/years 4))})
:model/QueryExecution {qe2-id :id} (merge (query-execution-defaults)
{:started_at (t/minus (t/offset-date-time) (t/years 4))})
:model/QueryExecution {qe3-id :id} (merge (query-execution-defaults)
{:started_at (t/minus (t/offset-date-time) (t/years 4))})]
(t2/with-call-count [call-count]
(#'task.truncate-audit-tables/truncate-table! :model/QueryExecution :started_at)
;; Should make deletion queries in two batches (2 rows, then 1 row)
(is (= 2 (call-count)))
(is (nil? (t2/select-fn-set :id :model/QueryExecution {:where [:in :id [qe1-id qe2-id qe3-id]]}))))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment