From c3f7e594e64e7fb76399c2a658febb3fce44d497 Mon Sep 17 00:00:00 2001 From: Alexander Solovyov <alexander@solovyov.net> Date: Mon, 4 Dec 2023 09:06:50 +0200 Subject: [PATCH] store query_execution.cache_hash (#36223) --- .../row_level_restrictions_test.clj | 8 ++-- .../migrations/001_update_migrations.yaml | 15 +++++++ .../query_processor/middleware/cache.clj | 27 +++++++------ .../middleware/process_userland_query.clj | 21 +++++----- test/metabase/api/dataset_test.clj | 3 +- .../query_processor/middleware/cache_test.clj | 39 +++++++++++-------- .../process_userland_query_test.clj | 4 +- .../query_processor/streaming_test.clj | 3 +- test/metabase/query_processor_test.clj | 2 +- 9 files changed, 77 insertions(+), 45 deletions(-) diff --git a/enterprise/backend/test/metabase_enterprise/sandbox/query_processor/middleware/row_level_restrictions_test.clj b/enterprise/backend/test/metabase_enterprise/sandbox/query_processor/middleware/row_level_restrictions_test.clj index 961a9fe2877..a5fd2412b38 100644 --- a/enterprise/backend/test/metabase_enterprise/sandbox/query_processor/middleware/row_level_restrictions_test.clj +++ b/enterprise/backend/test/metabase_enterprise/sandbox/query_processor/middleware/row_level_restrictions_test.clj @@ -731,7 +731,7 @@ (testing "Run the query, should not be cached" (let [result (run-query)] (is (= nil - (:cached result))) + (:cached (:cache/details result)))) (is (= [[10]] (mt/rows result))))) (testing "Cache entry should be saved within 5 seconds" @@ -742,7 +742,7 @@ (testing "Run it again, should be cached" (let [result (run-query)] (is (= true - (:cached result))) + (:cached (:cache/details result)))) (is (= [[10]] (mt/rows result))))) (testing "Run the query with different User attributes, should not get the cached result" @@ -753,7 +753,7 @@ (:login_attributes @api/*current-user*))) (let [result (run-query)] (is (= nil - (:cached result))) + (:cached (:cache/details result)))) (is (= [[9]] (mt/rows result))))))))))) @@ -1042,7 +1042,7 @@ query (t2/select-one-fn :dataset_query Card :id card-id) run-query (fn [] (let [results (qp/process-query (assoc query :cache-ttl 100))] - {:cached? (boolean (:cached results)) + {:cached? (boolean (:cached (:cache/details results))) :num-rows (count (mt/rows results))}))] (mt/with-temporary-setting-values [enable-query-caching true query-caching-min-ttl 0] diff --git a/resources/migrations/001_update_migrations.yaml b/resources/migrations/001_update_migrations.yaml index e242bac5005..f08da0fa327 100644 --- a/resources/migrations/001_update_migrations.yaml +++ b/resources/migrations/001_update_migrations.yaml @@ -4157,6 +4157,21 @@ databaseChangeLog: - column: name: started_at + - changeSet: + id: v48.00-061 + author: piranha + comment: 'Adds query_execution.cache_hash -> query_cache.query_hash' + changes: + - addColumn: + tableName: query_execution + columns: + - column: + name: cache_hash + type: ${blob.type} + constraints: + nullable: true + remarks: 'Hash of normalized query, calculated in middleware.cache' + # >>>>>>>>>> DO NOT ADD NEW MIGRATIONS BELOW THIS LINE! ADD THEM ABOVE <<<<<<<<<< diff --git a/src/metabase/query_processor/middleware/cache.clj b/src/metabase/query_processor/middleware/cache.clj index 168a7b8f6c3..8fcf36eb12a 100644 --- a/src/metabase/query_processor/middleware/cache.clj +++ b/src/metabase/query_processor/middleware/cache.clj @@ -107,13 +107,17 @@ (add-object-to-cache! (if (map? result) (m/dissoc-in result [:data :rows]) {})) - (let [duration-ms (- (System/currentTimeMillis) start-time)] - (log/info (trs "Query took {0} to run; minimum for cache eligibility is {1}" - (u/format-milliseconds duration-ms) (u/format-milliseconds (min-duration-ms)))) - (when (and @has-rows? - (> duration-ms (min-duration-ms))) + (let [duration-ms (- (System/currentTimeMillis) start-time) + eligible? (and @has-rows? + (> duration-ms (min-duration-ms)))] + (log/infof "Query took %s to run; minimum for cache eligibility is %s; %s" + (u/format-milliseconds duration-ms) + (u/format-milliseconds (min-duration-ms)) + (if eligible? "eligible" "not eligible")) + (when eligible? (cache-results! query-hash))) - (rf result)) + (rf (cond-> result + (map? result) (assoc-in [:cache/details :hash] query-hash)))) ([acc row] (add-object-to-cache! row) @@ -126,7 +130,7 @@ (defn- cached-results-rff "Reducing function for cached results. Merges the final object in the cached results, the `final-metdata` map, with the reduced value assuming it is a normal metadata map." - [rff] + [rff query-hash] (fn [{:keys [last-ran], :as metadata}] (let [metadata (dissoc metadata :last-ran :cache-version) rf (rff metadata) @@ -141,7 +145,7 @@ result* (-> (if normal-format? (merge-with merge @final-metadata (unreduced result)) (unreduced result)) - (assoc :cached true, :updated_at last-ran))] + (assoc :cache/details {:hash query-hash :cached true :updated_at last-ran}))] (rf (cond-> result* (reduced? result) reduced)))) @@ -164,7 +168,7 @@ (when (and (= (:cache-version metadata) cache-version) reducible-rows) (log/tracef "Reducing cached rows...") - (qp.context/reducef (cached-results-rff rff) context metadata reducible-rows) + (qp.context/reducef (cached-results-rff rff query-hash) context metadata reducible-rows) (log/tracef "All cached rows reduced") ::ok))))) ::miss) @@ -181,8 +185,9 @@ (defn- run-query-with-cache [qp {:keys [cache-ttl middleware], :as query} rff {:keys [reducef], :as context}] - ;; TODO - Query will already have `info.hash` if it's a userland query. I'm not 100% sure it will be the same hash, - ;; because this is calculated after normalization, instead of before + ;; Query will already have `info.hash` if it's a userland query. It's not the same hash, because this is calculated + ;; after normalization, instead of before. This is necessary to make caching work properly with sandboxed users, see + ;; #14388. (let [query-hash (qp.util/query-hash query) result (maybe-reduce-cached-results (:ignore-cached-results? middleware) query-hash cache-ttl rff context)] (when (= result ::miss) diff --git a/src/metabase/query_processor/middleware/process_userland_query.clj b/src/metabase/query_processor/middleware/process_userland_query.clj index 42fed765990..2ea6f23e0c9 100644 --- a/src/metabase/query_processor/middleware/process_userland_query.clj +++ b/src/metabase/query_processor/middleware/process_userland_query.clj @@ -59,10 +59,11 @@ (catch Throwable e (log/error e (trs "Error saving query execution info")))))))) -(defn- save-successful-query-execution! [cached? is_sandboxed? query-execution result-rows] +(defn- save-successful-query-execution! [cache-details is_sandboxed? query-execution result-rows] (let [qe-map (assoc query-execution - :cache_hit (boolean cached?) - :result_rows result-rows + :cache_hit (boolean (:cached cache-details)) + :cache_hash (:hash cache-details) + :result_rows result-rows :is_sandboxed (boolean is_sandboxed?))] (save-query-execution! qe-map))) @@ -74,14 +75,15 @@ ;;; | Middleware | ;;; +----------------------------------------------------------------------------------------------------------------+ -(defn- success-response [{query-hash :hash, :as query-execution} {cached? :cached, :as result}] +(defn- success-response [{query-hash :hash, :as query-execution} {cache :cache/details :as result}] (merge (-> query-execution add-running-time (dissoc :error :hash :executor_id :action_id :is_sandboxed :card_id :dashboard_id :pulse_id :result_rows :native)) - result - {:status :completed - :average_execution_time (when cached? + (dissoc result :cache/details) + {:cached (boolean (:cached cache)) + :status :completed + :average_execution_time (when (:cached cache) (query/average-execution-time-ms query-hash))})) (defn- add-and-save-execution-info-xform! [execution-info rf] @@ -92,12 +94,13 @@ ([] (rf)) - ([acc] ;; We don't actually have a guarantee that it's from a card just because it's userland + ([acc] + ;; We don't actually have a guarantee that it's from a card just because it's userland (when (integer? (:card_id execution-info)) (events/publish-event! :event/card-query {:user-id (:executor_id execution-info) :card-id (:card_id execution-info) :context (:context execution-info)})) - (save-successful-query-execution! (:cached acc) (get-in acc [:data :is_sandboxed]) execution-info @row-count) + (save-successful-query-execution! (:cache/details acc) (get-in acc [:data :is_sandboxed]) execution-info @row-count) (rf (if (map? acc) (success-response execution-info acc) acc))) diff --git a/test/metabase/api/dataset_test.clj b/test/metabase/api/dataset_test.clj index 7a418eec527..362ffd496a2 100644 --- a/test/metabase/api/dataset_test.clj +++ b/test/metabase/api/dataset_test.clj @@ -43,7 +43,7 @@ (m/dissoc-in [:data :results_metadata]) (m/dissoc-in [:data :insights]))] (cond - (contains? #{:id :started_at :running_time :hash} k) + (contains? #{:id :started_at :running_time :hash :cache_hash} k) [k (boolean v)] (and (= :data k) (contains? v :native_form)) @@ -109,6 +109,7 @@ :id true :action_id nil :cache_hit false + :cache_hash false :database_id (mt/id) :started_at true :running_time true} diff --git a/test/metabase/query_processor/middleware/cache_test.clj b/test/metabase/query_processor/middleware/cache_test.clj index 06920ed8b27..39e2bded9b5 100644 --- a/test/metabase/query_processor/middleware/cache_test.clj +++ b/test/metabase/query_processor/middleware/cache_test.clj @@ -142,7 +142,7 @@ (let [result (apply run-query* args)] (is (partial= {:status :completed} result)) - (if (:cached result) + (if (:cached (:cache/details result)) :cached :not-cached))) @@ -281,12 +281,13 @@ (run-query) (mt/wait-for-result save-chan) (let [result (run-query*)] - (is (= {:data {} - :cached true - :updated_at #t "2020-02-19T02:31:07.798Z[UTC]" - :row_count 8 - :status :completed} - result))))))) + (is (=? {:data {} + :cache/details {:cached true + :updated_at #t "2020-02-19T02:31:07.798Z[UTC]" + :cache-hash some?} + :row_count 8 + :status :completed} + result))))))) (deftest e2e-test (testing "Test that the caching middleware actually working in the context of the entire QP" @@ -305,13 +306,16 @@ _ (while (a/poll! save-chan)) _ (mt/wait-for-result save-chan) cached-result (qp/process-query query)] - (is (= {:cached true - :updated_at #t "2020-02-19T04:44:26.056Z[UTC]" - :row_count 5 - :status :completed} + (is (=? {:cache/details {:cached true + :updated_at #t "2020-02-19T04:44:26.056Z[UTC]" + :cache-hash some?} + :row_count 5 + :status :completed} (dissoc cached-result :data)) "Results should be cached") - (is (= original-result (dissoc cached-result :cached :updated_at)) + (is (= (seq (-> original-result :cache/details :cache-hash)) + (seq (-> cached-result :cache/details :cache-hash)))) + (is (= (dissoc original-result :cache/details) (dissoc cached-result :cache/details)) "Cached result should be in the same format as the uncached result, except for added keys")))))))) (testing "Cached results don't impact average execution time" (let [query (assoc (mt/mbql-query venues {:order-by [[:asc $id]] :limit 42}) @@ -382,7 +386,7 @@ (qp/process-query query rff context)) (mt/wait-for-result save-chan)) (is (= true - (:cached (qp/process-query query))) + (:cached (:cache/details (qp/process-query query)))) "Results should be cached") (let [uncached-results (with-open [ostream (java.io.PipedOutputStream.) istream (java.io.PipedInputStream. ostream) @@ -416,11 +420,10 @@ (boolean (:cached (qp/process-query query rff context)))) "Query shouldn't be cached after first run with the mock cache in place")) (mt/wait-for-result save-chan)) - (is (= (-> (assoc normal-results :cached true) - (dissoc :updated_at) + (is (= (-> (assoc normal-results :cache/details {:cached true}) (m/dissoc-in [:data :results_metadata :checksum])) (-> (qp/process-query query) - (dissoc :updated_at) + (update :cache/details select-keys [:cached]) (m/dissoc-in [:data :results_metadata :checksum]))) "Query should be cached and results should match those ran without cache")))))) @@ -463,7 +466,9 @@ chan)))) (testing "Run forbidden query again as superuser again, should be cached" (mw.session/with-current-user (mt/user->id :crowberto) - (is (=? {:cached true} + (is (=? {:cache/details {:cached true + :updated_at some? + :cache-hash some?}} (run-forbidden-query))))) (testing "Run query as regular user, should get perms Exception even though result is cached" (is (thrown-with-msg? diff --git a/test/metabase/query_processor/middleware/process_userland_query_test.clj b/test/metabase/query_processor/middleware/process_userland_query_test.clj index c3b5b5ac954..521c9651c91 100644 --- a/test/metabase/query_processor/middleware/process_userland_query_test.clj +++ b/test/metabase/query_processor/middleware/process_userland_query_test.clj @@ -58,7 +58,8 @@ :json_query query :average_execution_time nil :context nil - :running_time true} + :running_time true + :cached false} (process-userland-query query)) "Result should have query execution info") (is (= {:hash "29f0bca06d6679e873b1f5a3a36dac18a5b4642c6545d24456ad34b1cad4ecc6" @@ -75,6 +76,7 @@ :context nil :running_time true :cache_hit false + :cache_hash nil ;; this is filled only for eligible queries :dashboard_id nil} (qe)) "QueryExecution should be saved")))) diff --git a/test/metabase/query_processor/streaming_test.clj b/test/metabase/query_processor/streaming_test.clj index db163fbced1..ad8c769e0c9 100644 --- a/test/metabase/query_processor/streaming_test.clj +++ b/test/metabase/query_processor/streaming_test.clj @@ -51,7 +51,8 @@ (defn- compare-results [export-format query] (is (= (expected-results* export-format query) - (actual-results* export-format query)))) + (cond-> (actual-results* export-format query) + (= export-format :api) (dissoc :cached))))) (deftest streaming-response-test (testing "Test that the actual results going thru the same steps as an API response are correct." diff --git a/test/metabase/query_processor_test.clj b/test/metabase/query_processor_test.clj index 6043b17cd00..4dc407fe392 100644 --- a/test/metabase/query_processor_test.clj +++ b/test/metabase/query_processor_test.clj @@ -20,7 +20,7 @@ :cache-ttl 10) run-query (fn [] (let [results (qp/process-query query)] - {:cached? (boolean (:cached results)) + {:cached? (boolean (:cached (:cache/details results))) :num-rows (count (mt/rows results))})) expected-results (qp/preprocess query)] (testing "Check preprocess before caching to make sure results make sense" -- GitLab