From d53e7e493b989d2776345ee782cd41653c35faf8 Mon Sep 17 00:00:00 2001 From: lbrdnk <lbrdnk@users.noreply.github.com> Date: Mon, 8 Jul 2024 16:18:25 +0200 Subject: [PATCH] Add tables pre-fetching to dashboard load (#45107) * Move collect-source-tables to util * Add table prefetching * Add test comparing call counts of select * Update comment * Add exception handling * Add nil handling * Handle possibye empty table-ids * Update exception handling and test case * Update src/metabase/models/card.clj Co-authored-by: Braden Shepherdson <braden@metabase.com> --------- Co-authored-by: Braden Shepherdson <braden@metabase.com> --- src/metabase/api/query_metadata.clj | 10 +------ src/metabase/lib/util.cljc | 9 ++++++ src/metabase/models/card.clj | 26 ++++++++++++++++++ test/metabase/api/dashboard_test.clj | 41 ++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+), 9 deletions(-) diff --git a/src/metabase/api/query_metadata.clj b/src/metabase/api/query_metadata.clj index f3c7f7f61e5..8ebd24572fa 100644 --- a/src/metabase/api/query_metadata.clj +++ b/src/metabase/api/query_metadata.clj @@ -57,14 +57,6 @@ :fields (->> (:field dependents) metadata-for-field-dependents)}) -(defn- collect-source-tables - [query] - (let [from-joins (mapcat collect-source-tables (:joins query))] - (if-let [source-query (:source-query query)] - (concat (collect-source-tables source-query) from-joins) - (cond->> from-joins - (:source-table query) (cons (:source-table query)))))) - (defn- dependents-for-cards [cards] (let [cards-by-db (group-by :database_id cards) db->mp (into {} (map (juxt identity lib.metadata.jvm/application-database-metadata-provider) @@ -72,7 +64,7 @@ (doseq [[database-id cards] cards-by-db] (let [{:keys [card-ids table-ids]} (->> cards - (mapcat (comp collect-source-tables #(-> % :dataset_query :query))) + (mapcat (comp lib.util/collect-source-tables #(-> % :dataset_query :query))) partition-table-ids) real-card-ids (filter pos-int? (concat (map :id cards) card-ids)) ; xray cards don't have real ids mp (db->mp database-id) diff --git a/src/metabase/lib/util.cljc b/src/metabase/lib/util.cljc index 8e6cede92b5..81ab4f39552 100644 --- a/src/metabase/lib/util.cljc +++ b/src/metabase/lib/util.cljc @@ -627,3 +627,12 @@ (into #{} (comp cat (filter some?)) (lib.util.match/match coll [:field opts (id :guard int?)] [id (:source-field opts)])))) + +(defn collect-source-tables + "Return sequence of source tables from `query`." + [query] + (let [from-joins (mapcat collect-source-tables (:joins query))] + (if-let [source-query (:source-query query)] + (concat (collect-source-tables source-query) from-joins) + (cond->> from-joins + (:source-table query) (cons (:source-table query)))))) diff --git a/src/metabase/models/card.clj b/src/metabase/models/card.clj index 6cc97616013..41309adae49 100644 --- a/src/metabase/models/card.clj +++ b/src/metabase/models/card.clj @@ -15,6 +15,8 @@ [metabase.events :as events] [metabase.legacy-mbql.normalize :as mbql.normalize] [metabase.lib.core :as lib] + [metabase.lib.metadata.jvm :as lib.metadata.jvm] + [metabase.lib.metadata.protocols :as lib.metadata.protocols] [metabase.lib.schema.template-tag :as lib.schema.template-tag] [metabase.lib.util :as lib.util] [metabase.models.audit-log :as audit-log] @@ -140,6 +142,24 @@ :mbql/query (-> query lib/normalize lib.util/source-card-id) nil))) +(defn- card->integer-table-ids + "Return integer source table ids for card's :dataset_query." + [card] + (when-some [query (-> card :dataset_query :query)] + (not-empty (filter pos-int? (lib.util/collect-source-tables query))))) + +(defn- prefetch-tables-for-cards! + "Collect tables from `dataset-cards` and prefetch metadata. Should be used only with metdata provider caching + enabled, as per https://github.com/metabase/metabase/pull/45050. Returns `nil`." + [dataset-cards] + (let [db-id->table-ids (-> (group-by :database_id dataset-cards) + (update-vals (partial into #{} (comp (mapcat card->integer-table-ids) + (remove nil?)))))] + (doseq [[db-id table-ids] db-id->table-ids + :let [mp (lib.metadata.jvm/application-database-metadata-provider db-id)] + :when (seq table-ids)] + (lib.metadata.protocols/metadatas mp :metadata/table table-ids)))) + (defn with-can-run-adhoc-query "Adds can_run_adhoc_query to each card." [cards] @@ -147,6 +167,12 @@ source-card-ids (into #{} (keep (comp source-card-id :dataset_query)) dataset-cards)] + ;; Prefetching code should not propagate any exceptions. + (when lib.metadata.jvm/*metadata-provider-cache* + (try + (prefetch-tables-for-cards! dataset-cards) + (catch Throwable t + (log/errorf t "Failed prefething cards `%s`." (pr-str (map :id dataset-cards)))))) (binding [query-perms/*card-instances* (when (seq source-card-ids) (t2/select-fn->fn :id identity [Card :id :collection_id] :id [:in source-card-ids]))] diff --git a/test/metabase/api/dashboard_test.clj b/test/metabase/api/dashboard_test.clj index 0643f1dffd2..f5d5dec7fa5 100644 --- a/test/metabase/api/dashboard_test.clj +++ b/test/metabase/api/dashboard_test.clj @@ -4736,3 +4736,44 @@ (testing "only construct the MetadataProvider once" (is (= {(mt/id) 1} @provider-counts)))))))) + +(deftest ^:synchronized dashboard-table-prefetch-test + (t2.with-temp/with-temp + [:model/Dashboard d {:name "D"} + :model/Card c1 {:name "C1" + :dataset_query {:database (mt/id) + :type :query + :query {:source-table (mt/id :products)}}} + :model/Card c2 {:name "C2" + :dataset_query {:database (mt/id) + :type :query + :query {:source-table (mt/id :orders)}}} + :model/DashboardCard _ {:dashboard_id (:id d) + :card_id (:id c1)} + :model/DashboardCard _ {:dashboard_id (:id d) + :card_id (:id c2)}] + (let [original-select-fn @#'t2/select + uncached-calls-count (atom 0) + cached-calls-count (atom 0)] + ;; Get _uncached_ call count of t2/select count for :metadata/table + (with-redefs [t2/select (fn [& args] + (when (= :metadata/table (first args)) + (swap! uncached-calls-count inc)) + (apply original-select-fn args))] + (mt/user-http-request :crowberto :get 200 (format "dashboard/%d" (:id d))) + (mt/user-http-request :crowberto :get 200 (format "dashboard/%d/query_metadata" (:id d)))) + ;; Get _cached_ call count of t2/select count for :metadata/table + (let [load-id (str (random-uuid))] + (with-redefs [t2/select (fn [& args] + (when (= :metadata/table (first args)) + (swap! cached-calls-count inc)) + (apply original-select-fn args))] + (mt/user-http-request :crowberto :get 200 + (format "dashboard/%d?dashboard_load_id=%s" (:id d) load-id)) + (mt/user-http-request :crowberto :get 200 + (format "dashboard/%d/query_metadata?dashboard_load_id=%s" (:id d) load-id)))) + (testing "Call count for :metadata/table is smaller with caching in place" + (is (< @cached-calls-count @uncached-calls-count))) + ;; If we need more for _some reason_, this test should be updated accordingly. + (testing "At most 1 db call should be executed for :metadata/tables" + (is (= @cached-calls-count 1)))))) -- GitLab