From d53e7e493b989d2776345ee782cd41653c35faf8 Mon Sep 17 00:00:00 2001
From: lbrdnk <lbrdnk@users.noreply.github.com>
Date: Mon, 8 Jul 2024 16:18:25 +0200
Subject: [PATCH] Add tables pre-fetching to dashboard load (#45107)

* Move collect-source-tables to util

* Add table prefetching

* Add test comparing call counts of select

* Update comment

* Add exception handling

* Add nil handling

* Handle possibye empty table-ids

* Update exception handling and test case

* Update src/metabase/models/card.clj

Co-authored-by: Braden Shepherdson <braden@metabase.com>

---------

Co-authored-by: Braden Shepherdson <braden@metabase.com>
---
 src/metabase/api/query_metadata.clj  | 10 +------
 src/metabase/lib/util.cljc           |  9 ++++++
 src/metabase/models/card.clj         | 26 ++++++++++++++++++
 test/metabase/api/dashboard_test.clj | 41 ++++++++++++++++++++++++++++
 4 files changed, 77 insertions(+), 9 deletions(-)

diff --git a/src/metabase/api/query_metadata.clj b/src/metabase/api/query_metadata.clj
index f3c7f7f61e5..8ebd24572fa 100644
--- a/src/metabase/api/query_metadata.clj
+++ b/src/metabase/api/query_metadata.clj
@@ -57,14 +57,6 @@
    :fields (->> (:field dependents)
                 metadata-for-field-dependents)})
 
-(defn- collect-source-tables
-  [query]
-  (let [from-joins (mapcat collect-source-tables (:joins query))]
-    (if-let [source-query (:source-query query)]
-      (concat (collect-source-tables source-query) from-joins)
-      (cond->> from-joins
-        (:source-table query) (cons (:source-table query))))))
-
 (defn- dependents-for-cards [cards]
   (let [cards-by-db (group-by :database_id cards)
         db->mp (into {} (map (juxt identity lib.metadata.jvm/application-database-metadata-provider)
@@ -72,7 +64,7 @@
     (doseq [[database-id cards] cards-by-db]
       (let [{:keys [card-ids table-ids]}
             (->> cards
-                 (mapcat (comp collect-source-tables #(-> % :dataset_query :query)))
+                 (mapcat (comp lib.util/collect-source-tables #(-> % :dataset_query :query)))
                  partition-table-ids)
             real-card-ids (filter pos-int? (concat (map :id cards) card-ids)) ; xray cards don't have real ids
             mp (db->mp database-id)
diff --git a/src/metabase/lib/util.cljc b/src/metabase/lib/util.cljc
index 8e6cede92b5..81ab4f39552 100644
--- a/src/metabase/lib/util.cljc
+++ b/src/metabase/lib/util.cljc
@@ -627,3 +627,12 @@
    (into #{}
          (comp cat (filter some?))
          (lib.util.match/match coll [:field opts (id :guard int?)] [id (:source-field opts)]))))
+
+(defn collect-source-tables
+  "Return sequence of source tables from `query`."
+  [query]
+  (let [from-joins (mapcat collect-source-tables (:joins query))]
+    (if-let [source-query (:source-query query)]
+      (concat (collect-source-tables source-query) from-joins)
+      (cond->> from-joins
+        (:source-table query) (cons (:source-table query))))))
diff --git a/src/metabase/models/card.clj b/src/metabase/models/card.clj
index 6cc97616013..41309adae49 100644
--- a/src/metabase/models/card.clj
+++ b/src/metabase/models/card.clj
@@ -15,6 +15,8 @@
    [metabase.events :as events]
    [metabase.legacy-mbql.normalize :as mbql.normalize]
    [metabase.lib.core :as lib]
+   [metabase.lib.metadata.jvm :as lib.metadata.jvm]
+   [metabase.lib.metadata.protocols :as lib.metadata.protocols]
    [metabase.lib.schema.template-tag :as lib.schema.template-tag]
    [metabase.lib.util :as lib.util]
    [metabase.models.audit-log :as audit-log]
@@ -140,6 +142,24 @@
       :mbql/query (-> query lib/normalize lib.util/source-card-id)
       nil)))
 
+(defn- card->integer-table-ids
+  "Return integer source table ids for card's :dataset_query."
+  [card]
+  (when-some [query (-> card :dataset_query :query)]
+    (not-empty (filter pos-int? (lib.util/collect-source-tables query)))))
+
+(defn- prefetch-tables-for-cards!
+  "Collect tables from `dataset-cards` and prefetch metadata. Should be used only with metdata provider caching
+  enabled, as per https://github.com/metabase/metabase/pull/45050. Returns `nil`."
+  [dataset-cards]
+  (let [db-id->table-ids (-> (group-by :database_id dataset-cards)
+                             (update-vals (partial into #{} (comp (mapcat card->integer-table-ids)
+                                                                  (remove nil?)))))]
+    (doseq [[db-id table-ids] db-id->table-ids
+            :let  [mp (lib.metadata.jvm/application-database-metadata-provider db-id)]
+            :when (seq table-ids)]
+      (lib.metadata.protocols/metadatas mp :metadata/table table-ids))))
+
 (defn with-can-run-adhoc-query
   "Adds can_run_adhoc_query to each card."
   [cards]
@@ -147,6 +167,12 @@
         source-card-ids (into #{}
                               (keep (comp source-card-id :dataset_query))
                               dataset-cards)]
+    ;; Prefetching code should not propagate any exceptions.
+    (when lib.metadata.jvm/*metadata-provider-cache*
+      (try
+        (prefetch-tables-for-cards! dataset-cards)
+      (catch Throwable t
+        (log/errorf t "Failed prefething cards `%s`." (pr-str (map :id dataset-cards))))))
     (binding [query-perms/*card-instances*
               (when (seq source-card-ids)
                 (t2/select-fn->fn :id identity [Card :id :collection_id] :id [:in source-card-ids]))]
diff --git a/test/metabase/api/dashboard_test.clj b/test/metabase/api/dashboard_test.clj
index 0643f1dffd2..f5d5dec7fa5 100644
--- a/test/metabase/api/dashboard_test.clj
+++ b/test/metabase/api/dashboard_test.clj
@@ -4736,3 +4736,44 @@
           (testing "only construct the MetadataProvider once"
             (is (= {(mt/id) 1}
                    @provider-counts))))))))
+
+(deftest ^:synchronized dashboard-table-prefetch-test
+  (t2.with-temp/with-temp
+    [:model/Dashboard     d  {:name "D"}
+     :model/Card          c1 {:name "C1"
+                              :dataset_query {:database (mt/id)
+                                              :type     :query
+                                              :query    {:source-table (mt/id :products)}}}
+     :model/Card          c2 {:name "C2"
+                              :dataset_query {:database (mt/id)
+                                              :type     :query
+                                              :query    {:source-table (mt/id :orders)}}}
+     :model/DashboardCard _  {:dashboard_id       (:id d)
+                              :card_id            (:id c1)}
+     :model/DashboardCard _  {:dashboard_id       (:id d)
+                              :card_id            (:id c2)}]
+    (let [original-select-fn   @#'t2/select
+          uncached-calls-count (atom 0)
+          cached-calls-count   (atom 0)]
+      ;; Get _uncached_ call count of t2/select count for :metadata/table
+      (with-redefs [t2/select (fn [& args]
+                                (when (= :metadata/table (first args))
+                                  (swap! uncached-calls-count inc))
+                                (apply original-select-fn args))]
+        (mt/user-http-request :crowberto :get 200 (format "dashboard/%d" (:id d)))
+        (mt/user-http-request :crowberto :get 200 (format "dashboard/%d/query_metadata" (:id d))))
+      ;; Get _cached_ call count of t2/select count for :metadata/table
+      (let [load-id (str (random-uuid))]
+        (with-redefs [t2/select (fn [& args]
+                                  (when (= :metadata/table (first args))
+                                    (swap! cached-calls-count inc))
+                                  (apply original-select-fn args))]
+          (mt/user-http-request :crowberto :get 200
+                                (format "dashboard/%d?dashboard_load_id=%s" (:id d) load-id))
+          (mt/user-http-request :crowberto :get 200
+                                (format "dashboard/%d/query_metadata?dashboard_load_id=%s" (:id d) load-id))))
+      (testing "Call count for :metadata/table is smaller with caching in place"
+        (is (< @cached-calls-count @uncached-calls-count)))
+      ;; If we need more for _some reason_, this test should be updated accordingly.
+      (testing "At most 1 db call should be executed for :metadata/tables"
+        (is (= @cached-calls-count 1))))))
-- 
GitLab