From 03aff864096db3dffebd0ce869f6297cc4d4bde6 Mon Sep 17 00:00:00 2001
From: appleby <86076+appleby@users.noreply.github.com>
Date: Wed, 4 Sep 2024 19:20:26 -0600
Subject: [PATCH] Store ::nil markers in CachedMetadataProvider for failed
 lookups (#47589)

* Fix typo in MetadataProvider docstring

* Fix typo in caching-test: s/Second/Third/

* Store ::nil markers in CachedMetadataProvider for failed lookups

Store ::nil markers in CachedMetadataProvider for any ids for which the wrapped/uncached upstream provider fails to return
metadatas. This prevents repeatedly querying the uncached-provider for ids that don't exist.

The downside is that if the uncached-provider suddenly starts returning metadata for an id that previously did not
exist, we won't pick up on it, but the assumption here is that this is no different / worse than cache invalidation
for existing ids that happen to change after we cache them.

* PR suggestion: remove unnecessary test assertion
---
 .../lib/metadata/cached_provider.cljc         | 14 +++++++---
 src/metabase/lib/metadata/protocols.cljc      |  2 +-
 .../lib/metadata/cached_provider_test.cljc    | 26 +++++++++++++++----
 3 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/src/metabase/lib/metadata/cached_provider.cljc b/src/metabase/lib/metadata/cached_provider.cljc
index d01f3c2119a..1c6c25be59a 100644
--- a/src/metabase/lib/metadata/cached_provider.cljc
+++ b/src/metabase/lib/metadata/cached_provider.cljc
@@ -58,9 +58,17 @@
           (log/tracef "Already fetched %s: %s" metadata-type (pr-str (sort (set/intersection (set ids) existing-ids))))
           (when (seq missing-ids)
             (log/tracef "Need to fetch %s: %s" metadata-type (pr-str (sort missing-ids)))
-            ;; TODO -- we should probably store `::nil` markers for things we tried to fetch that didn't exist
-            (doseq [instance (lib.metadata.protocols/metadatas uncached-provider metadata-type missing-ids)]
-              (store-in-cache! cache [metadata-type (:id instance)] instance))))))
+            (let [fetched-metadatas (lib.metadata.protocols/metadatas uncached-provider metadata-type missing-ids)
+                  fetched-ids       (map :id fetched-metadatas)
+                  unfetched-ids     (set/difference (set missing-ids) (set fetched-ids))]
+              (when (seq fetched-ids)
+                (log/tracef "Fetched %s: %s" metadata-type (pr-str (sort fetched-ids)))
+                (doseq [instance fetched-metadatas]
+                  (store-in-cache! cache [metadata-type (:id instance)] instance)))
+              (when (seq unfetched-ids)
+                (log/tracef "Failed to fetch %s: %s" metadata-type (pr-str (sort unfetched-ids)))
+                (doseq [unfetched-id unfetched-ids]
+                  (store-in-cache! cache [metadata-type unfetched-id] ::nil))))))))
     (into []
           (keep (fn [id]
                   (get-in-cache cache [metadata-type id])))
diff --git a/src/metabase/lib/metadata/protocols.cljc b/src/metabase/lib/metadata/protocols.cljc
index 4fbdaabc606..16cb46a6772 100644
--- a/src/metabase/lib/metadata/protocols.cljc
+++ b/src/metabase/lib/metadata/protocols.cljc
@@ -9,7 +9,7 @@
 
 (#?(:clj p/defprotocol+ :cljs defprotocol) MetadataProvider
   "Protocol for something that we can get information about Tables and Fields
-  from. This can be provided in various ways various ways:
+  from. This can be provided in various ways:
 
   1. By raw metadata attached to the query itself
 
diff --git a/test/metabase/lib/metadata/cached_provider_test.cljc b/test/metabase/lib/metadata/cached_provider_test.cljc
index 86d09771767..dc951bdf0b8 100644
--- a/test/metabase/lib/metadata/cached_provider_test.cljc
+++ b/test/metabase/lib/metadata/cached_provider_test.cljc
@@ -10,15 +10,19 @@
 
 (deftest ^:parallel caching-test
   (let [fetch-count (atom 0)
+        missing-id  123
         provider    (lib.metadata.cached-provider/cached-metadata-provider
                      (reify
                        lib.metadata.protocols/MetadataProvider
                        (metadatas [_this metadata-type ids]
                          (case metadata-type
-                           :metadata/table (for [id ids]
-                                             (do
-                                               (swap! fetch-count inc)
-                                               (assoc (meta/table-metadata :venues) :id id)))))))]
+                           :metadata/table
+                           (->> (for [id ids]
+                                  (do
+                                    (swap! fetch-count inc)
+                                    (when (not= id missing-id)
+                                      (assoc (meta/table-metadata :venues) :id id))))
+                                (filter some?))))))]
     (testing "Initial fetch"
       (is (=? {:id 1}
               (lib.metadata.protocols/table provider 1)))
@@ -29,7 +33,7 @@
               (lib.metadata.protocols/table provider 1)))
       (is (= 1
              @fetch-count)))
-    (testing "Second fetch"
+    (testing "Third fetch"
       (is (=? {:id 1}
               (lib.metadata.protocols/table provider 1)))
       (is (= 1
@@ -50,6 +54,18 @@
                  {:id 2}]
                 (lib.metadata.protocols/metadatas provider :metadata/table #{1 2})))
         (is (= 2
+               @fetch-count)))
+      (testing "Fetch a missing id, first fetch should inc fetch count"
+        (let [results (lib.metadata.protocols/metadatas provider :metadata/table #{1 missing-id})]
+          (is (=? [{:id 1}]
+                  results)))
+        (is (= 3
+               @fetch-count)))
+      (testing "Fetch a missing id, second fetch should not inc fetch count"
+        (let [results (lib.metadata.protocols/metadatas provider :metadata/table #{1 missing-id})]
+          (is (=? [{:id 1}]
+                  results)))
+        (is (= 3
                @fetch-count))))))
 
 (deftest ^:parallel equality-test
-- 
GitLab