diff --git a/.clj-kondo/config.edn b/.clj-kondo/config.edn index b33a1249d2f7a3027eeff13ad721b0c1db1850b3..58124caaf2deeb238a7c14b776832b12faa6fb91 100644 --- a/.clj-kondo/config.edn +++ b/.clj-kondo/config.edn @@ -611,7 +611,9 @@ toucan.models models}}} :lint-as - {clojure.core.logic/defne clj-kondo.lint-as/def-catch-all + {cljs.cache/defcache clojure.core/deftype + clojure.core.cache/defcache clojure.core/deftype + clojure.core.logic/defne clj-kondo.lint-as/def-catch-all clojure.test.check.clojure-test/defspec clojure.test/deftest clojurewerkz.quartzite.jobs/defjob clojure.core/defn honeysql.helpers/defhelper clj-kondo.lint-as/def-catch-all diff --git a/src/metabase/lib/js/metadata.cljs b/src/metabase/lib/js/metadata.cljs index 5be301e3494fb8bbef88555d453fd2b037226f94..14f4ac1cf676b489138b479f563d2821eb4ad291 100644 --- a/src/metabase/lib/js/metadata.cljs +++ b/src/metabase/lib/js/metadata.cljs @@ -25,12 +25,6 @@ ;;; ;;; where keys are a map of String ID => metadata -;; TODO: This is always wrapped with `keyword` in its usage so that may as well be memoized too. -(def ^:private ^{:arglists '([k])} memoized-kebab-key - "Even tho [[u/->kebab-case-en]] has LRU memoization, plain memoization is significantly faster, and since the keys - we're parsing here are bounded it's fine to memoize this stuff forever." - (memoize u/->kebab-case-en)) - (defn- object-get [obj k] (when (and obj (js-in k obj)) (gobject/get obj k))) @@ -108,7 +102,7 @@ (comp ;; convert keys to kebab-case keywords (map (fn [[k v]] - [(cond-> (keyword (memoized-kebab-key k)) + [(cond-> (keyword (u/->kebab-case-en k)) rename-key (#(or (rename-key %) %))) v])) ;; remove [[excluded-keys]] @@ -244,7 +238,7 @@ [m] (obj->clj (map (fn [[k v]] - (let [k (keyword (memoized-kebab-key k)) + (let [k (keyword (u/->kebab-case-en k)) k (if (= k :binning-strategy) :strategy k) diff --git a/src/metabase/util.cljc b/src/metabase/util.cljc index a36c0e6b5134fa7678e13e1f1f8adcb23d67ab29..1b2c6906af805c0dd1ebcacb0cb9507be2d4146c 100644 --- a/src/metabase/util.cljc +++ b/src/metabase/util.cljc @@ -219,24 +219,27 @@ (def ^{:arglists '([x])} ->kebab-case-en "Like [[camel-snake-kebab.core/->kebab-case]], but always uses English for lower-casing, supports keywords with namespaces, and returns `nil` when passed `nil` (rather than throwing an exception)." - (memoize/lru (wrap-csk-conversion-fn-to-handle-nil-and-namespaced-keywords ->kebab-case-en*) :lru/threshold 256)) + (memoize/fast-bounded (wrap-csk-conversion-fn-to-handle-nil-and-namespaced-keywords ->kebab-case-en*) + :bounded/threshold 10000)) (def ^{:arglists '([x])} ->snake_case_en "Like [[camel-snake-kebab.core/->snake_case]], but always uses English for lower-casing, supports keywords with namespaces, and returns `nil` when passed `nil` (rather than throwing an exception)." - (memoize/lru (wrap-csk-conversion-fn-to-handle-nil-and-namespaced-keywords ->snake_case_en*) :lru/threshold 256)) + (memoize/fast-bounded (wrap-csk-conversion-fn-to-handle-nil-and-namespaced-keywords ->snake_case_en*) + :bounded/threshold 10000)) (def ^{:arglists '([x])} ->camelCaseEn "Like [[camel-snake-kebab.core/->camelCase]], but always uses English for upper- and lower-casing, supports keywords with namespaces, and returns `nil` when passed `nil` (rather than throwing an exception)." - (memoize/lru (wrap-csk-conversion-fn-to-handle-nil-and-namespaced-keywords ->camelCaseEn*) :lru/threshold 256)) + (memoize/fast-bounded (wrap-csk-conversion-fn-to-handle-nil-and-namespaced-keywords ->camelCaseEn*) + :bounded/threshold 10000)) (def ^{:arglists '([x])} ->SCREAMING_SNAKE_CASE_EN "Like [[camel-snake-kebab.core/->SCREAMING_SNAKE_CASE]], but always uses English for upper- and lower-casing, supports keywords with namespaces, and returns `nil` when passed `nil` (rather than throwing an exception)." - (memoize/lru (wrap-csk-conversion-fn-to-handle-nil-and-namespaced-keywords ->SCREAMING_SNAKE_CASE_EN*) - :lru/threshold 256)) + (memoize/fast-bounded (wrap-csk-conversion-fn-to-handle-nil-and-namespaced-keywords ->SCREAMING_SNAKE_CASE_EN*) + :bounded/threshold 10000)) (defn capitalize-first-char "Like string/capitalize, only it ignores the rest of the string diff --git a/src/metabase/util/memoize.clj b/src/metabase/util/memoize.clj deleted file mode 100644 index 37f05bf0df327ff3b70c2ed19fc51b31ccdab1b3..0000000000000000000000000000000000000000 --- a/src/metabase/util/memoize.clj +++ /dev/null @@ -1,12 +0,0 @@ -(ns metabase.util.memoize - (:require - [clojure.core.memoize :as memoize] - [metabase.shared.util.namespaces :as shared.ns])) - -(comment - memoize/keep-me) - -(shared.ns/import-fns - [memoize - lru - memoizer]) diff --git a/src/metabase/util/memoize.cljc b/src/metabase/util/memoize.cljc new file mode 100644 index 0000000000000000000000000000000000000000..c61fa2c335211b52c3a25cb897dc07064b952428 --- /dev/null +++ b/src/metabase/util/memoize.cljc @@ -0,0 +1,83 @@ +(ns metabase.util.memoize + "Cross-platform version of `clojure.core.memoize`. + + In CLJ this re-exports some of the public functions from `clojure.core.memoize`. + In CLJS it implements the same design in CLJS. + + There are also some extra memoization tools here, like [[fast-memo]] and [[fast-bounded]]." + (:require + [metabase.shared.util.namespaces :as shared.ns] + [metabase.util.memoize.impl.bounded :as bounded] + #?@(:clj ([clojure.core.memoize :as memoize]) + :cljs ([metabase.util.memoize.impl.js :as memoize])))) + +(shared.ns/import-fns + [memoize + lru + memoizer + memo]) + +(defn bounded + "Memoizes a function with zero overhead on a *hit*, but keeping to a bounded size. + + This is intended for functions that have many calls and nearly always *hit*. We want to use [[memo]] but are concerned + about leaking memory if the set of inputs isn't bounded. + + It works by checking the cache size only on a *miss*: if it has reached the (configurable) threshold, then **discard + the cache** and start over with an empty map. There's no bookkeeping overhead on a *hit*, which keeps the frequent + hits very fast. The cost is that it will occasionally recompute even the most frequently hit values. + + The threshold is intended to be big enough to hold everything forever! It's just there as a safety valve in case + the input set is larger than expected. + + This is not quite a drop-in replacement for [[memo]] (or `clojure.core/memoize`) because those will *never* call the + inner function twice for the same arguments, but this will. + + Default `:bounded/threshold` is 1024." + ([f] (bounded f :bounded/threshold 1024)) + ([f tkey threshold] + (assert (= tkey :bounded/threshold) (str "wrong parameter tkey " tkey)) + (memoize/memoizer f (bounded/bounded-cache-factory {} threshold) {}))) + +(defn fast-memo + "Variant of [[memo]] that optimizes a common special case: a function with only a single argument, where that + argument makes a good map key. + + In CLJ, this uses `ConcurrentHashMap.computeIfAbsent` in Clojure for a significant speedup. + Note that this also doesn't support the memoization API like `memo-swap!`, `memoized?` etc. + The key should have a good `Object.hashCode`; Clojure values like strings and keywords have this built in. + + See this thread for an analysis of the performance https://metaboat.slack.com/archives/C04CYTEL9N2/p1702671632956539 + + In CLJS, this is identical to [[memo]], but it's defined in both dialects for convenience." + [f] + #?(:clj (let [cache (java.util.concurrent.ConcurrentHashMap.) + mapping-fn (reify java.util.function.Function + (apply [_this k] + (f k)))] + (fn [k] + (.computeIfAbsent cache k mapping-fn))) + :cljs (memoize/memo f))) + +(defn fast-bounded + "Variant of [[bounded]] that optimizes a common special case: a function with only a single argument, where that + argument makes a good map key. + + In CLJ, this uses `ConcurrentHashMap.computeIfAbsent` in Clojure for a significant speedup. + Note that this also doesn't support the memoization API like `memo-swap!`, `memoized?` etc. + The key should have a good `Object.hashCode`; Clojure values like strings and keywords have this built in. + + In CLJS, this is identical to [[bounded]], but it's defined in both dialects for convenience." + ([f] (fast-bounded f :bounded/threshold 1024)) + ([f tkey threshold] + (assert (= tkey :bounded/threshold) (str "wrong parameter tkey " tkey)) + #?(:clj (let [cache (java.util.concurrent.ConcurrentHashMap.) + mapping-fn (reify java.util.function.Function + (apply [_this k] + (f k)))] + (fn [k] + (when (>= (.size cache) threshold) + ;; If the cache gets too large, empty it in place. + (.clear cache)) + (.computeIfAbsent cache k mapping-fn))) + :cljs (bounded f tkey threshold)))) diff --git a/src/metabase/util/memoize/impl/bounded.cljc b/src/metabase/util/memoize/impl/bounded.cljc new file mode 100644 index 0000000000000000000000000000000000000000..d10acbc5a7199e02cfc23991cdf4ddc0c797aba3 --- /dev/null +++ b/src/metabase/util/memoize/impl/bounded.cljc @@ -0,0 +1,32 @@ +(ns metabase.util.memoize.impl.bounded + (:require + [metabase.util.log :as log] + #?(:clj [clojure.core.cache :as cache] + :cljs [cljs.cache :as cache]))) + +(cache/defcache BoundedCache [cache threshold] + cache/CacheProtocol + (lookup [_ item] (get cache item)) + (lookup [_ item not-found] (get cache item not-found)) + (has? [_ item] (contains? cache item)) + (hit [this _item] this) + (miss [_ item result] + (if (< (count cache) threshold) + (BoundedCache. (assoc cache item result) threshold) + ;; It's too big! Throw away the original cache and start over! + (do + (log/warnf "BoundedCache threshold (%d) exceeded - runaway cache? threshold too small?" threshold) + (BoundedCache. {item result} threshold)))) + (evict [_ item] (BoundedCache. (dissoc cache item) threshold)) + (seed [_ base] + (assert (< (count base) threshold) "cache seed is larger than the threshold") + (BoundedCache. base threshold))) + +(defn bounded-cache-factory + "Create a bounded [[clojure.core.cache]]-compatible cache. This is a basic map with no bookkeeping on `hit` but + that will completely discard the cache if it exceeds the `threshold` size. + + Intended to be used with [[metabase.util.memoize/bounded]], rather than called directly." + [base threshold] + {:pre [(map? base)]} + (BoundedCache. base threshold)) diff --git a/src/metabase/util/memoize.cljs b/src/metabase/util/memoize/impl/js.cljs similarity index 68% rename from src/metabase/util/memoize.cljs rename to src/metabase/util/memoize/impl/js.cljs index e30f84748fb4fe7750ed524ce2090b379f9fefbe..38638a32d6bbd934dd7df4b0782078b5487e4108 100644 --- a/src/metabase/util/memoize.cljs +++ b/src/metabase/util/memoize/impl/js.cljs @@ -1,6 +1,9 @@ -(ns metabase.util.memoize - "Copied from clojure.core.memoize." - (:require [cljs.cache :as cache])) +(ns metabase.util.memoize.impl.js + "CLJS-specific implementation code for [[metabase.util.memoize]]. + + Not to be referenced from other code!" + (:require + [cljs.cache :as cache])) ;; Similar to clojure.lang.Delay, but will not memoize an exception and will ;; instead retry. @@ -128,3 +131,39 @@ ([f base key threshold] (assert (= key :lru/threshold) (str "wrong parameter key " key)) (memoizer f (cache/lru-cache-factory {} :threshold threshold) base))) + +(defn memo + "Used as a more flexible alternative to Clojure's core `memoization` + function. Memoized functions built using `memo` will respond to + the core.memo manipulable memoization utilities. As a nice bonus, + you can use `memo` in place of `memoize` without any additional + changes, with the added guarantee that the memoized function will + only be called once for a given sequence of arguments (`memoize` + can call the function multiple times when concurrent calls are + made with the same sequence of arguments). + + The default way to use this function is to simply supply a function + that will be memoized. Additionally, you may also supply a map + of the form `'{[42] 42, [108] 108}` where keys are a vector + mapping expected argument values to arity positions. The map values + are the return values of the memoized function. + + If the supplied function has metadata containing an + `:clojure.core.memoize/args-fn` key, the value is assumed to be a + function that should be applied to the arguments to produce a + subset or transformed sequence of arguments that are used for the + key in the cache (the full, original arguments will still be used + to call the function). This allows you to memoize functions where + one or more arguments are irrelevant for memoization, such as the + `clojure.java.jdbc` functions, whose first argument may include + a (mutable) JDBC `Connection` object: + + (memo/memo (with-meta jdbc/execute! {::memo/args-fn rest})) + + You can access the memoization cache directly via the `:clojure.core.memoize/cache` key + on the memoized function's metadata. However, it is advised to + use the core.memo primitives instead as implementation details may + change over time." + ([f] (memo f {})) + ([f seed] + (memoizer f (cache/basic-cache-factory {}) seed))) diff --git a/test/metabase/util/memoize_test.cljc b/test/metabase/util/memoize_test.cljc new file mode 100644 index 0000000000000000000000000000000000000000..bc5afc7e861909162b2ce180d151bf034792d432 --- /dev/null +++ b/test/metabase/util/memoize_test.cljc @@ -0,0 +1,75 @@ +(ns metabase.util.memoize-test + (:require + [clojure.string :as str] + [clojure.test :refer [deftest is testing]] + [clojure.test.check.generators :as gen] + [metabase.util.memoize :as memo])) + +(defn- call-count-harness [memoizer inner-fn] + (let [misses (atom 0) + calls (atom 0) + wrapped-inner (fn [x] + (swap! misses inc) + (inner-fn x)) + memoized (memoizer wrapped-inner) + wrapped-memo (fn [x] + (swap! calls inc) + (memoized x))] + {:misses misses + :calls calls + :f wrapped-memo + :reset (fn [] + (reset! misses 0) + (reset! calls 0))})) + +(defn- round-robin [times keyspace f exp-fn] + (let [exp-map (into {} (map (juxt identity exp-fn)) keyspace)] + (dotimes [_ times] + (doseq [k keyspace] + (is (= (get exp-map k ::not-found) + (f k))))))) + +(defn- gen-keyspace [n] + (-> (gen/set (gen/such-that seq gen/string) {:num-elements n}) + (gen/sample 1) + first + vec)) + +(defn- never-evicts [memoizer] + (let [keyspace (gen-keyspace 100) + {:keys [f calls misses]} (call-count-harness memoizer str/reverse)] + ;; Now we deliberately call f for each entry in keyspace, ten times around. + (round-robin 10 keyspace f str/reverse) + (is (= 100 @misses) + "should be as many misses as unique inputs") + (is (= 1000 @calls) + "and 10 calls for each unique input"))) + +(deftest ^:parallel memoize-never-evicts-test + (never-evicts memo/memo)) + +(deftest ^:parallel fast-memoize-never-evicts-test + (never-evicts memo/fast-memo)) + +(deftest ^:parallel bounded-never-evicts-if-large-enough-test + (never-evicts #(memo/bounded % :bounded/threshold 100))) + +(deftest ^:parallel fast-bounded-never-evicts-if-large-enough-test + (never-evicts #(memo/fast-bounded % :bounded/threshold 101))) + +(deftest ^:parallel bounded-evicts-when-keyspace-overflows-test + (let [keyspace (gen-keyspace 100) + {:keys [f calls misses reset]} (call-count-harness #(memo/bounded % :bounded/threshold 50) str/reverse)] + ;; 10 round-robin calls will expect to evict twice per lap, and never hit! + (testing "10 round-robin calls will never hit" + (round-robin 10 keyspace f str/reverse) + (is (= 1000 @misses)) + (is (= 1000 @calls))) + (testing "10000 randomly sampled calls will hit sometimes" + ;; This hits 25-30% of the time, empirically. + (reset) + (doseq [k (gen/sample (gen/elements keyspace) 10000)] + (is (= (str/reverse k) + (f k)))) + (is (= @calls 10000)) + (is (< @misses 10000)))))