From d5af9a22f4ec4e6a3ced661722a095fda10f64b5 Mon Sep 17 00:00:00 2001
From: Chris Truter <crisptrutski@users.noreply.github.com>
Date: Thu, 12 Sep 2024 17:49:20 +0200
Subject: [PATCH] Basic batching for search index population (#47892)

---
 src/metabase/search/postgres/index.clj     | 44 ++++++++++++++--------
 src/metabase/search/postgres/ingestion.clj |  9 +++--
 2 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/src/metabase/search/postgres/index.clj b/src/metabase/search/postgres/index.clj
index 9c134994b57..8c992ba10e8 100644
--- a/src/metabase/search/postgres/index.clj
+++ b/src/metabase/search/postgres/index.clj
@@ -78,24 +78,27 @@
     (drop-table! retired-table)
     true))
 
+(defn- entity->entry [entity] ; shapes one entity map into the row map that gets inserted into the index table
+  (-> entity
+      (select-keys ; these keys are carried over from the entity unchanged
+       [:model
+        :model_rank
+        :collection_id
+        :database_id
+        :table_id
+        :archived])
+      (assoc
+       :model_id      (:id entity) ; the entity's :id is stored under :model_id
+       :search_vector [:to_tsvector ; NOTE(review): looks like a HoneySQL call form, i.e. to_tsvector(<language>, CAST(<text> AS text)) - confirm
+                       [:inline tsv-language]
+                       [:cast
+                        (:searchable_text entity)
+                        :text]])))
+
 (defn update!
-  "Create or update the given search index trny"
+  "Create the search index entry for the given entity"
   [entity]
-  (let [entry (-> entity
-                  (select-keys
-                   [:model
-                    :model_rank
-                    :collection_id
-                    :database_id
-                    :table_id
-                    :archived])
-                  (assoc
-                   :model_id      (:id entity)
-                   :search_vector [:to_tsvector
-                                   [:inline tsv-language]
-                                   [:cast
-                                    (:searchable_text entity)
-                                    :text]]))]
+  (let [entry (entity->entry entity)]
     (when @initialized?
       (t2/insert! active-table entry))
     (when @reindexing?
@@ -150,6 +153,15 @@
          (str/join " | ")
          maybe-complete)))
 
+(defn batch-update!
+  "Insert search index entries for all of `entities` in bulk: into the active table when initialized, and into the pending table while reindexing."
+  [entities]
+  (let [entries (map entity->entry entities)] ; lazy seq, shared by both inserts below
+    (when @initialized?
+      (t2/insert! active-table entries))
+    (when @reindexing?
+      (t2/insert! pending-table entries))))
+
 (defn search-query
   "Query fragment for all models corresponding to a query paramter `:search-term`."
   [search-term]
diff --git a/src/metabase/search/postgres/ingestion.clj b/src/metabase/search/postgres/ingestion.clj
index bb50f288b3e..634243ec5e3 100644
--- a/src/metabase/search/postgres/ingestion.clj
+++ b/src/metabase/search/postgres/ingestion.clj
@@ -11,6 +11,8 @@
    [toucan2.core :as t2]
    [toucan2.realize :as t2.realize]))
 
+(def ^:private insert-batch-size 50) ; max number of entries grouped (via partition-all) into each search.index/batch-update! call
+
 (def ^:private model-rankings
   (zipmap search.config/models-search-order (range)))
 
@@ -40,7 +42,7 @@
 
 (defn- search-items-reducible []
   (-> {:search-string      nil
-       :models             search.config/all-models
+       :models             (disj search.config/all-models "indexed-entity")
        ;; we want to see everything
        :is-superuser?      true
        ;; irrelevant, as we're acting as a super user
@@ -62,5 +64,6 @@
        (eduction
         (comp
          (map t2.realize/realize)
-         (map ->entry)))
-       (run! search.index/update!)))
+         (map ->entry)
+         (partition-all insert-batch-size)))
+       (run! search.index/batch-update!)))
-- 
GitLab