From 775d9c941d938d4aa78f54f16c88da4806f89937 Mon Sep 17 00:00:00 2001
From: Chris Truter <crisptrutski@users.noreply.github.com>
Date: Wed, 18 Sep 2024 17:56:15 +0200
Subject: [PATCH] Some polish around experiment search (#47986)

---
 dev/src/dev/search.clj                      | 33 +++++++++++----------
 src/metabase/search.clj                     | 20 ++++++-------
 src/metabase/search/postgres/core.clj       | 23 ++++++++------
 src/metabase/search/postgres/index.clj      | 12 ++++----
 test/metabase/search/postgres/core_test.clj | 19 +++++++-----
 5 files changed, 60 insertions(+), 47 deletions(-)

diff --git a/dev/src/dev/search.clj b/dev/src/dev/search.clj
index 7472bfe5d67..58dc51aa29d 100644
--- a/dev/src/dev/search.clj
+++ b/dev/src/dev/search.clj
@@ -25,37 +25,38 @@
   (t2/count :search_index)
 
   ;; doesn't work, need to drop to lower level postgres functions
-  (basic-view (search.postgres/hybrid "satis:*"))
+  (basic-view (#'search.postgres/hybrid "satis:*"))
 
   ;; nope, neither get it as the lexeme is not similar enough
-  (basic-view (search.postgres/hybrid "satisfactory"))
+  (basic-view (#'search.postgres/hybrid "satisfactory"))
   (basic-view (legacy-results "satisfactory"))
 
   (defn- mini-bench [n engine search-term & args]
     #_{:clj-kondo/ignore [:discouraged-var]}
-    (time
-     (dotimes [_ n]
-       (vec
-        (apply
-         (case engine
-           :index-only search.index/search
-           :legacy legacy-results
-           :hybrid search.postgres/hybrid
-           :hybrid-multi search.postgres/hybrid-multi)
-         search-term
-         args)))))
+    (let [f (case engine
+              :index-only   search.index/search
+              :legacy       legacy-results
+              :hybrid       @#'search.postgres/hybrid
+              :hybrid-multi @#'search.postgres/hybrid-multi
+              :minimal      @#'search.postgres/minimal)]
+      (time
+       (dotimes [_ n]
+         (doall (apply f search-term args))))))
 
   (mini-bench 500 :legacy nil)
   (mini-bench 500 :legacy "sample")
   ;; 30x speed-up for test-data on my machine
   (mini-bench 500 :index-only "sample")
+  ;; No noticeaable degradation, without permissions and filters
+  (mini-bench 500 :minimal "sample")
 
   ;; but joining to the "hydrated query" reverses the advantage
   (mini-bench 100 :legacy nil)
   (mini-bench 100 :legacy "sample")
   ;; slower than fetching everything...
   (mini-bench 100 :hybrid "sample")
-  ;; doing both filters... still a little bit more overhead with the join
+  ;; using index + LIKE on the join ... still a little bit more overhead
   (mini-bench 100 :hybrid "sample" {:search-string "sample"})
-  ;; oh! this monstrocity is actually 2x faaster than baseline B-)
-  (mini-bench 100 :hybrid-multi "sample"))
+  ;; oh! this monstrocity is actually 2x faster than baseline B-)
+  (mini-bench 100 :hybrid-multi "sample")
+  (mini-bench 100 :minimal "sample"))
diff --git a/src/metabase/search.clj b/src/metabase/search.clj
index 077040c259b..52940b1c480 100644
--- a/src/metabase/search.clj
+++ b/src/metabase/search.clj
@@ -27,17 +27,17 @@
   []
   (= :postgres (metabase.db/db-type)))
 
+(def ^:private default-engine :in-place)
+
 (defn- query-fn [search-engine]
-  (case search-engine
-    :fulltext (if (is-postgres?)
-                search.postgres/search
-                (do (log/warn ":fulltext search not supported for your AppDb, using :in-place")
-                    search.impl/in-place))
-    :minimal  (if (is-postgres?)
-                search.postgres/search-minimal
-                (do (log/warn ":minimal search not supported for your AppDb, using :in-place")
-                    search.impl/in-place))
-    :in-place search.impl/in-place))
+  (or
+   (case search-engine
+     :fulltext (when (is-postgres?) search.postgres/search)
+     :minimal  (when (is-postgres?) search.postgres/search)
+     :in-place search.impl/in-place)
+
+   (log/warnf "%s search not supported for your AppDb, using %s" search-engine default-engine)
+   default-engine))
 
 (defn supports-index?
   "Does this instance support a search index, e.g. has the right kind of AppDb"
diff --git a/src/metabase/search/postgres/core.clj b/src/metabase/search/postgres/core.clj
index de50c0ebc10..2ab4c0962e6 100644
--- a/src/metabase/search/postgres/core.clj
+++ b/src/metabase/search/postgres/core.clj
@@ -40,7 +40,7 @@
      :archived?          archived?
      :model-ancestors?   true})))
 
-(defn hybrid
+(defn- hybrid
   "Use the index for appling the search string, but rely on the legacy code path for rendering
   the display data, applying permissions, additional filtering, etc.
 
@@ -60,7 +60,7 @@
       (sql/format {:quoted true})
       t2/reducible-query))
 
-(defn hybrid-multi
+(defn- hybrid-multi
   "Perform multiple legacy searches to see if its faster. Perverse!"
   [search-term & {:as search-ctx}]
   (when-not @#'search.index/initialized?
@@ -88,17 +88,22 @@
        (map :legacy_input)
        (map #(json/parse-string % keyword))))
 
-(defn search-minimal
-  "Perform a basic search that only uses the index"
-  [search-ctx]
-  (minimal (:search-string search-ctx)
-           (dissoc search-ctx :search-string)))
+(def ^:private default-engine hybrid-multi)
+
+(defn- search-fn [search-engine]
+  (case search-engine
+    :hybrid       hybrid
+    :hubrid-multi hybrid-multi
+    :minimal      minimal
+    :fulltext     default-engine
+    default-engine))
 
 (defn search
   "Return a reducible-query corresponding to searching the entities via a tsvector."
   [search-ctx]
-  (hybrid-multi (:search-string search-ctx)
-                (dissoc search-ctx :search-string)))
+  (let [f (search-fn (:search-engine search-ctx))]
+    (f (:search-string search-ctx)
+       (dissoc search-ctx :search-string))))
 
 (defn init!
   "Ensure that the search index exists, and has been populated with all the entities."
diff --git a/src/metabase/search/postgres/index.clj b/src/metabase/search/postgres/index.clj
index 50f3eb8d405..ba593d0f5fa 100644
--- a/src/metabase/search/postgres/index.clj
+++ b/src/metabase/search/postgres/index.clj
@@ -90,7 +90,7 @@
         :collection_id
         :database_id
         :display_data
-        :legacy_data
+        :legacy_input
         :table_id
         :archived])
       (update :display_data json/generate-string)
@@ -175,10 +175,12 @@
   [search-term]
   {:select [:model_id :model]
    :from   [active-table]
-   :where  [:raw
-            "search_vector @@ to_tsquery('"
-            tsv-language "', "
-            [:lift (to-tsquery-expr search-term)] ")"]})
+   :where  (if-not search-term
+             [:= [:inline 1] [:inline 1]]
+             [:raw
+              "search_vector @@ to_tsquery('"
+              tsv-language "', "
+              [:lift (to-tsquery-expr search-term)] ")"])})
 
 (defn search
   "Use the index table to search for records."
diff --git a/test/metabase/search/postgres/core_test.clj b/test/metabase/search/postgres/core_test.clj
index af8550ea5a1..f296eefd240 100644
--- a/test/metabase/search/postgres/core_test.clj
+++ b/test/metabase/search/postgres/core_test.clj
@@ -9,7 +9,7 @@
    [toucan2.realize :as t2.realize]))
 
 (def ^:private hybrid
-  (comp t2.realize/realize search.postgres/hybrid))
+  (comp t2.realize/realize #'search.postgres/hybrid))
 
 (def ^:private hybrid-multi #'search.postgres/hybrid-multi)
 
@@ -18,14 +18,19 @@
 #_{:clj-kondo/ignore [:metabase/test-helpers-use-non-thread-safe-functions]}
 (defmacro with-setup [& body]
   `(when (is-postgres?)
+     ;; TODO add more extensive data to search
      (mt/dataset ~'test-data
        (search.postgres/init! true)
        ~@body)))
 
+(def ^:private example-terms
+  "Search queries which should give consistent, non-trivial results across engines, for the test data."
+  [#_nil "data" "dash" "peop" "venue" "rasta"])
+
 (deftest hybrid-test
   (with-setup
     (testing "consistent results between all searches for certain queries\n"
-      (doseq [term ["satisfaction" "e-commerce" "example" "rasta" "new" "revenue" "collection"]]
+      (doseq [term example-terms]
         (testing (str "consistent results, but not ordering\n" term)
           (is (= (set (legacy-results term))
                  (set (hybrid term)))))))))
@@ -47,19 +52,19 @@
 
 (deftest hybrid-multi-test
   (with-setup
-    (testing "consistent results between both hybrid implementations"
-      (doseq [term ["satisfaction" "e-commerce" "example" "rasta" "new" "revenue" "collection"]]
+    (testing "consistent results between both hybrid implementations\n"
+      (doseq [term example-terms]
         (testing term
           (is (= (hybrid term)
                  (hybrid-multi term))))))))
 
 (defn- remove-time [m]
-  (dissoc m :create_at))
+  (dissoc m :created_at :updated_at :last_edited_at))
 
 (deftest minimal-test
   (with-setup
-    (testing "consistent results between both hybrid implementations"
-      (doseq [term ["satisfaction" "e-commerce" "example" "new" "revenue"]]
+    (testing "consistent results between both hybrid implementations\n"
+      (doseq [term example-terms]
         (testing term
           ;; Timestamps are not strings after round trip, but this doesn't matter
           (is (= (map remove-time (hybrid term))
-- 
GitLab