Skip to content
Snippets Groups Projects
Unverified Commit 775d9c94 authored by Chris Truter's avatar Chris Truter Committed by GitHub
Browse files

Some polish around experiment search (#47986)

parent 7251ee12
No related branches found
No related tags found
No related merge requests found
......@@ -25,37 +25,38 @@
(t2/count :search_index)
;; doesn't work, need to drop to lower level postgres functions
(basic-view (search.postgres/hybrid "satis:*"))
(basic-view (#'search.postgres/hybrid "satis:*"))
;; nope, neither get it as the lexeme is not similar enough
(basic-view (search.postgres/hybrid "satisfactory"))
(basic-view (#'search.postgres/hybrid "satisfactory"))
(basic-view (legacy-results "satisfactory"))
(defn- mini-bench [n engine search-term & args]
#_{:clj-kondo/ignore [:discouraged-var]}
(dotimes [_ n]
(case engine
:index-only search.index/search
:legacy legacy-results
:hybrid search.postgres/hybrid
:hybrid-multi search.postgres/hybrid-multi)
(let [f (case engine
:index-only search.index/search
:legacy legacy-results
:hybrid @#'search.postgres/hybrid
:hybrid-multi @#'search.postgres/hybrid-multi
:minimal @#'search.postgres/minimal)]
(dotimes [_ n]
(doall (apply f search-term args))))))
(mini-bench 500 :legacy nil)
(mini-bench 500 :legacy "sample")
;; 30x speed-up for test-data on my machine
(mini-bench 500 :index-only "sample")
;; No noticeable degradation, without permissions and filters
(mini-bench 500 :minimal "sample")
;; but joining to the "hydrated query" reverses the advantage
(mini-bench 100 :legacy nil)
(mini-bench 100 :legacy "sample")
;; slower than fetching everything...
(mini-bench 100 :hybrid "sample")
;; doing both filters... still a little bit more overhead with the join
;; using index + LIKE on the join ... still a little bit more overhead
(mini-bench 100 :hybrid "sample" {:search-string "sample"})
;; oh! this monstrocity is actually 2x faaster than baseline B-)
(mini-bench 100 :hybrid-multi "sample"))
;; oh! this monstrosity is actually 2x faster than baseline B-)
(mini-bench 100 :hybrid-multi "sample")
(mini-bench 100 :minimal "sample"))
......@@ -27,17 +27,17 @@
(= :postgres (metabase.db/db-type)))
(def ^:private default-engine :in-place)
(defn- query-fn [search-engine]
(case search-engine
:fulltext (if (is-postgres?)
(do (log/warn ":fulltext search not supported for your AppDb, using :in-place")
:minimal (if (is-postgres?)
(do (log/warn ":minimal search not supported for your AppDb, using :in-place")
:in-place search.impl/in-place))
(case search-engine
:fulltext (when (is-postgres?) search.postgres/search)
:minimal (when (is-postgres?) search.postgres/search)
:in-place search.impl/in-place)
(log/warnf "%s search not supported for your AppDb, using %s" search-engine default-engine)
(defn supports-index?
"Does this instance support a search index, e.g. has the right kind of AppDb"
......@@ -40,7 +40,7 @@
:archived? archived?
:model-ancestors? true})))
(defn hybrid
(defn- hybrid
"Use the index for appling the search string, but rely on the legacy code path for rendering
the display data, applying permissions, additional filtering, etc.
......@@ -60,7 +60,7 @@
(sql/format {:quoted true})
(defn hybrid-multi
(defn- hybrid-multi
"Perform multiple legacy searches to see if its faster. Perverse!"
[search-term & {:as search-ctx}]
(when-not @#'search.index/initialized?
......@@ -88,17 +88,22 @@
(map :legacy_input)
(map #(json/parse-string % keyword))))
(defn search-minimal
"Perform a basic search that only uses the index"
(minimal (:search-string search-ctx)
(dissoc search-ctx :search-string)))
(def ^:private default-engine hybrid-multi)
(defn- search-fn [search-engine]
(case search-engine
:hybrid hybrid
:hubrid-multi hybrid-multi
:minimal minimal
:fulltext default-engine
(defn search
"Return a reducible-query corresponding to searching the entities via a tsvector."
(hybrid-multi (:search-string search-ctx)
(dissoc search-ctx :search-string)))
(let [f (search-fn (:search-engine search-ctx))]
(f (:search-string search-ctx)
(dissoc search-ctx :search-string))))
(defn init!
"Ensure that the search index exists, and has been populated with all the entities."
......@@ -90,7 +90,7 @@
(update :display_data json/generate-string)
......@@ -175,10 +175,12 @@
{:select [:model_id :model]
:from [active-table]
:where [:raw
"search_vector @@ to_tsquery('"
tsv-language "', "
[:lift (to-tsquery-expr search-term)] ")"]})
:where (if-not search-term
[:= [:inline 1] [:inline 1]]
"search_vector @@ to_tsquery('"
tsv-language "', "
[:lift (to-tsquery-expr search-term)] ")"])})
(defn search
"Use the index table to search for records."
......@@ -9,7 +9,7 @@
[toucan2.realize :as t2.realize]))
(def ^:private hybrid
(comp t2.realize/realize search.postgres/hybrid))
(comp t2.realize/realize #'search.postgres/hybrid))
(def ^:private hybrid-multi #'search.postgres/hybrid-multi)
......@@ -18,14 +18,19 @@
#_{:clj-kondo/ignore [:metabase/test-helpers-use-non-thread-safe-functions]}
(defmacro with-setup [& body]
`(when (is-postgres?)
;; TODO add more extensive data to search
(mt/dataset ~'test-data
(search.postgres/init! true)
(def ^:private example-terms
"Search queries which should give consistent, non-trivial results across engines, for the test data."
[#_nil "data" "dash" "peop" "venue" "rasta"])
(deftest hybrid-test
(testing "consistent results between all searches for certain queries\n"
(doseq [term ["satisfaction" "e-commerce" "example" "rasta" "new" "revenue" "collection"]]
(doseq [term example-terms]
(testing (str "consistent results, but not ordering\n" term)
(is (= (set (legacy-results term))
(set (hybrid term)))))))))
......@@ -47,19 +52,19 @@
(deftest hybrid-multi-test
(testing "consistent results between both hybrid implementations"
(doseq [term ["satisfaction" "e-commerce" "example" "rasta" "new" "revenue" "collection"]]
(testing "consistent results between both hybrid implementations\n"
(doseq [term example-terms]
(testing term
(is (= (hybrid term)
(hybrid-multi term))))))))
(defn- remove-time [m]
(dissoc m :create_at))
(dissoc m :created_at :updated_at :last_edited_at))
(deftest minimal-test
(testing "consistent results between both hybrid implementations"
(doseq [term ["satisfaction" "e-commerce" "example" "new" "revenue"]]
(testing "consistent results between both hybrid implementations\n"
(doseq [term example-terms]
(testing term
;; Timestamps are not strings after round trip, but this doesn't matter
(is (= (map remove-time (hybrid term))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment