Skip to content
Snippets Groups Projects
Unverified Commit 775d9c94 authored by Chris Truter's avatar Chris Truter Committed by GitHub
Browse files

Some polish around experiment search (#47986)

parent 7251ee12
No related branches found
No related tags found
No related merge requests found
...@@ -25,37 +25,38 @@ ...@@ -25,37 +25,38 @@
(t2/count :search_index) (t2/count :search_index)
;; doesn't work, need to drop to lower level postgres functions ;; doesn't work, need to drop to lower level postgres functions
(basic-view (search.postgres/hybrid "satis:*")) (basic-view (#'search.postgres/hybrid "satis:*"))
;; nope, neither get it as the lexeme is not similar enough ;; nope, neither get it as the lexeme is not similar enough
(basic-view (search.postgres/hybrid "satisfactory")) (basic-view (#'search.postgres/hybrid "satisfactory"))
(basic-view (legacy-results "satisfactory")) (basic-view (legacy-results "satisfactory"))
(defn- mini-bench [n engine search-term & args] (defn- mini-bench [n engine search-term & args]
#_{:clj-kondo/ignore [:discouraged-var]} #_{:clj-kondo/ignore [:discouraged-var]}
(time (let [f (case engine
(dotimes [_ n] :index-only search.index/search
(vec :legacy legacy-results
(apply :hybrid @#'search.postgres/hybrid
(case engine :hybrid-multi @#'search.postgres/hybrid-multi
:index-only search.index/search :minimal @#'search.postgres/minimal)]
:legacy legacy-results (time
:hybrid search.postgres/hybrid (dotimes [_ n]
:hybrid-multi search.postgres/hybrid-multi) (doall (apply f search-term args))))))
search-term
args)))))
(mini-bench 500 :legacy nil) (mini-bench 500 :legacy nil)
(mini-bench 500 :legacy "sample") (mini-bench 500 :legacy "sample")
;; 30x speed-up for test-data on my machine ;; 30x speed-up for test-data on my machine
(mini-bench 500 :index-only "sample") (mini-bench 500 :index-only "sample")
;; No noticeaable degradation, without permissions and filters
(mini-bench 500 :minimal "sample")
;; but joining to the "hydrated query" reverses the advantage ;; but joining to the "hydrated query" reverses the advantage
(mini-bench 100 :legacy nil) (mini-bench 100 :legacy nil)
(mini-bench 100 :legacy "sample") (mini-bench 100 :legacy "sample")
;; slower than fetching everything... ;; slower than fetching everything...
(mini-bench 100 :hybrid "sample") (mini-bench 100 :hybrid "sample")
;; doing both filters... still a little bit more overhead with the join ;; using index + LIKE on the join ... still a little bit more overhead
(mini-bench 100 :hybrid "sample" {:search-string "sample"}) (mini-bench 100 :hybrid "sample" {:search-string "sample"})
;; oh! this monstrocity is actually 2x faaster than baseline B-) ;; oh! this monstrocity is actually 2x faster than baseline B-)
(mini-bench 100 :hybrid-multi "sample")) (mini-bench 100 :hybrid-multi "sample")
(mini-bench 100 :minimal "sample"))
...@@ -27,17 +27,17 @@ ...@@ -27,17 +27,17 @@
[] []
(= :postgres (metabase.db/db-type))) (= :postgres (metabase.db/db-type)))
(def ^:private default-engine :in-place)
(defn- query-fn [search-engine] (defn- query-fn [search-engine]
(case search-engine (or
:fulltext (if (is-postgres?) (case search-engine
search.postgres/search :fulltext (when (is-postgres?) search.postgres/search)
(do (log/warn ":fulltext search not supported for your AppDb, using :in-place") :minimal (when (is-postgres?) search.postgres/search)
search.impl/in-place)) :in-place search.impl/in-place)
:minimal (if (is-postgres?)
search.postgres/search-minimal (log/warnf "%s search not supported for your AppDb, using %s" search-engine default-engine)
(do (log/warn ":minimal search not supported for your AppDb, using :in-place") default-engine))
search.impl/in-place))
:in-place search.impl/in-place))
(defn supports-index? (defn supports-index?
"Does this instance support a search index, e.g. has the right kind of AppDb" "Does this instance support a search index, e.g. has the right kind of AppDb"
......
...@@ -40,7 +40,7 @@ ...@@ -40,7 +40,7 @@
:archived? archived? :archived? archived?
:model-ancestors? true}))) :model-ancestors? true})))
(defn hybrid (defn- hybrid
"Use the index for appling the search string, but rely on the legacy code path for rendering "Use the index for appling the search string, but rely on the legacy code path for rendering
the display data, applying permissions, additional filtering, etc. the display data, applying permissions, additional filtering, etc.
...@@ -60,7 +60,7 @@ ...@@ -60,7 +60,7 @@
(sql/format {:quoted true}) (sql/format {:quoted true})
t2/reducible-query)) t2/reducible-query))
(defn hybrid-multi (defn- hybrid-multi
"Perform multiple legacy searches to see if its faster. Perverse!" "Perform multiple legacy searches to see if its faster. Perverse!"
[search-term & {:as search-ctx}] [search-term & {:as search-ctx}]
(when-not @#'search.index/initialized? (when-not @#'search.index/initialized?
...@@ -88,17 +88,22 @@ ...@@ -88,17 +88,22 @@
(map :legacy_input) (map :legacy_input)
(map #(json/parse-string % keyword)))) (map #(json/parse-string % keyword))))
(defn search-minimal (def ^:private default-engine hybrid-multi)
"Perform a basic search that only uses the index"
[search-ctx] (defn- search-fn [search-engine]
(minimal (:search-string search-ctx) (case search-engine
(dissoc search-ctx :search-string))) :hybrid hybrid
:hubrid-multi hybrid-multi
:minimal minimal
:fulltext default-engine
default-engine))
(defn search (defn search
"Return a reducible-query corresponding to searching the entities via a tsvector." "Return a reducible-query corresponding to searching the entities via a tsvector."
[search-ctx] [search-ctx]
(hybrid-multi (:search-string search-ctx) (let [f (search-fn (:search-engine search-ctx))]
(dissoc search-ctx :search-string))) (f (:search-string search-ctx)
(dissoc search-ctx :search-string))))
(defn init! (defn init!
"Ensure that the search index exists, and has been populated with all the entities." "Ensure that the search index exists, and has been populated with all the entities."
......
...@@ -90,7 +90,7 @@ ...@@ -90,7 +90,7 @@
:collection_id :collection_id
:database_id :database_id
:display_data :display_data
:legacy_data :legacy_input
:table_id :table_id
:archived]) :archived])
(update :display_data json/generate-string) (update :display_data json/generate-string)
...@@ -175,10 +175,12 @@ ...@@ -175,10 +175,12 @@
[search-term] [search-term]
{:select [:model_id :model] {:select [:model_id :model]
:from [active-table] :from [active-table]
:where [:raw :where (if-not search-term
"search_vector @@ to_tsquery('" [:= [:inline 1] [:inline 1]]
tsv-language "', " [:raw
[:lift (to-tsquery-expr search-term)] ")"]}) "search_vector @@ to_tsquery('"
tsv-language "', "
[:lift (to-tsquery-expr search-term)] ")"])})
(defn search (defn search
"Use the index table to search for records." "Use the index table to search for records."
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
[toucan2.realize :as t2.realize])) [toucan2.realize :as t2.realize]))
(def ^:private hybrid (def ^:private hybrid
(comp t2.realize/realize search.postgres/hybrid)) (comp t2.realize/realize #'search.postgres/hybrid))
(def ^:private hybrid-multi #'search.postgres/hybrid-multi) (def ^:private hybrid-multi #'search.postgres/hybrid-multi)
...@@ -18,14 +18,19 @@ ...@@ -18,14 +18,19 @@
#_{:clj-kondo/ignore [:metabase/test-helpers-use-non-thread-safe-functions]} #_{:clj-kondo/ignore [:metabase/test-helpers-use-non-thread-safe-functions]}
(defmacro with-setup [& body] (defmacro with-setup [& body]
`(when (is-postgres?) `(when (is-postgres?)
;; TODO add more extensive data to search
(mt/dataset ~'test-data (mt/dataset ~'test-data
(search.postgres/init! true) (search.postgres/init! true)
~@body))) ~@body)))
(def ^:private example-terms
"Search queries which should give consistent, non-trivial results across engines, for the test data."
[#_nil "data" "dash" "peop" "venue" "rasta"])
(deftest hybrid-test (deftest hybrid-test
(with-setup (with-setup
(testing "consistent results between all searches for certain queries\n" (testing "consistent results between all searches for certain queries\n"
(doseq [term ["satisfaction" "e-commerce" "example" "rasta" "new" "revenue" "collection"]] (doseq [term example-terms]
(testing (str "consistent results, but not ordering\n" term) (testing (str "consistent results, but not ordering\n" term)
(is (= (set (legacy-results term)) (is (= (set (legacy-results term))
(set (hybrid term))))))))) (set (hybrid term)))))))))
...@@ -47,19 +52,19 @@ ...@@ -47,19 +52,19 @@
(deftest hybrid-multi-test (deftest hybrid-multi-test
(with-setup (with-setup
(testing "consistent results between both hybrid implementations" (testing "consistent results between both hybrid implementations\n"
(doseq [term ["satisfaction" "e-commerce" "example" "rasta" "new" "revenue" "collection"]] (doseq [term example-terms]
(testing term (testing term
(is (= (hybrid term) (is (= (hybrid term)
(hybrid-multi term)))))))) (hybrid-multi term))))))))
(defn- remove-time [m] (defn- remove-time [m]
(dissoc m :create_at)) (dissoc m :created_at :updated_at :last_edited_at))
(deftest minimal-test (deftest minimal-test
(with-setup (with-setup
(testing "consistent results between both hybrid implementations" (testing "consistent results between both hybrid implementations\n"
(doseq [term ["satisfaction" "e-commerce" "example" "new" "revenue"]] (doseq [term example-terms]
(testing term (testing term
;; Timestamps are not strings after round trip, but this doesn't matter ;; Timestamps are not strings after round trip, but this doesn't matter
(is (= (map remove-time (hybrid term)) (is (= (map remove-time (hybrid term))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment