Skip to content
Snippets Groups Projects
Unverified Commit 56396355 authored by Chris Truter's avatar Chris Truter Committed by GitHub
Browse files

Speed up minimal Postgres fulltext search (#48053)

parent 35d4611c
Branches
Tags
No related merge requests found
(ns dev.search
(:require
[metabase.api.common :as api]
[metabase.search :as search]
[metabase.search.postgres.core :as search.postgres]
[metabase.search.postgres.index :as search.index]
[metabase.search.postgres.index-test :refer [legacy-results]]
[metabase.server.middleware.offset-paging :as mw.offset-paging]
[metabase.test :as mt]
[toucan2.core :as t2]))
(defn- basic-view [xs]
......@@ -60,3 +64,46 @@
;; oh! this monstrocity is actually 2x faster than baseline B-)
(mini-bench 100 :hybrid-multi "sample")
(mini-bench 100 :minimal "sample"))
(defn- test-search [search-string & [search-engine]]
(let [user-id (mt/user->id :crowberto)
user-perms #{"/"}]
(binding [api/*current-user* (atom (t2/select-one :model/User user-id))
api/*current-user-id* user-id
api/*is-superuser?* true
api/*current-user-permissions-set* (atom user-perms)]
(search/search
(search/search-context
{:archived nil
:created-at nil
:created-by #{}
:current-user-id 379
:is-superuser? true
:current-user-perms user-perms
:filter-items-in-personal-collection nil
:last-edited-at nil
:last-edited-by #{}
:limit mw.offset-paging/*limit*
:model-ancestors? nil
:models search/all-models
:offset mw.offset-paging/*offset*
:search-engine search-engine
:search-native-query nil
:search-string search-string
:table-db-id nil
:verified nil
:ids nil})))))
(comment
(require '[clj-async-profiler.core :as prof])
(prof/serve-ui 8080)
(prof/profile
(count
(dotimes [_ 100]
(test-search "trivia"))))
(prof/profile
(count
(dotimes [_ 1000]
(test-search "trivia" "minimal")))))
......@@ -121,7 +121,7 @@
(-> (scoring/top-results
results
1
(map #(scoring/score-and-result % {:search-string search-string})))
(keep #(scoring/score-and-result % {:search-string search-string})))
first
:name))))))
......
......@@ -8,6 +8,7 @@
[metabase.search.config :as search.config]
[metabase.search.impl :as search.impl]
[metabase.search.postgres.core :as search.postgres]
[metabase.search.scoring :as scoring]
[metabase.util.log :as log]
[metabase.util.malli :as mu]
[potemkin :as p]))
......@@ -34,10 +35,33 @@
(case search-engine
:fulltext (when (is-postgres?) search.postgres/search)
:minimal (when (is-postgres?) search.postgres/search)
:in-place search.impl/in-place)
:in-place search.impl/in-place
nil)
(log/warnf "%s search not supported for your AppDb, using %s" search-engine default-engine)
default-engine))
(recur default-engine)))
(defn- model-set-fn [search-engine]
(or
(case search-engine
:fulltext (when (is-postgres?) search.postgres/model-set)
:minimal (when (is-postgres?) search.postgres/model-set)
:in-place search.impl/query-model-set
nil)
(log/warnf "%s search not supported for your AppDb, using %s" search-engine default-engine)
(recur default-engine)))
(defn- score-fn [search-engine]
(or
(case search-engine
:fulltext (when (is-postgres?) search.postgres/no-scoring)
:minimal (when (is-postgres?) search.postgres/no-scoring)
:in-place scoring/score-and-result
nil)
(log/warnf "%s search not supported for your AppDb, using %s" search-engine default-engine)
(recur default-engine)))
(defn supports-index?
"Does this instance support a search index, e.g. has the right kind of AppDb"
......@@ -59,5 +83,12 @@
(mu/defn search
"Builds a search query that includes all the searchable entities and runs it"
[search-ctx :- search.config/SearchContext]
(let [query-fn (query-fn (:search-engine search-ctx :in-place))]
(search.impl/search query-fn search-ctx)))
(let [engine (:search-engine search-ctx :in-place)
query-fn (query-fn engine)
score-fn (score-fn engine)
models-fn (model-set-fn engine)]
(search.impl/search
query-fn
models-fn
score-fn
search-ctx)))
......@@ -676,7 +676,7 @@
(->> (update result :pk_ref json/parse-string)
(add-can-write search-ctx)))
(defn- search-results [search-ctx total-results]
(defn- search-results [search-ctx model-set-fn total-results]
(let [add-perms-for-col (fn [item]
(cond-> item
(mi/instance-of? :model/Collection item)
......@@ -684,7 +684,7 @@
;; We get to do this slicing and dicing with the result data because
;; the pagination of search is for UI improvement, not for performance.
;; We intend for the cardinality of the search results to be below the default max before this slicing occurs
{:available_models (query-model-set search-ctx)
{:available_models (model-set-fn search-ctx)
:data (cond->> total-results
(some? (:offset-int search-ctx)) (drop (:offset-int search-ctx))
(some? (:limit-int search-ctx)) (take (:limit-int search-ctx))
......@@ -699,8 +699,11 @@
(mu/defn search
"Builds a search query that includes all the searchable entities, and runs it."
([search-ctx :- search.config/SearchContext]
(search in-place search-ctx))
([results-fn search-ctx :- search.config/SearchContext]
(search in-place query-model-set scoring/score-and-result search-ctx))
([results-fn
model-set-fn
score-fn
search-ctx :- search.config/SearchContext]
(let [reducible-results (results-fn search-ctx)
scoring-ctx (select-keys search-ctx [:search-string :search-native-query])
xf (comp
......@@ -708,12 +711,10 @@
(map normalize-result)
(filter (partial check-permissions-for-model search-ctx))
(map (partial normalize-result-more search-ctx))
;; scoring - note that this can also filter further!
(map #(scoring/score-and-result % scoring-ctx))
(filter #(pos? (:score %))))
(keep #(score-fn % scoring-ctx)))
total-results (cond->> (scoring/top-results reducible-results search.config/max-filtered-results xf)
true hydrate-user-metadata
(:model-ancestors? search-ctx) (add-dataset-collection-hierarchy)
true (add-collection-effective-location)
true (map serialize))]
(search-results search-ctx total-results))))
(search-results search-ctx model-set-fn total-results))))
......@@ -103,14 +103,41 @@
(update :updated_at parse-datetime)
(update :last_edited_at parse-datetime)))))
;; filters:
;; - the obvious ones in the ui
;; - db-id
;; - personal collection (include / exclude), including sub
(defn- minimal-with-perms
"Search via index, and return potentially stale information, without applying filters,
but applying permissions. Does not perform ranking."
[search-term & {:as search-ctx}]
(when-not @#'search.index/initialized?
(throw (ex-info "Search index is not initialized. Use [[init!]] to ensure it exists."
{:search-engine :postgres})))
(->> (search.impl/add-collection-join-and-where-clauses
(assoc (search.index/search-query search-term)
:select [:legacy_input])
;; we just need this to not be "collection"
"__search_index__"
search-ctx)
(t2/query)
(map :legacy_input)
(map #(json/parse-string % keyword))
(map #(-> %
(update :created_at parse-datetime)
(update :updated_at parse-datetime)
(update :last_edited_at parse-datetime)))))
(def ^:private default-engine hybrid-multi)
(defn- search-fn [search-engine]
(case search-engine
:hybrid hybrid
:hubrid-multi hybrid-multi
:minimal minimal
:fulltext default-engine
:hybrid hybrid
:hubrid-multi hybrid-multi
:minimal minimal
:minimal-with-perms minimal-with-perms
:fulltext default-engine
default-engine))
(defn search
......@@ -120,6 +147,26 @@
(f (:search-string search-ctx)
(dissoc search-ctx :search-string))))
(defn model-set
"Return a set of the models which have at least one result for the given query.
TODO: consider filters and permissions."
[search-ctx]
(set
(filter
;; TODO use a single query
(fn [m]
(t2/exists? :search_index
(-> (search.index/search-query (:search-string search-ctx))
(sql.helpers/where [:= :model m]))))
;; TODO use only the models that apply to the given filters
(:models search-ctx search.config/all-models))))
(defn no-scoring
"Do no scoring, whatsover"
[result _scoring-ctx]
{:score 1
:result (assoc result :all-scores [] :relevant-scores [])})
(defn init!
"Ensure that the search index exists, and has been populated with all the entities."
[& [force-reset?]]
......
......@@ -355,22 +355,19 @@
(defn score-and-result
"Returns a map with the normalized, combined score from relevant-scores as `:score` and `:result`."
[result {:keys [search-string search-native-query]}]
(let [text-matches (-> (text-scores-with-match result {:search-string search-string
:search-native-query search-native-query})
(force-weight text-scores-weight))
all-scores (into (vec (score-result result)) text-matches)
relevant-scores (remove #(= 0 (:score %)) all-scores)
total-score (compute-normalized-score all-scores)]
(let [text-matches (-> (text-scores-with-match result {:search-string search-string
:search-native-query search-native-query})
(force-weight text-scores-weight))
has-text-match? (some (comp pos? :score) text-matches)
all-scores (into (vec (score-result result)) text-matches)
relevant-scores (remove (comp zero? :score) all-scores)
total-score (compute-normalized-score all-scores)]
;; Searches with a blank search string mean "show me everything, ranked";
;; see https://github.com/metabase/metabase/pull/15604 for archived search.
;; If the search string is non-blank, results with no text match have a score of zero.
(if (or (str/blank? search-string)
(pos? (reduce (fn [acc {:keys [score] :or {score 0}}] (+ acc score))
0
text-matches)))
{:score total-score
:result (assoc result :all-scores all-scores :relevant-scores relevant-scores)}
{:score 0})))
(when (or has-text-match? (str/blank? search-string))
{:score total-score
:result (assoc result :all-scores all-scores :relevant-scores relevant-scores)})))
(defn compare-score
"Compare maps of scores and results. Must return -1, 0, or 1. The score is assumed to be a vector, and will be
......
......@@ -2,7 +2,7 @@
(:require
[clojure.string :as str]
[clojure.test :refer [deftest is testing]]
[metabase.search :refer [is-postgres?]]
[metabase.search :as search :refer [is-postgres?]]
[metabase.search.postgres.core :as search.postgres]
[metabase.search.postgres.index-test :refer [legacy-results]]
[metabase.test :as mt]
......@@ -11,10 +11,6 @@
(def ^:private hybrid
(comp t2.realize/realize #'search.postgres/hybrid))
(def ^:private hybrid-multi #'search.postgres/hybrid-multi)
(def ^:private minimal #'search.postgres/minimal)
#_{:clj-kondo/ignore [:metabase/test-helpers-use-non-thread-safe-functions]}
(defmacro with-setup [& body]
`(when (is-postgres?)
......@@ -56,12 +52,28 @@
(doseq [term example-terms]
(testing term
(is (= (hybrid term)
(hybrid-multi term))))))))
(#'search.postgres/hybrid-multi term))))))))
(deftest minimal-test
(with-setup
(testing "consistent results between both hybrid implementations\n"
(testing "consistent results with minimal implementations\n"
(doseq [term example-terms]
(testing term
(is (= (hybrid term)
(minimal term))))))))
(#'search.postgres/minimal term))))))))
(deftest minimal-with-perms-test
(with-setup
(testing "consistent results with minimal implementations\n"
(doseq [term (take 1 example-terms)]
(testing term
(is (= (hybrid term)
(#'search.postgres/minimal-with-perms
term
{:current-user-id (mt/user->id :crowberto)
:is-superuser? true
:archived? false
:current-user-perms #{"/"}
:model-ancestors? false
:models search/all-models
:search-string term}))))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment