Skip to content
Snippets Groups Projects
Unverified commit f7b482c6, authored by Chris Truter, committed by GitHub
Browse files

Minimal search using only index (#47918)

parent 43a7ce06
No related branches found
No related tags found
No related merge requests found
......@@ -33,6 +33,10 @@
search.postgres/search
(do (log/warn ":fulltext search not supported for your AppDb, using :in-place")
search.impl/in-place))
:minimal (if (is-postgres?)
search.postgres/search-minimal
(do (log/warn ":minimal search not supported for your AppDb, using :in-place")
search.impl/in-place))
:in-place search.impl/in-place))
(defn supports-index?
......
......@@ -23,7 +23,8 @@
(def search-engines
  "Supported search engines."
  ;; `:minimal` answers queries using only the search index.
  #{:in-place
    :fulltext
    :minimal})
(def ^:dynamic *db-max-results*
"Number of raw results to fetch from the database. This number is in place to prevent massive application DB load by
......
......@@ -531,11 +531,6 @@
:display_name
:effective_parent))))
;; Adds a `:can_write` entry (the result of `can-write?` on the row) to rows that are Dashboard or Card
;; instances; every other row is returned unchanged.
;; NOTE(review): an identical `add-can-write` definition appears again further down, just before
;; `normalize-result-more` - only one of the two should remain.
(defn- add-can-write [row]
(if (some #(mi/instance-of? % row) [:model/Dashboard :model/Card])
(assoc row :can_write (can-write? row))
row))
(defn- bit->boolean
"Coerce a bit returned by some MySQL/MariaDB versions in some situations to Boolean."
[v]
......@@ -548,6 +543,7 @@
(defn- allowed-engine?
  "Whether `engine` may currently be used. `:in-place` is always allowed; `:minimal` and `:fulltext` are both gated
  on the `experimental-fulltext-search-enabled` setting. Any other value has no matching `case` clause."
  [engine]
  (case engine
    :in-place            true
    ;; both index-backed engines sit behind the same experimental flag
    (:minimal :fulltext) (public-settings/experimental-fulltext-search-enabled)))
(defn- parse-engine [value]
......@@ -647,59 +643,78 @@
(mdb.query/format-sql (first (mdb.query/compile search-query))))
(t2/reducible-query search-query)))
(defn- to-toucan-instance
  "Wrap a raw search `row` in a Toucan 2 instance. The model is the `:db-model` that
  `search.config/model-to-db-model` gives for the row's `:model`."
  [row]
  (let [db-model (:db-model (search.config/model-to-db-model (:model row)))]
    (t2.instance/instance db-model row)))
(defn- map-collection
  "Collection-specific adjustments: a directly-archived collection has its `:location` replaced by the trash path,
  `:type` is set from `:collection_type`, and the result is passed through `collection/maybe-localize-trash-name`."
  [collection]
  (let [relocated (if (:archived_directly collection)
                    (assoc collection :location (collection/trash-path))
                    collection)]
    (-> relocated
        (assoc :type (:collection_type collection))
        collection/maybe-localize-trash-name)))
(defn- normalize-result
  "Realize `result`, turn it into a Toucan instance, coerce its flag columns to booleans and, for collections,
  apply `map-collection`."
  [result]
  (let [instance (-> result t2.realize/realize to-toucan-instance)
        ;; MySQL returns booleans as `1` or `0` so convert those to boolean as needed
        coerced  (reduce (fn [acc flag] (update acc flag bit->boolean))
                         instance
                         [:bookmark :archived :archived_directly])]
    ;; Collections require some transformation before being scored and returned by search.
    (if (t2/instance-of? :model/Collection instance)
      (map-collection coerced)
      coerced)))
(defn- add-can-write
  "Add `:can_write` (as computed by `can-write?`) to rows that are Dashboard or Card instances; other rows are
  returned untouched."
  [row]
  (let [row-is-a? (fn [model] (mi/instance-of? model row))]
    (cond-> row
      (some row-is-a? [:model/Dashboard :model/Card])
      (assoc :can_write (can-write? row)))))
(defn- normalize-result-more
  "Second normalization pass, run only after results have been filtered by permissions because it's more
  expensive: parses `:pk_ref` from JSON, then adds `:can_write` via `add-can-write`."
  [result]
  (add-can-write (update result :pk_ref json/parse-string)))
(defn- search-results
  "Build the search response map from the complete `total-results`: apply the context's offset/limit, add
  `:can_write` to collections on the returned page, and report the total count alongside the request parameters."
  [search-ctx total-results]
  (let [{:keys [offset-int limit-int]} search-ctx
        with-collection-perms          (fn [item]
                                         (if (mi/instance-of? :model/Collection item)
                                           (assoc item :can_write (can-write? item))
                                           item))
        ;; We get to do this slicing and dicing with the result data because
        ;; the pagination of search is for UI improvement, not for performance.
        ;; We intend for the cardinality of the search results to be below the default max before this slicing occurs
        page                           (cond->> total-results
                                         (some? offset-int) (drop offset-int)
                                         (some? limit-int)  (take limit-int))]
    {:available_models (query-model-set search-ctx)
     :data             (map with-collection-perms page)
     :limit            limit-int
     :models           (:models search-ctx)
     :offset           offset-int
     :table_db_id      (:table-db-id search-ctx)
     :engine           (:search-engine search-ctx)
     :total            (count total-results)}))
(mu/defn search
  "Builds a search query that includes all the searchable entities, and runs it.

  With one argument, `in-place` is used as the results function. With two, `results-fn` is called with
  `search-ctx`; its results are normalized, filtered by permissions, scored, post-processed and finally packaged
  by `search-results`."
  ([search-ctx :- search.config/SearchContext]
   (search in-place search-ctx))
  ([results-fn search-ctx :- search.config/SearchContext]
   (let [reducible-results (results-fn search-ctx)
         scoring-ctx       (select-keys search-ctx [:search-string :search-native-query])
         xf                (comp
                            ;; cap the number of raw rows that are considered at all
                            (take search.config/*db-max-results*)
                            (map normalize-result)
                            (filter (partial check-permissions-for-model search-ctx))
                            ;; the costlier normalization only runs on rows that passed the permission filter
                            (map normalize-result-more)
                            ;; scoring - note that this can also filter further!
                            (map #(scoring/score-and-result % scoring-ctx))
                            (filter #(pos? (:score %))))
         total-results     (cond->> (scoring/top-results reducible-results search.config/max-filtered-results xf)
                             true                           hydrate-user-metadata
                             (:model-ancestors? search-ctx) (add-dataset-collection-hierarchy)
                             true                           (add-collection-effective-location)
                             true                           (map serialize))]
     (search-results search-ctx total-results))))
(ns metabase.search.postgres.core
(:require
[cheshire.core :as json]
[honey.sql :as sql]
[honey.sql.helpers :as sql.helpers]
[metabase.api.common :as api]
......@@ -78,6 +79,21 @@
(t2/query <>)
(filter (comp (set ids) :id) <>)))))))
(defn- minimal
  "Search using only the index: run the index search query for `search-term`, selecting just `:legacy_input`, and
  return each row's `:legacy_input` parsed from JSON with keyword keys. Throws if the index is not initialized.
  Any further arguments are accepted but ignored."
  [search-term & {:as _search-ctx}]
  (when-not @#'search.index/initialized?
    (throw (ex-info "Search index is not initialized. Use [[init!]] to ensure it exists."
                    {:search-engine :postgres})))
  (let [index-query (assoc (search.index/search-query search-term) :select [:legacy_input])]
    (map (fn [row] (json/parse-string (:legacy_input row) keyword))
         (t2/query index-query))))
(defn search-minimal
  "Run the index-only (`minimal`) search for the `:search-string` in `search-ctx`. The rest of the context is
  passed along as options."
  [search-ctx]
  (let [term          (:search-string search-ctx)
        remaining-ctx (dissoc search-ctx :search-string)]
    (minimal term remaining-ctx)))
(defn search
"Return a reducible-query corresponding to searching the entities via a tsvector."
[search-ctx]
......@@ -99,4 +115,5 @@
(search.index/activate-pending!))
;; REPL helpers: (re)initialize the index, then inspect the `:legacy_input` payloads stored in `:search_index`.
(comment
  (init! true)
  (t2/select-fn-vec :legacy_input :search_index))
(ns metabase.search.postgres.index
(:require
[cheshire.core :as json]
[clojure.string :as str]
[honey.sql.helpers :as sql.helpers]
[metabase.util :as u]
......@@ -44,6 +45,9 @@
[:model [:varchar 254] :not-null] ;; TODO find the right size
;; search
[:search_vector :tsvector :not-null]
;; results
[:display_data :text]
[:legacy_input :text]
;; scoring related
[:model_rank :int :not-null]
;; permission related entities
......@@ -85,8 +89,12 @@
:model_rank
:collection_id
:database_id
:display_data
;; Must be `:legacy_input` - the key that is JSON-encoded just below. Selecting any other name drops the value
;; here, so the `update` below would encode nil.
:legacy_input
:table_id
:archived])
(update :display_data json/generate-string)
(update :legacy_input json/generate-string)
(assoc
:model_id (:id entity)
:search_vector [:to_tsvector
......
......@@ -26,6 +26,9 @@
(map m)
(str/join " ")))
(defn- display-data
  "The subset of `m` kept for display: whichever of `:name`, `:display_name` and `:description` are present."
  [m]
  (let [display-keys [:name :display_name :description]]
    (select-keys m display-keys)))
(defn- ->entry [m]
(-> m
(select-keys
......@@ -37,6 +40,8 @@
:table_id])
(update :archived boolean)
(assoc
:display_data (display-data m)
:legacy_input m
:searchable_text (searchable-text m)
:model_rank (model-rank (:model m)))))
......
......@@ -11,8 +11,9 @@
;; Local shorthands for the search implementations under test.

;; `hybrid` results are realized so they can be compared with `=`.
(def ^:private hybrid
  (comp t2.realize/realize search.postgres/hybrid))

;; Referenced through their vars - presumably because they are private in `search.postgres`; confirm.
(def ^:private hybrid-multi #'search.postgres/hybrid-multi)

(def ^:private minimal #'search.postgres/minimal)
#_{:clj-kondo/ignore [:metabase/test-helpers-use-non-thread-safe-functions]}
(defmacro with-setup [& body]
......@@ -51,3 +52,15 @@
(testing term
(is (= (hybrid term)
(hybrid-multi term))))))))
(defn- remove-time
  "Drop the `:created_at` timestamp from a search result `m`, so results whose timestamps differ only in
  representation can be compared with `=`."
  [m]
  (dissoc m :created_at))
;; Checks that the index-only `minimal` search returns the same results as `hybrid` for a handful of terms.
(deftest minimal-test
  (with-setup
    (testing "consistent results between the hybrid and minimal implementations"
      (doseq [term ["satisfaction" "e-commerce" "example" "new" "revenue"]]
        (testing term
          ;; `minimal` results have been through a JSON round trip, so their timestamps no longer have the same
          ;; representation as the `hybrid` ones. That difference doesn't matter here, so strip them first.
          (is (= (map remove-time (hybrid term))
                 (map remove-time (minimal term)))))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment