diff --git a/.clj-kondo/config.edn b/.clj-kondo/config.edn index 03878fdabda1eba91190c6216e22d38ee094afbf..644c7c5e3350380aebe5bbfaf4ed07cffffc8e9c 100644 --- a/.clj-kondo/config.edn +++ b/.clj-kondo/config.edn @@ -341,7 +341,8 @@ metabase.query-analysis.failure-map} metabase.related #{metabase.related} metabase.sample-data #{metabase.sample-data} - metabase.search #{metabase.search} + metabase.search #{metabase.search + metabase.search.spec} metabase.server #{metabase.server metabase.server.handler metabase.server.middleware.auth @@ -665,8 +666,9 @@ metabase.related related metabase.search.config search.config metabase.search.filter search.filter + metabase.search.in-place.filter search.in-place.filter metabase.search.util search.util - metabase.search.scoring scoring + metabase.search.in-place.scoring scoring metabase.server.middleware.auth mw.auth metabase.server.middleware.browser-cookie mw.browser-cookie metabase.server.middleware.exceptions mw.exceptions diff --git a/dev/src/dev/search.clj b/dev/src/dev/search.clj index 0b628cf6182f6549b8b918706d5941f5ee35c14e..f203b2df206f6be52371165cd0f8ff7657bd20d1 100644 --- a/dev/src/dev/search.clj +++ b/dev/src/dev/search.clj @@ -39,9 +39,7 @@ :search.engine/index-only search.index/search :search.engine/legacy legacy-results :search.engine/hybrid @#'search.postgres/hybrid - :search.engine/hybrid-multi @#'search.postgres/hybrid-multi - :search.engine/minimal @#'search.postgres/minimal - :search.engine/minimal-wth-perms @#'search.postgres/minimal-with-perms)] + :search.engine/fulltext @#'search.postgres/fulltext)] (time (dotimes [_ n] (doall (apply f search-term args)))))) @@ -51,7 +49,7 @@ ;; 30x speed-up for test-data on my machine (mini-bench 500 :index-only "sample") ;; No noticeable degradation, without permissions and filters - (mini-bench 500 :minimal "sample") + (mini-bench 500 :fulltext "sample") ;; but joining to the "hydrated query" reverses the advantage (mini-bench 100 :legacy nil) @@ -60,8 +58,6 @@ (mini-bench 100 :hybrid "sample") ;; using index + LIKE on the join ... still a little bit more overhead (mini-bench 100 :hybrid "sample" {:search-string "sample"}) - ;; oh! this monstrosity is actually 2x faster than baseline B-) - (mini-bench 100 :hybrid-multi "sample") (mini-bench 100 :minimal "sample")) (defn- test-search [user search-string & [search-engine]] diff --git a/enterprise/backend/src/metabase_enterprise/search/scoring.clj b/enterprise/backend/src/metabase_enterprise/search/scoring.clj index 72cd5f62eb13fd0ef00b85f0409694be4866a800..8181e6d339d79f13fa99224174cd4c61bf953723 100644 --- a/enterprise/backend/src/metabase_enterprise/search/scoring.clj +++ b/enterprise/backend/src/metabase_enterprise/search/scoring.clj @@ -2,7 +2,7 @@ ;; TODO -- move to `metabase-enterprise.<feature>.*` (:require [metabase.public-settings.premium-features :as premium-features :refer [defenterprise]] - [metabase.search.scoring :as scoring])) + [metabase.search.in-place.scoring :as scoring])) (defn- official-collection-score "A scorer for items in official collections" diff --git a/enterprise/backend/test/metabase_enterprise/search/scoring_test.clj b/enterprise/backend/test/metabase_enterprise/search/scoring_test.clj index 90c4c0d0c4e0051b432950f21e34f8e81b310297..f56a57767c9df38526223a5cbf1483e9d5e0f117 100644 --- a/enterprise/backend/test/metabase_enterprise/search/scoring_test.clj +++ b/enterprise/backend/test/metabase_enterprise/search/scoring_test.clj @@ -7,7 +7,7 @@ [clojure.test :refer :all] [java-time.api :as t] [metabase-enterprise.search.scoring :as ee-scoring] - [metabase.search.scoring :as scoring] + [metabase.search.in-place.scoring :as scoring] [metabase.test :as mt])) (deftest ^:parallel verified-score-test diff --git a/src/metabase/api/common.clj b/src/metabase/api/common.clj index c83a63bb72e8e3e6135c3ee5ec9938f35ad1f60b..68564e3f93d9294a21b5b6c4278b769db4249d26 100644 --- a/src/metabase/api/common.clj +++ b/src/metabase/api/common.clj @@ -734,6 +734,8 @@ (map #(dissoc % ::model))))) (def model->db-model + ;; NOTE search is decoupling itself from this mapping, favoring a self-contained spec in search.spec/define-spec + ;; Once search.legacy is gone, this dependency should be gone as well. "A mapping from the name of a model used in the API to information about it. This is reused in search, and entity_id translation." {"action" {:db-model :model/Action :alias :action} diff --git a/src/metabase/models/action.clj b/src/metabase/models/action.clj index 6651755f872712d5b9a19cc77ca2c3eab2cbb708..e6cd3e99fe2da9a28c4c081075437a6e6a849f8a 100644 --- a/src/metabase/models/action.clj +++ b/src/metabase/models/action.clj @@ -6,6 +6,7 @@ [metabase.models.interface :as mi] [metabase.models.query :as query] [metabase.models.serialization :as serdes] + [metabase.search :as search] [metabase.util :as u] [metabase.util.i18n :refer [tru]] [methodical.core :as methodical] @@ -396,3 +397,25 @@ (defmethod serdes/storage-path "Action" [action _ctx] (let [{:keys [id label]} (-> action serdes/path last)] ["actions" (serdes/storage-leaf-file-name id label)])) + +;;;; ------------------------------------------------- Search ---------------------------------------------------------- + +(search/define-spec "action" + {:model :model/Action + :attrs {:archived true + :collection-id :model.collection_id + :creator-id true + :database-id :query_action.database_id + :native-query :query_action.dataset_query + ;; workaround for actions not having revisions (yet) + :last-edited-at :updated_at + :table-id false + :created-at true + :updated-at true} + :search-terms [:name :description] + :render-terms {:model-id :model.id + :model-name :model.name} + :where [:= :collection.namespace nil] + :joins {:model [:model/Card [:= :model.id :this.model_id]] + :query_action [:model/QueryAction [:= :query_action.action_id :this.id]] + :collection [:model/Collection [:= :collection.id :model.collection_id]]}}) diff --git a/src/metabase/models/card.clj b/src/metabase/models/card.clj index a4e00be52ab1f31cd20a816b4cf760e410cddab1..8d82ac9ef0836c8d9d5a88f776e0f222e0a3a24d 100644 --- a/src/metabase/models/card.clj +++ b/src/metabase/models/card.clj @@ -6,6 +6,7 @@ [clojure.set :as set] [clojure.string :as str] [clojure.walk :as walk] + [honey.sql.helpers :as sql.helpers] [medley.core :as m] [metabase.api.common :as api] [metabase.audit :as audit] @@ -38,6 +39,7 @@ [metabase.public-settings.premium-features :as premium-features :refer [defenterprise]] [metabase.query-analysis :as query-analysis] [metabase.query-processor.util :as qp.util] + [metabase.search :as search] [metabase.util :as u] [metabase.util.embed :refer [maybe-populate-initially-published-at]] [metabase.util.honey-sql-2 :as h2x] @@ -1004,3 +1006,60 @@ (merge (select-keys card [:name :description :database_id :table_id]) ;; Use `model` instead of `dataset` to mirror product terminology {:model? (= (keyword card-type) :model)})) + +;;;; ------------------------------------------------- Search ---------------------------------------------------------- + +(def ^:private base-search-spec + {:model :model/Card + :attrs {:archived true + :collection-id :collection_id + :creator-id true + :database-id false + :native-query [:case [:= "native" :query_type] :dataset_query] + :dashboardcard-count {:select [:%count.*] + :from [:report_dashboardcard] + :where [:= :report_dashboardcard.card_id :this.id]} + :table-id false + :last-edited-at :r.timestamp + :last-editor-id :r.user_id + :pinned [:> [:coalesce :collection_position [:inline 0]] [:inline 0]] + :verified [:= "verified" :mr.status] + :created-at true + :updated-at true} + :search-terms [:name :description] + :render-terms {:archived-directly true + :collection-authority_level :collection.authority_level + :collection-location :collection.location + :collection-name :collection.name + ;; This is used for legacy ranking, in future it will be replaced by :pinned + :collection-position true + :collection-type :collection.type + ;; This field can become stale, unless we change to calculate it just-in-time. + :display true + :moderated-status :mr.status} + :bookmark [:model/CardBookmark [:and + [:= :bookmark.card_id :this.id] + [:= :bookmark.user_id :current_user/id]]] + :where [:= :collection.namespace nil] + :joins {:collection [:model/Collection [:= :collection.id :this.collection_id]] + :r [:model/Revision [:and + [:= :r.model_id :this.id] + ;; Interesting for inversion, another condition on whether to update. + ;; For now, let's just swallow the extra update (2x amplification) + [:= :r.most_recent true] + [:= :r.model "Card"]]] + :mr [:model/ModerationReview [:and + [:= :mr.moderated_item_type "card"] + [:= :mr.moderated_item_id :this.id] + [:= :mr.most_recent true]]] + ;; workaround for dataflow :(((((( + :dashcard [:model/DashboardCard [:= :dashcard.card_id :this.id]]}}) + +(search/define-spec "card" + (-> base-search-spec (sql.helpers/where [:= :this.type "question"]))) + +(search/define-spec "dataset" + (-> base-search-spec (sql.helpers/where [:= :this.type "model"]))) + +(search/define-spec "metric" + (-> base-search-spec (sql.helpers/where [:= :this.type "metric"]))) diff --git a/src/metabase/models/collection.clj b/src/metabase/models/collection.clj index 64ab387e187bd94746f2d29e159b6bf5494af77b..36cae7881c4ee7cb9779e7a9b7c5af169b7e3f4b 100644 --- a/src/metabase/models/collection.clj +++ b/src/metabase/models/collection.clj @@ -21,6 +21,8 @@ [metabase.models.serialization :as serdes] [metabase.permissions.util :as perms.u] [metabase.public-settings.premium-features :as premium-features] + ;; Trying to use metabase.search would cause a circular reference ;_; + [metabase.search.spec :as search.spec] [metabase.util :as u] [metabase.util.honey-sql-2 :as h2x] [metabase.util.i18n :refer [trs tru]] @@ -1730,3 +1732,31 @@ (collection.root/is-root-collection? item))) (:archived item) (mi/can-write? item))))))) + +;;;; ------------------------------------------------- Search ---------------------------------------------------------- + +(search.spec/define-spec "collection" + {:model :model/Collection + :attrs {:collection-id :id + :creator-id false + :database-id false + :table-id false + :archived true + :created-at true + ;; intentionally not tracked + :updated-at false} + :search-terms [:name] + :render-terms {:archived-directly true + ;; Why not make this a search term? I suspect it was just overlooked before. + :description true + :collection_authority_level :authority_level + :collection_name :name + :collection_type :type + :location true} + :where [:= :namespace nil] + ;; depends on the current user, used for rendering and ranking + ;; TODO not sure this is what it'll look like + :bookmark [:model/CollectionBookmark [:and + [:= :bookmark.collection_id :this.id] + ;; a magical alias, or perhaps this clause can be implicit + [:= :bookmark.user_id :current_user/id]]]}) diff --git a/src/metabase/models/dashboard.clj b/src/metabase/models/dashboard.clj index 0a0229b8aaa544d514f8085882ac3989a7327829..368e34d6a320c431992d61166ab851f88950a7ed 100644 --- a/src/metabase/models/dashboard.clj +++ b/src/metabase/models/dashboard.clj @@ -26,6 +26,7 @@ [metabase.moderation :as moderation] [metabase.public-settings :as public-settings] [metabase.query-processor.metadata :as qp.metadata] + [metabase.search :as search] [metabase.util :as u] [metabase.util.embed :refer [maybe-populate-initially-published-at]] [metabase.util.honey-sql-2 :as h2x] @@ -660,3 +661,37 @@ (assoc :card_id card_id)))))) {})) + +;;;; ------------------------------------------------- Search ---------------------------------------------------------- + +(search/define-spec "dashboard" + {:model :model/Dashboard + :attrs {:archived true + :collection-id true + :creator-id true + :database-id false + :last-editor-id :r.user_id + :last-edited-at :r.timestamp + :pinned [:> [:coalesce :collection_position [:inline 0]] [:inline 0]] + :table-id false + :created-at true + :updated-at true} + :search-terms [:name :description] + :render-terms {:collection-name :collection.name + :collection-type :collection.type + :collection-authority_level :collection.authority_level + :archived-directly true + ;; This is used for legacy ranking, in future it will be replaced by :pinned + :collection-position true} + :where [] + :bookmark [:model/DashboardBookmark [:and + [:= :bookmark.dashboard_id :this.id] + ;; a magical alias, or perhaps this clause can be implicit + [:= :bookmark.user_id :current_user/id]]] + :joins {:collection [:model/Collection [:= :collection.id :this.collection_id]] + :r [:model/Revision [:and + [:= :r.model_id :this.id] + ;; Interesting for inversion, another condition on whether to update. + ;; For now, let's just swallow the extra update (2x amplification) + [:= :r.most_recent true] + [:= :r.model "Dashboard"]]]}}) diff --git a/src/metabase/models/database.clj b/src/metabase/models/database.clj index 4977da64aae4c23a4d0262e7b1686f0e0fa64444..81bbe7e005955a5b36518d0b5df9480a87666aa6 100644 --- a/src/metabase/models/database.clj +++ b/src/metabase/models/database.clj @@ -20,6 +20,8 @@ [metabase.public-settings.premium-features :as premium-features :refer [defenterprise]] + ;; Trying to use metabase.search would cause a circular reference ;_; + [metabase.search.spec :as search.spec] [metabase.sync.schedules :as sync.schedules] [metabase.util :as u] [metabase.util.honey-sql-2 :as h2x] @@ -497,3 +499,18 @@ (fn [table-id] {:pre [(integer? table-id)]} (t2/select-one-fn :db_id :model/Table, :id table-id)))) + +;;;; ------------------------------------------------- Search ---------------------------------------------------------- + +(search.spec/define-spec "database" + {:model :model/Database + :attrs {:archived false + :collection-id false + :creator-id false + ;; not sure if this is another bug + :database-id false + :table-id false + :created-at true + :updated-at true} + :search-terms [:name :description] + :render-terms {:initial-sync-status true}}) diff --git a/src/metabase/models/model_index.clj b/src/metabase/models/model_index.clj index 63df9817d7f0c4723ea1e18fbcf0ff83cbe26292..5b03ed901789658984db90ce5a1d0b0fb2551731 100644 --- a/src/metabase/models/model_index.clj +++ b/src/metabase/models/model_index.clj @@ -9,6 +9,7 @@ [metabase.models.card :refer [Card]] [metabase.models.interface :as mi] [metabase.query-processor :as qp] + [metabase.search :as search] [metabase.sync.schedules :as sync.schedules] [metabase.util.cron :as u.cron] [metabase.util.log :as log] @@ -179,3 +180,27 @@ :schedule (default-schedule) :state "initial" :creator_id creator-id}]))) + +;;;; ------------------------------------------------- Search ---------------------------------------------------------- + +(search/define-spec "indexed-entity" + {:model :model/ModelIndexValue + :attrs {:id :model_pk + :collection-id :collection.id + :creator-id false + :database-id :model.database_id + :table-id false + ;; this seems wrong, I'd expect it to track whether the model is archived. + :archived false + :created-at false + :updated-at false} + :search-terms [:name] + :render-terms {:collection-name :collection.name + :collection-type :collection.type + :model-id :model.id + :model-name :model.name + :pk-ref :model_index.pk_ref + :model-index-id :model_index.id} + :joins {:model_index [:model/ModelIndex [:= :model_index.id :this.model_index_id]] + :model [:model/Card [:= :model.id :model_index.model_id]] + :collection [:model/Collection [:= :collection.id :model.collection_id]]}}) diff --git a/src/metabase/models/segment.clj b/src/metabase/models/segment.clj index ab3355d9c59eb3659607437e404267c353c140a1..760c2515b899db38d791e5c01f7a348fb2229bb0 100644 --- a/src/metabase/models/segment.clj +++ b/src/metabase/models/segment.clj @@ -19,6 +19,7 @@ [metabase.models.interface :as mi] [metabase.models.revision :as revision] [metabase.models.serialization :as serdes] + [metabase.search :as search] [metabase.util :as u] [metabase.util.i18n :refer [tru]] [metabase.util.log :as log] @@ -185,3 +186,21 @@ (select-keys metric [:name :description :revision_message]) :table_id table-id :database_id db-id))) + +;;;; ------------------------------------------------- Search ---------------------------------------------------------- + +(search/define-spec "segment" + {:model :model/Segment + :attrs {:collection-id :id + :creator-id false + :database-id :table.db_id + :table-id :table_id + :archived true + ;; Matching legacy behavior, where this cannot be filtered on. + ;:created-at true + :updated-at true} + :search-terms [:name :description] + :render-terms {:table_description :table.description + :table_name :table.name + :table_schema :table.schema} + :joins {:table [:model/Table [:= :table.id :this.table_id]]}}) diff --git a/src/metabase/models/table.clj b/src/metabase/models/table.clj index d05d06e558c934685fb59c1d41ce714b2c177ec0..cd0209783f83222c6ccf33f696eaf56395069c88 100644 --- a/src/metabase/models/table.clj +++ b/src/metabase/models/table.clj @@ -11,6 +11,7 @@ [metabase.models.permissions-group :as perms-group] [metabase.models.serialization :as serdes] [metabase.public-settings.premium-features :refer [defenterprise]] + [metabase.search :as search] [metabase.util :as u] [methodical.core :as methodical] [toucan2.core :as t2])) @@ -296,3 +297,28 @@ (defmethod audit-log/model-details Table [table _event-type] (select-keys table [:id :name :db_id])) + +;;;; ------------------------------------------------- Search ---------------------------------------------------------- + +(search/define-spec "table" + {:model :model/Table + :attrs {:collection-id false + :creator-id false + :database-id :db_id + :table-id :id + ;; legacy search uses :active for this, but then has a rule to only ever show active tables + ;; so we moved that to the where clause + :archived false + :created-at true + :updated-at true} + :search-terms [:name :description :display_name] + :render-terms {:initial-sync-status true + :table-description :description + :table-name :name + :table-schema :schema + :database-name :db.name} + :where [:and + :active + [:= :visibility_type nil] + [:not= :db_id [:inline audit/audit-db-id]]] + :joins {:db [:model/Database [:= :db.id :this.db_id]]}}) diff --git a/src/metabase/search.clj b/src/metabase/search.clj index 5fca5ec963211fc33185128e540a76b61b339c5d..cd81b823b0ee3aa54d83a84d05d804e5fd1015ba 100644 --- a/src/metabase/search.clj +++ b/src/metabase/search.clj @@ -7,6 +7,7 @@ [metabase.search.fulltext :as search.fulltext] [metabase.search.impl :as search.impl] [metabase.search.postgres.core :as search.postgres] + [metabase.search.spec :as search.spec] [potemkin :as p])) (set! *warn-on-reflection* true) @@ -14,7 +15,8 @@ (comment search.api/keep-me search.config/keep-me - search.impl/keep-me) + search.impl/keep-me + search.spec/keep-me) (p/import-vars [search.config @@ -25,7 +27,9 @@ [search.impl search ;; We could avoid exposing this by wrapping `query-model-set` and `search` with it. - search-context]) + search-context] + [search.spec + define-spec]) ;; TODO The following need to be cleaned up to use multimethods. diff --git a/src/metabase/search/config.clj b/src/metabase/search/config.clj index 72bfff03bae584a3eff0426265e4ccc66455b388..a9ca61f33a1e54af0d7496b563999d7bbea1b007 100644 --- a/src/metabase/search/config.clj +++ b/src/metabase/search/config.clj @@ -1,8 +1,6 @@ (ns metabase.search.config (:require [cheshire.core :as json] - [clojure.string :as str] - [flatland.ordered.map :as ordered-map] [metabase.api.common :as api] [metabase.models.setting :refer [defsetting]] [metabase.permissions.util :as perms.u] @@ -39,12 +37,13 @@ (def ^:const dashboard-count-ceiling "Results in more dashboards than this are all considered to be equally popular." - 50) + 10) (def ^:const surrounding-match-context "Show this many words of context before/after matches in long search results" 2) +;; We won't need this once fully migrated to specs, but kept for now in case legacy cod falls out of sync (def excluded-models "Set of models that should not be included in search results." #{"dashboard-card" @@ -58,6 +57,9 @@ "timeline" "user"}) +;; TODO we could almost replace this using the spec, but there are two blockers +;; - We do not cover index-entity yet +;; - We also need to provide an alias (and this must match the API one for legacy) (def model-to-db-model "Mapping from string model to the Toucan model backing it." (apply dissoc api/model->db-model excluded-models)) @@ -73,13 +75,17 @@ (assert (= all-models (set models-search-order)) "The models search order has to include all models") -(defn search-model->revision-model - "Return the apporpriate revision model given a search model." - [model] - (case model - "dataset" (recur "card") - "metric" (recur "card") - (str/capitalize model))) +(def weights + "Strength of the various scorers. Copied from metabase.search.in-place.scoring, but allowing divergence." + {:pinned 2 ;; simple field + :bookmarked 2 ;; join with multi-table entity + :recency 1.5 ;; date formula + :dashboard 1 ;; simple field + :model 0.5 ;; simple field + :official-collection 2 ;; a field we can calculate + :verified 2 ;; a simple field + :text 10 ;; strength of text-scores-weight previously + }) (defn model->alias "Given a model string returns the model alias" @@ -131,234 +137,6 @@ [:verified {:optional true} true?] [:ids {:optional true} [:set {:min 1} ms/PositiveInt]]]) -(def all-search-columns - "All columns that will appear in the search results, and the types of those columns. The generated search query is a - `UNION ALL` of the queries for each different entity; it looks something like: - - SELECT 'card' AS model, id, cast(NULL AS integer) AS table_id, ... - FROM report_card - UNION ALL - SELECT 'metric' as model, id, table_id, ... - FROM metric - - Columns that aren't used in any individual query are replaced with `SELECT cast(NULL AS <type>)` statements. (These - are cast to the appropriate type because Postgres will assume `SELECT NULL` is `TEXT` by default and will refuse to - `UNION` two columns of two different types.)" - (ordered-map/ordered-map - ;; returned for all models. Important to be first for changing model for dataset - :model :text - :id :integer - :name :text - :display_name :text - :description :text - :archived :boolean - ;; returned for Card, Dashboard, and Collection - :collection_id :integer - :collection_name :text - :collection_type :text - :collection_location :text - :collection_authority_level :text - :archived_directly :boolean - ;; returned for Card and Dashboard - :collection_position :integer - :creator_id :integer - :created_at :timestamp - :bookmark :boolean - ;; returned for everything except Collection - :updated_at :timestamp - ;; returned only for Collection - :location :text - ;; returned for Card only, used for scoring and displays - :dashboardcard_count :integer - :last_edited_at :timestamp - :last_editor_id :integer - :moderated_status :text - :display :text - ;; returned for Metric and Segment - :table_id :integer - :table_schema :text - :table_name :text - :table_description :text - ;; returned for Metric, Segment, and Action - :database_id :integer - ;; returned for Database and Table - :initial_sync_status :text - :database_name :text - ;; returned for Action - :model_id :integer - :model_name :text - ;; returned for indexed-entity - :pk_ref :text - :model_index_id :integer - ;; returned for Card and Action - :dataset_query :text)) - -(def ^:const displayed-columns - "All of the result components that by default are displayed by the frontend." - #{:name :display_name :collection_name :description}) - -(defmulti searchable-columns - "The columns that can be searched for each model." - {:arglists '([model search-native-query])} - (fn [model _] model)) - -(defmethod searchable-columns :default - [_ _] - [:name]) - -(defmethod searchable-columns "action" - [_ search-native-query] - (cond-> [:name - :description] - search-native-query - (conj :dataset_query))) - -(defmethod searchable-columns "card" - [_ search-native-query] - (cond-> [:name - :description] - search-native-query - (conj :dataset_query))) - -(defmethod searchable-columns "dataset" - [_ search-native-query] - (searchable-columns "card" search-native-query)) - -(defmethod searchable-columns "metric" - [_ search-native-query] - (searchable-columns "card" search-native-query)) - -(defmethod searchable-columns "dashboard" - [_ _] - [:name - :description]) - -(defmethod searchable-columns "page" - [_ search-native-query] - (searchable-columns "dashboard" search-native-query)) - -(defmethod searchable-columns "database" - [_ _] - [:name - :description]) - -(defmethod searchable-columns "table" - [_ _] - [:name - :display_name - :description]) - -(defmethod searchable-columns "indexed-entity" - [_ _] - [:name]) - -(def ^:private default-columns - "Columns returned for all models." - [:id :name :description :archived :created_at :updated_at]) - -(def ^:private bookmark-col - "Case statement to return boolean values of `:bookmark` for Card, Collection and Dashboard." - [[:case [:not= :bookmark.id nil] true :else false] :bookmark]) - -(def ^:private dashboardcard-count-col - "Subselect to get the count of associated DashboardCards" - [{:select [:%count.*] - :from [:report_dashboardcard] - :where [:= :report_dashboardcard.card_id :card.id]} - :dashboardcard_count]) - -(def ^:private table-columns - "Columns containing information about the Table this model references. Returned for Metrics and Segments." - [:table_id - :created_at - [:table.db_id :database_id] - [:table.schema :table_schema] - [:table.name :table_name] - [:table.description :table_description]]) - -(defmulti columns-for-model - "The columns that will be returned by the query for `model`, excluding `:model`, which is added automatically. - This is not guaranteed to be the final list of columns, new columns can be added by calling [[api.search/replace-select]]" - {:arglists '([model])} - (fn [model] model)) - -(defmethod columns-for-model "action" - [_] - (conj default-columns :model_id - :creator_id - [:model.collection_id :collection_id] - [:model.id :model_id] - [:model.name :model_name] - [:query_action.database_id :database_id] - [:query_action.dataset_query :dataset_query])) - -(defmethod columns-for-model "card" - [_] - (conj default-columns :collection_id :archived_directly :collection_position :dataset_query :display :creator_id - [:collection.name :collection_name] - [:collection.type :collection_type] - [:collection.location :collection_location] - [:collection.authority_level :collection_authority_level] - bookmark-col dashboardcard-count-col)) - -(defmethod columns-for-model "indexed-entity" [_] - [[:model-index-value.name :name] - [:model-index-value.model_pk :id] - [:model-index.pk_ref :pk_ref] - [:model-index.id :model_index_id] - [:collection.name :collection_name] - [:collection.type :collection_type] - [:model.collection_id :collection_id] - [:model.id :model_id] - [:model.name :model_name] - [:model.database_id :database_id]]) - -(defmethod columns-for-model "dashboard" - [_] - (conj default-columns :archived_directly :collection_id :collection_position :creator_id bookmark-col - [:collection.name :collection_name] - [:collection.type :collection_type] - [:collection.authority_level :collection_authority_level])) - -(defmethod columns-for-model "database" - [_] - [:id :name :description :created_at :updated_at :initial_sync_status]) - -(defmethod columns-for-model "collection" - [_] - (conj (remove #{:updated_at} default-columns) - [:collection.id :collection_id] - [:name :collection_name] - [:type :collection_type] - [:authority_level :collection_authority_level] - :archived_directly - :location - bookmark-col)) - -(defmethod columns-for-model "segment" - [_] - (concat default-columns table-columns [:creator_id])) - -(defmethod columns-for-model "metric" - [_] - (concat default-columns table-columns [:creator_id])) - -(defmethod columns-for-model "table" - [_] - [[:table.id :id] - [:table.name :name] - [:table.created_at :created_at] - [:table.display_name :display_name] - [:table.description :description] - [:table.updated_at :updated_at] - [:table.initial_sync_status :initial_sync_status] - [:table.id :table_id] - [:table.db_id :database_id] - [:table.schema :table_schema] - [:table.name :table_name] - [:table.description :table_description] - [:metabase_database.name :database_name]]) - (defmulti column->string "Turn a complex column into a string" (fn [_column-value model column-name] diff --git a/src/metabase/search/filter.clj b/src/metabase/search/filter.clj index 05e8022c79d544f47836f9b54ba3de89479310d8..dc2762f2ad16cc06d903409b1c95ac11babfe6bb 100644 --- a/src/metabase/search/filter.clj +++ b/src/metabase/search/filter.clj @@ -1,162 +1,61 @@ -(ns metabase.search.filter - "Namespace that defines the filters that are applied to the search results. - - There are required filters and optional filters. - Archived is an required filters and is always applied, the reason because by default we want to hide archived/inactive entities. - - But there are OPTIONAL FILTERS like :created-by, :created-at, when these filters are provided, the results will return only - results of models that have these filters. - - The multi method for optional filters should have the default implementation to throw for unsupported models, and then each model - that supports the filter should define its own method for the filter." +(ns ^:mb/once metabase.search.filter (:require - [clojure.set :as set] - [clojure.string :as str] [honey.sql.helpers :as sql.helpers] - [metabase.audit :as audit] [metabase.driver.common.parameters.dates :as params.dates] [metabase.public-settings.premium-features :as premium-features] - [metabase.search.config - :as search.config - :refer [SearchableModel SearchContext]] - [metabase.search.util :as search.util] + [metabase.search.in-place.filter :as search.in-place.filter] + [metabase.search.spec :as search.spec] + [metabase.util :as u] [metabase.util.date-2 :as u.date] - [metabase.util.i18n :refer [tru]] - [metabase.util.malli :as mu]) + [metabase.util.i18n :refer [tru]]) (:import (java.time LocalDate))) -(def ^:private true-clause [:inline [:= 1 1]]) -(def ^:private false-clause [:inline [:= 0 1]]) - -;; ------------------------------------------------------------------------------------------------;; -;; Required Filters ; -;; ------------------------------------------------------------------------------------------------;; - -(defmulti ^:private archived-clause - "Clause to filter by the archived status of the entity." - {:arglists '([model archived?])} - (fn [model _] model)) - -(defmethod archived-clause :default - [model archived?] - [:= (search.config/column-with-model-alias model :archived) archived?]) - -;; Databases can't be archived -(defmethod archived-clause "database" - [_model archived?] - (if archived? - false-clause - true-clause)) - -(defmethod archived-clause "indexed-entity" - [_model archived?] - (if-not archived? - true-clause - false-clause)) - -;; Table has an `:active` flag, but no `:archived` flag; never return inactive Tables -(defmethod archived-clause "table" - [model archived?] - (if archived? - false-clause ; No tables should appear in archive searches - [:and - [:= (search.config/column-with-model-alias model :active) true] - [:= (search.config/column-with-model-alias model :visibility_type) nil]])) - -(defn- sandboxed-or-impersonated-user? [] - ;; TODO FIXME -- search actually currently still requires [[metabase.api.common/*current-user*]] to be bound, - ;; because [[metabase.public-settings.premium-features/sandboxed-or-impersonated-user?]] requires it to be bound. - ;; Since it's part of the search context it would be nice if we could run search without having to bind that stuff at - ;; all. - (assert @@(requiring-resolve 'metabase.api.common/*current-user*) - "metabase.api.common/*current-user* must be bound in order to use search for an indexed entity") - (premium-features/sandboxed-or-impersonated-user?)) - -(mu/defn- search-string-clause-for-model - [model :- SearchableModel - search-context :- SearchContext - search-native-query :- [:maybe true?]] - (when-let [query (:search-string search-context)] - (into - [:or] - (for [column (->> (search.config/searchable-columns model search-native-query) - (map #(search.config/column-with-model-alias model %))) - wildcarded-token (->> (search.util/normalize query) - search.util/tokenize - (map search.util/wildcard-match))] - (cond - (and (= model "indexed-entity") (sandboxed-or-impersonated-user?)) - [:= 0 1] - - (and (#{"card" "dataset"} model) (= column (search.config/column-with-model-alias model :dataset_query))) - [:and - [:= (search.config/column-with-model-alias model :query_type) "native"] - [:like [:lower column] wildcarded-token]] - - (and (#{"action"} model) - (= column (search.config/column-with-model-alias model :dataset_query))) - [:like [:lower :query_action.dataset_query] wildcarded-token] - - :else - [:like [:lower column] wildcarded-token]))))) - -;; ------------------------------------------------------------------------------------------------;; -;; Optional filters ;; -;; ------------------------------------------------------------------------------------------------;; - -(defmulti ^:private build-optional-filter-query - "Build the query to filter by `filter`. - Dispatch with an array of [filter model-name]." - {:arglists '([model filter query filter-value])} - (fn [filter model _query _filter-value] - [filter model])) - -(defmethod build-optional-filter-query :default - [filter model _query _filter-value] - (throw (ex-info (format "%s filter for %s is not supported" filter model) {:filter filter :model model}))) - -;; Created by filters -(defn- default-created-by-filter-clause - [model creator-ids] - (if (= 1 (count creator-ids)) - [:= (search.config/column-with-model-alias model :creator_id) (first creator-ids)] - [:in (search.config/column-with-model-alias model :creator_id) creator-ids])) - -(doseq [model ["card" "dataset" "metric" "dashboard" "action"]] - (defmethod build-optional-filter-query [:created-by model] - [_filter model query creator-ids] - (sql.helpers/where query (default-created-by-filter-clause model creator-ids)))) - -(doseq [model ["card" "dataset" "metric" "dashboard" "action"]] - (defmethod build-optional-filter-query [:id model] - [_filter model query ids] - (sql.helpers/where query [:in (search.config/column-with-model-alias model :id) ids]))) - -;; Verified filters - -(defmethod build-optional-filter-query [:verified "card"] - [_filter model query verified] - (assert (true? verified) "filter for non-verified cards is not supported") - (if (premium-features/has-feature? :content-verification) - (-> query - (sql.helpers/join :moderation_review - [:= :moderation_review.moderated_item_id - (search.config/column-with-model-alias model :id)]) - (sql.helpers/where [:= :moderation_review.status "verified"] - [:= :moderation_review.moderated_item_type "card"] - [:= :moderation_review.most_recent true])) - (sql.helpers/where query false-clause))) - -(defmethod build-optional-filter-query [:verified "dataset"] - [filter _model query verified] - (build-optional-filter-query filter "card" query verified)) - -(defmethod build-optional-filter-query [:verified "metric"] - [filter _model query verified] - (build-optional-filter-query filter "card" query verified)) +(def ^:private filter->type + {:archived? ::single-value + :created-at ::date-range + :created-by ::list + :last-edited-at ::date-range + :last-edited-by ::list + :table-db-id ::single-value + :verified ::verified}) + +(def ^:private context-key->attr + {:archived? :archived + :created-at :created-at + :created-by :creator-id + :last-edited-at :last-edited-at + :last-edited-by :last-editor-id + :search-native-query :native-query + ;; this actually has nothing to do with tables anymore, as we also filter cards. + :table-db-id :database-id + :verified :verified}) + +;; TODO dry this alias up with the index hydration code +(def ^:private field-alias {:created-at :model-created-at}) + +(def ^:private attr->index-key + (into {} (for [k (vals context-key->attr)] + [k (keyword (str "search_index." (u/->snake_case_en (name (get field-alias k k)))))]))) + +(defn- remove-if-falsey [m k] + (if (m k) m (dissoc m k))) + +(defn search-context->applicable-models + "Returns a set of models that are applicable given the search context. -;; Created at filters + If the context has optional filters, the models will be restricted for the set of supported models only." + [search-ctx] + (if (= :search.engine/in-place (:search-engine search-ctx)) + (search.in-place.filter/search-context->applicable-models search-ctx) + ;; Archived is an eccentric one - we treat it as false for models that don't map it + (let [required (->> (remove-if-falsey search-ctx :archived?) keys (keep context-key->attr))] + (into #{} + (remove nil?) + (for [search-model (:models search-ctx) + :let [spec (search.spec/spec search-model)]] + (when (every? (:attrs spec) required) + (:name spec))))))) (defn- date-range-filter-clause [dt-col dt-val] @@ -182,161 +81,44 @@ :else [:and [:>= dt-col start] [:< dt-col end]]))) -(doseq [model ["collection" "database" "table" "dashboard" "card" "dataset" "metric" "action"]] - (defmethod build-optional-filter-query [:created-at model] - [_filter model query created-at] - (sql.helpers/where query (date-range-filter-clause - (search.config/column-with-model-alias model :created_at) - created-at)))) - -;; Last edited by filter - -(defn- joined-with-table? - "Check if the query have a join with `table`. - Note: this does a very shallow check by only checking if the join-clause is the same. - Using the same table with a different alias will return false. - - (-> (sql.helpers/select :*) - (sql.helpers/from [:a]) - (sql.helpers/join :b [:= :a.id :b.id]) - (joined-with-table? :join :b)) - - ;; => true" - [query join-type table] - (->> (get query join-type) (partition 2) (map first) (some #(= % table)) boolean)) - -(defn search-model->revision-model - "Return the apporpriate revision model given a search model." - [model] - (case model - "dataset" (recur "card") - (str/capitalize model))) - -(doseq [model ["dashboard" "card" "dataset" "metric"]] - (defmethod build-optional-filter-query [:last-edited-by model] - [_filter model query editor-ids] - (cond-> query - ;; both last-edited-by and last-edited-at join with revision, so we should be careful not to join twice - (not (joined-with-table? query :join :revision)) - (-> (sql.helpers/join :revision [:= :revision.model_id (search.config/column-with-model-alias model :id)]) - (sql.helpers/where [:= :revision.most_recent true] - [:= :revision.model (search.config/search-model->revision-model model)])) - (= 1 (count editor-ids)) - (sql.helpers/where [:= :revision.user_id (first editor-ids)]) - - (> (count editor-ids) 1) - (sql.helpers/where [:in :revision.user_id editor-ids])))) - -(doseq [model ["dashboard" "card" "dataset" "metric"]] - (defmethod build-optional-filter-query [:last-edited-at model] - [_filter model query last-edited-at] - (cond-> query - ;; both last-edited-by and last-edited-at join with revision, so we should be careful not to join twice - (not (joined-with-table? query :join :revision)) - (-> (sql.helpers/join :revision [:= :revision.model_id (search.config/column-with-model-alias model :id)]) - (sql.helpers/where [:= :revision.most_recent true] - [:= :revision.model (search.config/search-model->revision-model model)])) - true - ;; on UI we showed the the last edit info from revision.timestamp - ;; not the model.updated_at column - ;; to be consistent we use revision.timestamp to do the filtering - (sql.helpers/where (date-range-filter-clause :revision.timestamp last-edited-at))))) - -;; TODO: once we record revision for actions, we should update this to use the same approach with dashboard/card -(defmethod build-optional-filter-query [:last-edited-at "action"] - [_filter model query last-edited-at] - (sql.helpers/where query (date-range-filter-clause - (search.config/column-with-model-alias model :updated_at) - last-edited-at))) - -(defn- feature->supported-models - "Return A map of filter to its support models. - - E.g: {:created-by #{\"card\" \"dataset\" \"dashboard\" \"action\"}} - - This is function instead of a def so that optional-filter-clause can be defined anywhere in the codebase." - [] - (merge - ;; models support search-native-query if there are additional columns to search when the `search-native-query` - ;; argument is true - {:search-native-query (->> (dissoc (methods search.config/searchable-columns) :default) - (filter (fn [[model f]] - (seq (set/difference (set (f model true)) (set (f model false)))))) - (map first) - set)} - (->> (dissoc (methods build-optional-filter-query) :default) - keys - (reduce (fn [acc [filter model]] - (update acc filter set/union #{model})) - {})))) - -;; ------------------------------------------------------------------------------------------------;; -;; Public functions ;; -;; ------------------------------------------------------------------------------------------------;; - -(mu/defn search-context->applicable-models :- [:set SearchableModel] - "Returns a set of models that are applicable given the search context. - - If the context has optional filters, the models will be restricted for the set of supported models only." - [search-context :- SearchContext] - (let [{:keys [created-at - created-by - last-edited-at - last-edited-by - models - search-native-query - verified]} search-context - feature->supported-models (feature->supported-models)] - (cond-> models - (some? created-at) (set/intersection (:created-at feature->supported-models)) - (some? created-by) (set/intersection (:created-by feature->supported-models)) - (some? last-edited-at) (set/intersection (:last-edited-at feature->supported-models)) - (some? last-edited-by) (set/intersection (:last-edited-by feature->supported-models)) - (true? search-native-query) (set/intersection (:search-native-query feature->supported-models)) - (true? verified) (set/intersection (:verified feature->supported-models))))) - -(mu/defn build-filters :- :map - "Build the search filters for a model." - [honeysql-query :- :map - model :- SearchableModel - search-context :- SearchContext] - (let [{:keys [models - archived? - created-at - created-by - last-edited-at - last-edited-by - search-string - search-native-query - verified - ids]} search-context] - (cond-> honeysql-query - (not (str/blank? search-string)) - (sql.helpers/where (search-string-clause-for-model model search-context search-native-query)) - - (some? archived?) - (sql.helpers/where (archived-clause model archived?)) - - ;; build optional filters - (some? created-at) - (#(build-optional-filter-query :created-at model % created-at)) - - (some? created-by) - (#(build-optional-filter-query :created-by model % created-by)) - - (some? last-edited-at) - (#(build-optional-filter-query :last-edited-at model % last-edited-at)) - - (some? last-edited-by) - (#(build-optional-filter-query :last-edited-by model % last-edited-by)) - - (some? verified) - (#(build-optional-filter-query :verified model % verified)) - - (and (some? ids) - (contains? models model)) - (#(build-optional-filter-query :id model % ids)) - - (= "table" model) - (sql.helpers/where - [:not [:= (search.config/column-with-model-alias "table" :db_id) audit/audit-db-id]])))) +(defmulti ^:private where-clause* (fn [context-key _column _v] context-key)) + +(defmethod where-clause* ::single-value [_ k v] [:= k v]) + +(defmethod where-clause* ::date-range [_ k v] (date-range-filter-clause k v)) + +(defmethod where-clause* ::list [_ k v] [:in k v]) + +(defmethod where-clause* ::verified [_ k v] + (assert (true? v) "filter for non-verified cards is not supported") + (when (premium-features/has-feature? :content-verification) + [:= k v])) + +(def ^:private true-clause + [:inline [:= 1 1]]) + +(assert (= (disj (set (keys context-key->attr)) :search-native-query) + (set (keys filter->type))) + "All filters have been implemented.") + +(defn with-filters + "Return a HoneySQL clause corresponding to all the optional search filters." + [search-context qry] + (as-> qry qry + (sql.helpers/where qry (when (seq (:models search-context)) + [:in :model (:models search-context)])) + (sql.helpers/where qry (when-let [ids (:ids search-context)] + [:and + [:in :model_id ids] + ;; NOTE: we limit id-based search to only a subset of the models + ;; TODO this should just become part of the spec e.g. :search-by-id? + [:in :model ["card" "dataset" "metric" "dashboard" "action"]]])) + (reduce (fn [qry [ctx-key attr-key]] + (let [v (get search-context ctx-key)] + (if (some? v) + (sql.helpers/where qry (or (where-clause* (filter->type ctx-key) + (attr->index-key attr-key) v) + true-clause)) + qry))) + qry + (dissoc context-key->attr :search-native-query)))) diff --git a/src/metabase/search/fulltext.clj b/src/metabase/search/fulltext.clj index a37a4de13cb0e5fbb0be2f900751a53b751b82ba..00675903e209228570ad96a6a6db8c48227f3fa7 100644 --- a/src/metabase/search/fulltext.clj +++ b/src/metabase/search/fulltext.clj @@ -4,11 +4,9 @@ [metabase.search.api :as search.api] [metabase.search.postgres.core :as search.postgres])) -;; We have a bunch of experimental flavors! 🧄🌶🊠-(derive :search.engine/hybrid :search.engine/fulltext) -(derive :search.engine/hybrid-multi :search.engine/fulltext) -(derive :search.engine/minimal :search.engine/fulltext) -(derive :search.engine/minimal-with-perms :search.engine/fulltext) +;; We have a bunch of experimental flavors! (well, not really anymore) +(doseq [flavor [:search.engine/hybrid]] + (derive flavor :search.engine/fulltext)) (defmulti supported-db? "Does the app db support fulltext search?" identity) @@ -17,7 +15,7 @@ (defmethod supported-db? :postgres [_] (public-settings/experimental-fulltext-search-enabled)) -;; For now we now that the app db is postgres. We can make these multimethods when that changes. +;; For now, we know that the app db is postgres. We can extract multimethods if this changes. (defmethod search.api/results :search.engine/fulltext [search-ctx] diff --git a/src/metabase/search/impl.clj b/src/metabase/search/impl.clj index 47836bb89e0236014bdbbe9520e90b02b808d5f0..fababae5877117abd56545ac8749a9f348b33fce 100644 --- a/src/metabase/search/impl.clj +++ b/src/metabase/search/impl.clj @@ -18,7 +18,7 @@ :refer [SearchableModel SearchContext]] [metabase.search.filter :as search.filter] [metabase.search.fulltext :as search.fulltext] - [metabase.search.scoring :as scoring] + [metabase.search.in-place.scoring :as scoring] [metabase.util.i18n :refer [tru deferred-tru]] [metabase.util.log :as log] [metabase.util.malli :as mu] @@ -141,6 +141,10 @@ (assoc search-result :effective_location nil))) search-results))) +(def ^:private ^:const displayed-columns + "All the result components that by default are displayed by the frontend." + #{:name :display_name :collection_name :description}) + ;;; TODO OMG mix of kebab-case and snake_case here going to make me throw up, we should use all kebab-case in Clojure ;;; land and then convert the stuff that actually gets sent over the wire in the REST API to snake_case in the API ;;; endpoint itself, not in the search impl. @@ -149,8 +153,8 @@ [{:as result :keys [all-scores relevant-scores name display_name collection_id collection_name collection_authority_level collection_type collection_effective_ancestors effective_parent archived_directly model]}] - (let [matching-columns (into #{} (remove nil? (map :column relevant-scores))) - match-context-thunk (first (keep :match-context-thunk relevant-scores)) + (let [matching-columns (into #{} (keep :column relevant-scores)) + match-context-thunk (some :match-context-thunk relevant-scores) remove-thunks (partial mapv #(dissoc % :match-context-thunk))] (-> result (assoc @@ -159,7 +163,7 @@ name) :context (when (and match-context-thunk (empty? - (remove matching-columns search.config/displayed-columns))) + (remove matching-columns displayed-columns))) (match-context-thunk)) :collection (if (and archived_directly (not= "collection" model)) (select-keys (collection/trash-collection) @@ -224,14 +228,20 @@ ;; This forwarding is here for tests, we should clean those up. +(defn- apply-default-engine [{:keys [search-engine] :as search-ctx}] + (when (= default-engine search-engine) + (throw (ex-info "Missing implementation for default search-engine" {:search-engine search-engine}))) + (log/debugf "Missing implementation for %s so instead using %s" search-engine default-engine) + (assoc search-ctx :search-engine default-engine)) + (defmethod search.api/results :default [search-ctx] - (search.api/results (assoc search-ctx :search-engine default-engine))) + (search.api/results (apply-default-engine search-ctx))) (defmethod search.api/model-set :default [search-ctx] - (search.api/model-set (assoc search-ctx :search-engine default-engine))) + (search.api/model-set (apply-default-engine search-ctx))) (defmethod search.api/score :default [results search-ctx] - (search.api/score results (assoc search-ctx :search-engine default-engine))) + (search.api/score results (apply-default-engine search-ctx))) (mr/def ::search-context.input [:map {:closed true} @@ -371,7 +381,7 @@ "Builds a search query that includes all the searchable entities, and runs it." [search-ctx :- search.config/SearchContext] (let [reducible-results (search.api/results search-ctx) - scoring-ctx (select-keys search-ctx [:search-string :search-native-query]) + scoring-ctx (select-keys search-ctx [:search-engine :search-string :search-native-query]) xf (comp (take search.config/*db-max-results*) (map normalize-result) diff --git a/src/metabase/search/in_place/filter.clj b/src/metabase/search/in_place/filter.clj new file mode 100644 index 0000000000000000000000000000000000000000..8f4619845c52be68a113c46716bab06d650bbe53 --- /dev/null +++ b/src/metabase/search/in_place/filter.clj @@ -0,0 +1,332 @@ +(ns metabase.search.in-place.filter + "Namespace that defines the filters that are applied to the search results. + + There are required filters and optional filters. + Archived is an required filters and is always applied, the reason because by default we want to hide archived/inactive entities. + + But there are OPTIONAL FILTERS like :created-by, :created-at, when these filters are provided, the results will return only + results of models that have these filters. + + The multi method for optional filters should have the default implementation to throw for unsupported models, and then each model + that supports the filter should define its own method for the filter." + (:require + [clojure.set :as set] + [clojure.string :as str] + [honey.sql.helpers :as sql.helpers] + [metabase.audit :as audit] + [metabase.driver.common.parameters.dates :as params.dates] + [metabase.public-settings.premium-features :as premium-features] + [metabase.search.config + :as search.config + :refer [SearchableModel SearchContext]] + [metabase.search.permissions :as search.permissions] + [metabase.search.util :as search.util] + [metabase.util.date-2 :as u.date] + [metabase.util.i18n :refer [tru]] + [metabase.util.malli :as mu]) + (:import + (java.time LocalDate))) + +(def ^:private true-clause [:inline [:= 1 1]]) +(def ^:private false-clause [:inline [:= 0 1]]) + +;; ------------------------------------------------------------------------------------------------;; +;; Required Filters ; +;; ------------------------------------------------------------------------------------------------;; + +(defmulti ^:private archived-clause + "Clause to filter by the archived status of the entity." + {:arglists '([model archived?])} + (fn [model _] model)) + +(defmethod archived-clause :default + [model archived?] + [:= (search.config/column-with-model-alias model :archived) archived?]) + +;; Databases can't be archived +(defmethod archived-clause "database" + [_model archived?] + (if archived? + false-clause + true-clause)) + +(defmethod archived-clause "indexed-entity" + [_model archived?] + (if-not archived? + true-clause + false-clause)) + +;; Table has an `:active` flag, but no `:archived` flag; never return inactive Tables +(defmethod archived-clause "table" + [model archived?] + (if archived? + false-clause ; No tables should appear in archive searches + [:and + [:= (search.config/column-with-model-alias model :active) true] + [:= (search.config/column-with-model-alias model :visibility_type) nil]])) + +(mu/defn- search-string-clause-for-model + [model :- SearchableModel + search-context :- SearchContext + search-native-query :- [:maybe true?]] + (when-let [query (:search-string search-context)] + (into + [:or] + (for [column (->> (let [search-columns-fn (requiring-resolve 'metabase.search.legacy/searchable-columns)] + (search-columns-fn model search-native-query)) + (map #(search.config/column-with-model-alias model %))) + wildcarded-token (->> (search.util/normalize query) + search.util/tokenize + (map search.util/wildcard-match))] + (cond + (and (= model "indexed-entity") (search.permissions/sandboxed-or-impersonated-user?)) + [:= 0 1] + + (and (#{"card" "dataset"} model) (= column (search.config/column-with-model-alias model :dataset_query))) + [:and + [:= (search.config/column-with-model-alias model :query_type) "native"] + [:like [:lower column] wildcarded-token]] + + (and (#{"action"} model) + (= column (search.config/column-with-model-alias model :dataset_query))) + [:like [:lower :query_action.dataset_query] wildcarded-token] + + :else + [:like [:lower column] wildcarded-token]))))) + +;; ------------------------------------------------------------------------------------------------;; +;; Optional filters ;; +;; ------------------------------------------------------------------------------------------------;; + +(defmulti ^:private build-optional-filter-query + "Build the query to filter by `filter`. + Dispatch with an array of [filter model-name]." + {:arglists '([model filter query filter-value])} + (fn [filter model _query _filter-value] + [filter model])) + +(defmethod build-optional-filter-query :default + [filter model _query _filter-value] + (throw (ex-info (format "%s filter for %s is not supported" filter model) {:filter filter :model model}))) + +;; Created by filters +(defn- default-created-by-filter-clause + [model creator-ids] + (if (= 1 (count creator-ids)) + [:= (search.config/column-with-model-alias model :creator_id) (first creator-ids)] + [:in (search.config/column-with-model-alias model :creator_id) creator-ids])) + +(doseq [model ["card" "dataset" "metric" "dashboard" "action"]] + (defmethod build-optional-filter-query [:created-by model] + [_filter model query creator-ids] + (sql.helpers/where query (default-created-by-filter-clause model creator-ids)))) + +(doseq [model ["card" "dataset" "metric" "dashboard" "action"]] + (defmethod build-optional-filter-query [:id model] + [_filter model query ids] + (sql.helpers/where query [:in (search.config/column-with-model-alias model :id) ids]))) + +;; Verified filters + +(defmethod build-optional-filter-query [:verified "card"] + [_filter model query verified] + (assert (true? verified) "filter for non-verified cards is not supported") + (if (premium-features/has-feature? :content-verification) + (-> query + (sql.helpers/join :moderation_review + [:= :moderation_review.moderated_item_id + (search.config/column-with-model-alias model :id)]) + (sql.helpers/where [:= :moderation_review.status "verified"] + [:= :moderation_review.moderated_item_type "card"] + [:= :moderation_review.most_recent true])) + (sql.helpers/where query false-clause))) + +(defmethod build-optional-filter-query [:verified "dataset"] + [filter _model query verified] + (build-optional-filter-query filter "card" query verified)) + +(defmethod build-optional-filter-query [:verified "metric"] + [filter _model query verified] + (build-optional-filter-query filter "card" query verified)) + +;; Created at filters + +(defn- date-range-filter-clause + [dt-col dt-val] + (let [date-range (try + (params.dates/date-string->range dt-val {:inclusive-end? false}) + (catch Exception _e + (throw (ex-info (tru "Failed to parse datetime value: {0}" dt-val) {:status-code 400})))) + start (some-> (:start date-range) u.date/parse) + end (some-> (:end date-range) u.date/parse) + dt-col (if (some #(instance? LocalDate %) [start end]) + [:cast dt-col :date] + dt-col)] + (cond + (= start end) + [:= dt-col start] + + (nil? start) + [:< dt-col end] + + (nil? end) + [:> dt-col start] + + :else + [:and [:>= dt-col start] [:< dt-col end]]))) + +(doseq [model ["collection" "database" "table" "dashboard" "card" "dataset" "metric" "action"]] + (defmethod build-optional-filter-query [:created-at model] + [_filter model query created-at] + (sql.helpers/where query (date-range-filter-clause + (search.config/column-with-model-alias model :created_at) + created-at)))) + +;; Last edited by filter + +(defn- joined-with-table? + "Check if the query have a join with `table`. + Note: this does a very shallow check by only checking if the join-clause is the same. + Using the same table with a different alias will return false. + + (-> (sql.helpers/select :*) + (sql.helpers/from [:a]) + (sql.helpers/join :b [:= :a.id :b.id]) + (joined-with-table? :join :b)) + + ;; => true" + [query join-type table] + (->> (get query join-type) (partition 2) (map first) (some #(= % table)) boolean)) + +;; We won't need this post-legacy as it defines the joins à la carte. +(defn- search-model->revision-model [model] + ((requiring-resolve 'metabase.search.legacy/search-model->revision-model) model)) + +(doseq [model ["dashboard" "card" "dataset" "metric"]] + (defmethod build-optional-filter-query [:last-edited-by model] + [_filter model query editor-ids] + (cond-> query + ;; both last-edited-by and last-edited-at join with revision, so we should be careful not to join twice + (not (joined-with-table? query :join :revision)) + (-> (sql.helpers/join :revision [:= :revision.model_id (search.config/column-with-model-alias model :id)]) + (sql.helpers/where [:= :revision.most_recent true] + [:= :revision.model (search-model->revision-model model)])) + (= 1 (count editor-ids)) + (sql.helpers/where [:= :revision.user_id (first editor-ids)]) + + (> (count editor-ids) 1) + (sql.helpers/where [:in :revision.user_id editor-ids])))) + +(doseq [model ["dashboard" "card" "dataset" "metric"]] + (defmethod build-optional-filter-query [:last-edited-at model] + [_filter model query last-edited-at] + (cond-> query + ;; both last-edited-by and last-edited-at join with revision, so we should be careful not to join twice + (not (joined-with-table? query :join :revision)) + (-> (sql.helpers/join :revision [:= :revision.model_id (search.config/column-with-model-alias model :id)]) + (sql.helpers/where [:= :revision.most_recent true] + [:= :revision.model (search-model->revision-model model)])) + true + ;; on UI we showed the the last edit info from revision.timestamp + ;; not the model.updated_at column + ;; to be consistent we use revision.timestamp to do the filtering + (sql.helpers/where (date-range-filter-clause :revision.timestamp last-edited-at))))) + +;; TODO: once we record revision for actions, we should update this to use the same approach with dashboard/card +(defmethod build-optional-filter-query [:last-edited-at "action"] + [_filter model query last-edited-at] + (sql.helpers/where query (date-range-filter-clause + (search.config/column-with-model-alias model :updated_at) + last-edited-at))) + +(defn- feature->supported-models + "Return A map of filter to its support models. + + E.g: {:created-by #{\"card\" \"dataset\" \"dashboard\" \"action\"}} + + This is function instead of a def so that optional-filter-clause can be defined anywhere in the codebase." + [] + (merge + ;; models support search-native-query if there are additional columns to search when the `search-native-query` + ;; argument is true + {:search-native-query (->> (dissoc (methods @(requiring-resolve 'metabase.search.legacy/searchable-columns)) :default) + (filter (fn [[model f]] + (seq (set/difference (set (f model true)) (set (f model false)))))) + (map first) + set)} + (->> (dissoc (methods build-optional-filter-query) :default) + keys + (reduce (fn [acc [filter model]] + (update acc filter set/union #{model})) + {})))) + +;; ------------------------------------------------------------------------------------------------;; +;; Public functions ;; +;; ------------------------------------------------------------------------------------------------;; + +(defn search-context->applicable-models + "Returns a set of models that are applicable given the search context. + + If the context has optional filters, the models will be restricted for the set of supported models only." + [search-context] + (let [{:keys [created-at + created-by + last-edited-at + last-edited-by + models + search-native-query + verified]} search-context + feature->supported-models (feature->supported-models)] + (cond-> models + (some? created-at) (set/intersection (:created-at feature->supported-models)) + (some? created-by) (set/intersection (:created-by feature->supported-models)) + (some? last-edited-at) (set/intersection (:last-edited-at feature->supported-models)) + (some? last-edited-by) (set/intersection (:last-edited-by feature->supported-models)) + (true? search-native-query) (set/intersection (:search-native-query feature->supported-models)) + (true? verified) (set/intersection (:verified feature->supported-models))))) + +(mu/defn build-filters :- :map + "Build the search filters for a model." + [honeysql-query :- :map + model :- SearchableModel + search-context :- SearchContext] + (let [{:keys [models + archived? + created-at + created-by + last-edited-at + last-edited-by + search-string + search-native-query + verified + ids]} search-context] + (cond-> honeysql-query + (not (str/blank? search-string)) + (sql.helpers/where (search-string-clause-for-model model search-context search-native-query)) + + (some? archived?) + (sql.helpers/where (archived-clause model archived?)) + + ;; build optional filters + (some? created-at) + (#(build-optional-filter-query :created-at model % created-at)) + + (some? created-by) + (#(build-optional-filter-query :created-by model % created-by)) + + (some? last-edited-at) + (#(build-optional-filter-query :last-edited-at model % last-edited-at)) + + (some? last-edited-by) + (#(build-optional-filter-query :last-edited-by model % last-edited-by)) + + (some? verified) + (#(build-optional-filter-query :verified model % verified)) + + (and (some? ids) + (contains? models model)) + (#(build-optional-filter-query :id model % ids)) + + (= "table" model) + (sql.helpers/where + [:not [:= (search.config/column-with-model-alias "table" :db_id) audit/audit-db-id]])))) diff --git a/src/metabase/search/scoring.clj b/src/metabase/search/in_place/scoring.clj similarity index 95% rename from src/metabase/search/scoring.clj rename to src/metabase/search/in_place/scoring.clj index c4c3def411997418937246b2db53160c1bdda8fa..934c15becb1023b70953edba4f831590233b90cf 100644 --- a/src/metabase/search/scoring.clj +++ b/src/metabase/search/in_place/scoring.clj @@ -101,7 +101,7 @@ ;; ;; <hr /> -(ns metabase.search.scoring +(ns metabase.search.in-place.scoring "Computes a relevancy score for search results using the weighted average of various scorers. Scores are determined by various ways of comparing the text of the search string and the item's title or description, as well as by Metabase-specific features such as how many dashboards a card appears in or whether an item is pinned. @@ -158,15 +158,16 @@ the text match, if there is one. If there is no match, the score is 0." [search-native-query weighted-scorers query-tokens search-result] ;; TODO is pmap over search-result worth it? - (let [scores (for [column (search.config/searchable-columns (:model search-result) search-native-query) + (let [scores (for [column (let [search-columns-fn (requiring-resolve 'metabase.search.legacy/searchable-columns)] + (search-columns-fn (:model search-result) search-native-query)) {:keys [scorer name weight] :as _ws} weighted-scorers - :let [matched-text (-> search-result - (get column) - (search.config/column->string (:model search-result) column)) - match-tokens (some-> matched-text search.util/normalize search.util/tokenize) - raw-score (scorer query-tokens match-tokens)] - :when (and matched-text (pos? raw-score))] + :let [matched-text (-> search-result + (get column) + (search.config/column->string (:model search-result) column)) + match-tokens (some-> matched-text search.util/normalize search.util/tokenize) + raw-score (scorer query-tokens match-tokens)] + :when (and matched-text (pos? raw-score))] {:score raw-score :name (str "text-" name) :weight weight diff --git a/src/metabase/search/legacy.clj b/src/metabase/search/legacy.clj index 3ff5e0581be9a1c875f3aa17881f6779956fc87e..85736dca42ffdb2bd0ece9df11cce7ee64fd1e5f 100644 --- a/src/metabase/search/legacy.clj +++ b/src/metabase/search/legacy.clj @@ -1,17 +1,19 @@ (ns metabase.search.legacy (:require + [clojure.string :as str] + [flatland.ordered.map :as ordered-map] [honey.sql.helpers :as sql.helpers] [medley.core :as m] [metabase.db :as mdb] [metabase.db.query :as mdb.query] [metabase.models.collection :as collection] - [metabase.models.permissions :as perms] [metabase.search.api :as search.api] [metabase.search.config :as search.config :refer [SearchContext SearchableModel]] - [metabase.search.filter :as search.filter] - [metabase.search.scoring :as scoring] + [metabase.search.in-place.filter :as search.in-place.filter] + [metabase.search.in-place.scoring :as scoring] + [metabase.search.permissions :as search.permissions] [metabase.search.util :as search.util] [metabase.util :as u] [metabase.util.honey-sql-2 :as h2x] @@ -25,6 +27,14 @@ :keyword [:tuple :any :keyword]]) +(defn search-model->revision-model + "Return the appropriate revision model given a search model." + [model] + (case model + "dataset" (recur "card") + "metric" (recur "card") + (str/capitalize model))) + (mu/defn- ->column-alias :- keyword? "Returns the column name. If the column is aliased, i.e. [`:original_name` `:aliased_name`], return the aliased column name" @@ -33,12 +43,74 @@ (second column-or-aliased) column-or-aliased)) +(def all-search-columns + "All columns that will appear in the search results, and the types of those columns. The generated search query is a + `UNION ALL` of the queries for each different entity; it looks something like: + + SELECT 'card' AS model, id, cast(NULL AS integer) AS table_id, ... + FROM report_card + UNION ALL + SELECT 'metric' as model, id, table_id, ... + FROM metric + + Columns that aren't used in any individual query are replaced with `SELECT cast(NULL AS <type>)` statements. (These + are cast to the appropriate type because Postgres will assume `SELECT NULL` is `TEXT` by default and will refuse to + `UNION` two columns of two different types.)" + (ordered-map/ordered-map + ;; returned for all models. Important to be first for changing model for dataset + :model :text + :id :integer + :name :text + :display_name :text + :description :text + :archived :boolean + ;; returned for Card, Dashboard, and Collection + :collection_id :integer + :collection_name :text + :collection_type :text + :collection_location :text + :collection_authority_level :text + :archived_directly :boolean + ;; returned for Card and Dashboard + :collection_position :integer + :creator_id :integer + :created_at :timestamp + :bookmark :boolean + ;; returned for everything except Collection + :updated_at :timestamp + ;; returned only for Collection + :location :text + ;; returned for Card only, used for scoring and displays + :dashboardcard_count :integer + :last_edited_at :timestamp + :last_editor_id :integer + :moderated_status :text + :display :text + ;; returned for Metric and Segment + :table_id :integer + :table_schema :text + :table_name :text + :table_description :text + ;; returned for Metric, Segment, and Action + :database_id :integer + ;; returned for Database and Table + :initial_sync_status :text + :database_name :text + ;; returned for Action + :model_id :integer + :model_name :text + ;; returned for indexed-entity + :pk_ref :text + :model_index_id :integer + ;; returned for Card and Action + :dataset_query :text)) + (mu/defn- canonical-columns :- [:sequential HoneySQLColumn] "Returns a seq of lists of canonical columns for the search query with the given `model` Will return column names prefixed with the `model` name so that it can be used in criteria. Projects a `nil` for columns the `model` doesn't have and doesn't modify aliases." [model :- SearchableModel, col-alias->honeysql-clause :- [:map-of :keyword HoneySQLColumn]] - (for [[search-col col-type] search.config/all-search-columns + (for [[search-col col-type] all-search-columns :let [maybe-aliased-col (get col-alias->honeysql-clause search-col)]] (cond (= search-col :model) @@ -112,7 +184,7 @@ (sql.helpers/left-join [:revision :r] [:and [:= :r.model_id (search.config/column-with-model-alias model :id)] [:= :r.most_recent true] - [:= :r.model (search.config/search-model->revision-model model)]]))) + [:= :r.model (search-model->revision-model model)]]))) (mu/defn- with-moderated-status :- :map [query :- :map @@ -129,7 +201,7 @@ "CASE expression that lets the results be ordered by whether they're an exact (non-fuzzy) match or not" [query] (let [match (search.util/wildcard-match (search.util/normalize query)) - columns-to-search (->> search.config/all-search-columns + columns-to-search (->> all-search-columns (filter (fn [[_k v]] (= v :text))) (map first) (remove #{:collection_authority_level :moderated_status @@ -147,12 +219,174 @@ {:arglists '([model search-context])} (fn [model _] model)) +(defmulti searchable-columns + "The columns that can be searched for each model." + {:arglists '([model search-native-query])} + (fn [model _] model)) + +(defmethod searchable-columns :default + [_ _] + [:name]) + +(defmethod searchable-columns "action" + [_ search-native-query] + (cond-> [:name + :description] + search-native-query + (conj :dataset_query))) + +(defmethod searchable-columns "card" + [_ search-native-query] + (cond-> [:name + :description] + search-native-query + (conj :dataset_query))) + +(defmethod searchable-columns "dataset" + [_ search-native-query] + (searchable-columns "card" search-native-query)) + +(defmethod searchable-columns "metric" + [_ search-native-query] + (searchable-columns "card" search-native-query)) + +(defmethod searchable-columns "dashboard" + [_ _] + [:name + :description]) + +(defmethod searchable-columns "page" + [_ search-native-query] + (searchable-columns "dashboard" search-native-query)) + +(defmethod searchable-columns "database" + [_ _] + [:name + :description]) + +(defmethod searchable-columns "table" + [_ _] + [:name + :display_name + :description]) + +(defmethod searchable-columns "indexed-entity" + [_ _] + [:name]) + +(def ^:private default-columns + "Columns returned for all models." + [:id :name :description :archived :created_at :updated_at]) + +(def ^:private bookmark-col + "Case statement to return boolean values of `:bookmark` for Card, Collection and Dashboard." + [[:case [:not= :bookmark.id nil] true :else false] :bookmark]) + +(def ^:private dashboardcard-count-col + "Subselect to get the count of associated DashboardCards" + [{:select [:%count.*] + :from [:report_dashboardcard] + :where [:= :report_dashboardcard.card_id :card.id]} + :dashboardcard_count]) + +(def ^:private table-columns + "Columns containing information about the Table this model references. Returned for Metrics and Segments." + [:table_id + :created_at + [:table.db_id :database_id] + [:table.schema :table_schema] + [:table.name :table_name] + [:table.description :table_description]]) + +(defmulti columns-for-model + "The columns that will be returned by the query for `model`, excluding `:model`, which is added automatically. + This is not guaranteed to be the final list of columns, new columns can be added by calling [[api.search/replace-select]]" + {:arglists '([model])} + (fn [model] model)) + +(defmethod columns-for-model "action" + [_] + (conj default-columns :model_id + :creator_id + [:model.collection_id :collection_id] + [:model.id :model_id] + [:model.name :model_name] + [:query_action.database_id :database_id] + [:query_action.dataset_query :dataset_query])) + +(defmethod columns-for-model "card" + [_] + (conj default-columns :collection_id :archived_directly :collection_position :dataset_query :display :creator_id + [:collection.name :collection_name] + [:collection.type :collection_type] + [:collection.location :collection_location] + [:collection.authority_level :collection_authority_level] + bookmark-col dashboardcard-count-col)) + +(defmethod columns-for-model "indexed-entity" [_] + [[:model-index-value.name :name] + [:model-index-value.model_pk :id] + [:model-index.pk_ref :pk_ref] + [:model-index.id :model_index_id] + [:collection.name :collection_name] + [:collection.type :collection_type] + [:model.collection_id :collection_id] + [:model.id :model_id] + [:model.name :model_name] + [:model.database_id :database_id]]) + +(defmethod columns-for-model "dashboard" + [_] + (conj default-columns :archived_directly :collection_id :collection_position :creator_id bookmark-col + [:collection.name :collection_name] + [:collection.type :collection_type] + [:collection.authority_level :collection_authority_level])) + +(defmethod columns-for-model "database" + [_] + [:id :name :description :created_at :updated_at :initial_sync_status]) + +(defmethod columns-for-model "collection" + [_] + (conj (remove #{:updated_at} default-columns) + [:collection.id :collection_id] + [:name :collection_name] + [:type :collection_type] + [:authority_level :collection_authority_level] + :archived_directly + :location + bookmark-col)) + +(defmethod columns-for-model "segment" + [_] + (concat default-columns table-columns [:creator_id])) + +(defmethod columns-for-model "metric" + [_] + (concat default-columns table-columns [:creator_id])) + +(defmethod columns-for-model "table" + [_] + [[:table.id :id] + [:table.name :name] + [:table.created_at :created_at] + [:table.display_name :display_name] + [:table.description :description] + [:table.updated_at :updated_at] + [:table.initial_sync_status :initial_sync_status] + [:table.id :table_id] + [:table.db_id :database_id] + [:table.schema :table_schema] + [:table.name :table_name] + [:table.description :table_description] + [:metabase_database.name :database_name]]) + (mu/defn- select-clause-for-model :- [:sequential HoneySQLColumn] "The search query uses a `union-all` which requires that there be the same number of columns in each of the segments of the query. This function will take the columns for `model` and will inject constant `nil` values for any column - missing from `entity-columns` but found in `search.config/all-search-columns`." + missing from `entity-columns` but found in `all-search-columns`." [model :- SearchableModel] - (let [entity-columns (search.config/columns-for-model model) + (let [entity-columns (columns-for-model model) column-alias->honeysql-clause (m/index-by ->column-alias entity-columns) cols-or-nils (canonical-columns model column-alias->honeysql-clause)] cols-or-nils)) @@ -173,57 +407,7 @@ [model :- SearchableModel context :- SearchContext] (-> {:select (select-clause-for-model model) :from (from-clause-for-model model)} - (search.filter/build-filters model context))) - -(mu/defn add-collection-join-and-where-clauses - "Add a `WHERE` clause to the query to only return Collections the Current User has access to; join against Collection, - so we can return its `:name`." - [honeysql-query :- ms/Map - model :- :string - {:keys [filter-items-in-personal-collection - archived - current-user-id - is-superuser?]} :- SearchContext] - (let [collection-id-col (if (= model "collection") - :collection.id - :collection_id) - collection-filter-clause (collection/visible-collection-filter-clause - collection-id-col - {:include-archived-items :all - :include-trash-collection? true - :permission-level (if archived - :write - :read)} - {:current-user-id current-user-id - :is-superuser? is-superuser?})] - (cond-> honeysql-query - true - (sql.helpers/where collection-filter-clause (perms/audit-namespace-clause :collection.namespace nil)) - ;; add a JOIN against Collection *unless* the source table is already Collection - (not= model "collection") - (sql.helpers/left-join [:collection :collection] - [:= collection-id-col :collection.id]) - - (some? filter-items-in-personal-collection) - (sql.helpers/where - (case filter-items-in-personal-collection - "only" - (concat [:or] - ;; sub personal collections - (for [id (t2/select-pks-set :model/Collection :personal_owner_id [:not= nil])] - [:like :collection.location (format "/%d/%%" id)]) - ;; top level personal collections - [[:and - [:= :collection.location "/"] - [:not= :collection.personal_owner_id nil]]]) - - "exclude" - (conj [:or] - (into - [:and [:= :collection.personal_owner_id nil]] - (for [id (t2/select-pks-set :model/Collection :personal_owner_id [:not= nil])] - [:not-like :collection.location (format "/%d/%%" id)])) - [:= collection-id-col nil])))))) + (search.in-place.filter/build-filters model context))) (mu/defn- shared-card-impl [model :- :metabase.models.card/type @@ -234,7 +418,7 @@ [:and [:= :bookmark.card_id :card.id] [:= :bookmark.user_id (:current-user-id search-ctx)]]) - (add-collection-join-and-where-clauses "card" search-ctx) + (search.permissions/add-collection-join-and-where-clauses "card" search-ctx) (add-card-db-id-clause (:table-db-id search-ctx)) (with-last-editing-info "card") (with-moderated-status "card"))) @@ -246,7 +430,7 @@ [:= :model.id :action.model_id]) (sql.helpers/left-join :query_action [:= :query_action.action_id :action.id]) - (add-collection-join-and-where-clauses model search-ctx))) + (search.permissions/add-collection-join-and-where-clauses model search-ctx))) (defmethod search-query-for-model "card" [_model search-ctx] @@ -271,7 +455,7 @@ [:and [:= :bookmark.collection_id :collection.id] [:= :bookmark.user_id (:current-user-id search-ctx)]]) - (add-collection-join-and-where-clauses model search-ctx))) + (search.permissions/add-collection-join-and-where-clauses model search-ctx))) (defmethod search-query-for-model "database" [model search-ctx] @@ -285,7 +469,7 @@ [:= :bookmark.dashboard_id :dashboard.id] [:= :bookmark.user_id (:current-user-id search-ctx)]]) (with-moderated-status "dashboard") - (add-collection-join-and-where-clauses model search-ctx) + (search.permissions/add-collection-join-and-where-clauses model search-ctx) (with-last-editing-info "dashboard"))) (defn- add-model-index-permissions-clause @@ -321,7 +505,7 @@ (defmethod search.api/model-set :search.engine/in-place [search-ctx] - (let [model-queries (for [model (search.filter/search-context->applicable-models + (let [model-queries (for [model (search.in-place.filter/search-context->applicable-models ;; It's unclear why we don't use the existing :models (assoc search-ctx :models search.config/all-models))] {:nest (sql.helpers/limit (search-query-for-model model search-ctx) 1)}) diff --git a/src/metabase/search/permissions.clj b/src/metabase/search/permissions.clj new file mode 100644 index 0000000000000000000000000000000000000000..5de100ef749762088c87fc24b26b929d67f006d8 --- /dev/null +++ b/src/metabase/search/permissions.clj @@ -0,0 +1,73 @@ +(ns metabase.search.permissions + (:require + [honey.sql.helpers :as sql.helpers] + [metabase.models.collection :as collection] + [metabase.models.permissions :as perms] + [metabase.public-settings.premium-features :as premium-features] + [metabase.search.config :refer [SearchContext]] + [metabase.util.malli :as mu] + [toucan2.core :as t2])) + +(defn sandboxed-or-impersonated-user? + "Is the current user sandboxed or impersonated?" + ;; TODO take the current user as a parameter, and override the binding if necessary. + [] + ;; TODO FIXME -- search actually currently still requires [[metabase.api.common/*current-user*]] to be bound, + ;; because [[metabase.public-settings.premium-features/sandboxed-or-impersonated-user?]] requires it to be bound. + ;; Since it's part of the search context it would be nice if we could run search without having to bind that stuff at + ;; all. + (assert @@(requiring-resolve 'metabase.api.common/*current-user*) + "metabase.api.common/*current-user* must be bound in order to use search for an indexed entity") + (premium-features/sandboxed-or-impersonated-user?)) + +(mu/defn add-collection-join-and-where-clauses + "Add a `WHERE` clause to the query to only return Collections the Current User has access to; join against Collection, + so we can return its `:name`." + [honeysql-query :- :map + model :- [:maybe :string] + {:keys [filter-items-in-personal-collection + archived + current-user-id + is-superuser?]} :- SearchContext] + (let [collection-id-col (case model + "collection" :collection.id + "search-index" :search_index.collection_id + :collection_id) + collection-filter-clause (collection/visible-collection-filter-clause + collection-id-col + {:include-archived-items :all + :include-trash-collection? true + :permission-level (if archived + :write + :read)} + {:current-user-id current-user-id + :is-superuser? is-superuser?})] + (cond-> honeysql-query + true + (sql.helpers/where collection-filter-clause (perms/audit-namespace-clause :collection.namespace nil)) + ;; add a JOIN against Collection *unless* the source table is already Collection + (not= model "collection") + (sql.helpers/left-join [:collection :collection] + [:= collection-id-col :collection.id]) + + ;; TODO This is not really about permissions, it should really be handled in search.filter + (some? filter-items-in-personal-collection) + (sql.helpers/where + (case filter-items-in-personal-collection + "only" + (concat [:or] + ;; sub personal collections + (for [id (t2/select-pks-set :model/Collection :personal_owner_id [:not= nil])] + [:like :collection.location (format "/%d/%%" id)]) + ;; top level personal collections + [[:and + [:= :collection.location "/"] + [:not= :collection.personal_owner_id nil]]]) + + "exclude" + (conj [:or] + (into + [:and [:= :collection.personal_owner_id nil]] + (for [id (t2/select-pks-set :model/Collection :personal_owner_id [:not= nil])] + [:not-like :collection.location (format "/%d/%%" id)])) + [:= collection-id-col nil])))))) diff --git a/src/metabase/search/postgres/core.clj b/src/metabase/search/postgres/core.clj index 7502b25f709f55f9c0934aedc94b1a1e7b79254e..6d143c81698c5c5567eefd48e158b3e626b1a654 100644 --- a/src/metabase/search/postgres/core.clj +++ b/src/metabase/search/postgres/core.clj @@ -5,9 +5,12 @@ [honey.sql.helpers :as sql.helpers] [metabase.api.common :as api] [metabase.search.config :as search.config] + [metabase.search.filter :as search.filter] [metabase.search.legacy :as search.legacy] + [metabase.search.permissions :as search.permissions] [metabase.search.postgres.index :as search.index] [metabase.search.postgres.ingestion :as search.ingestion] + [metabase.search.postgres.scoring :as search.scoring] [toucan2.core :as t2]) (:import (java.time OffsetDateTime))) @@ -54,7 +57,7 @@ (when-not @#'search.index/initialized? (throw (ex-info "Search index is not initialized. Use [[init!]] to ensure it exists." {:search-engine :postgres}))) - (-> (sql.helpers/with [:index-query (search.index/search-query search-term)] + (-> (sql.helpers/with [:index-query (search.index/search-query search-term search-ctx)] [:source-query (in-place-query search-ctx)]) (sql.helpers/select :sq.*) (sql.helpers/from [:source-query :sq]) @@ -64,80 +67,37 @@ (sql/format {:quoted true}) t2/reducible-query)) -(defn- hybrid-multi - "Perform multiple legacy searches to see if its faster. Perverse!" - [search-term & {:as search-ctx}] - (when-not @#'search.index/initialized? - (throw (ex-info "Search index is not initialized. Use [[init!]] to ensure it exists." - {:search-engine :postgres}))) - (->> (search.index/search-query search-term) - t2/query - (group-by :model) - (mapcat (fn [[model results]] - (let [ids (map :model_id results)] - ;; Something is very wrong here, this also returns items with other ids. - (as-> search-ctx <> - (assoc <> :models #{model} :ids ids) - (dissoc <> :search-string) - (in-place-query <>) - (t2/query <>) - (filter (comp (set ids) :id) <>))))))) - (defn- parse-datetime [s] (when s (OffsetDateTime/parse s))) -(defn- minimal - "Search via index, and return potentially stale information, without applying filters or - restricting to collections we have access to." - [search-term & {:as _search-ctx}] - (when-not @#'search.index/initialized? - (throw (ex-info "Search index is not initialized. Use [[init!]] to ensure it exists." - {:search-engine :postgres}))) - (->> (assoc (search.index/search-query search-term) :select [:legacy_input]) - (t2/query) - (map :legacy_input) - (map #(json/parse-string % keyword)) - (map #(-> % - (update :created_at parse-datetime) - (update :updated_at parse-datetime) - (update :last_edited_at parse-datetime))))) - -;; filters: -;; - the obvious ones in the ui -;; - db-id -;; - personal collection (include / exclude), including sub - -(defn- minimal-with-perms - "Search via index, and return potentially stale information, without applying filters, - but applying permissions. Does not perform ranking." +(defn- rehydrate [index-row] + (-> (merge + (json/parse-string (:legacy_input index-row) keyword) + (select-keys index-row [:total_score :pinned])) + (update :created_at parse-datetime) + (update :updated_at parse-datetime) + (update :last_edited_at parse-datetime))) + +(defn- fulltext + "Search purely using the index." [search-term & {:as search-ctx}] (when-not @#'search.index/initialized? (throw (ex-info "Search index is not initialized. Use [[init!]] to ensure it exists." {:search-engine :postgres}))) - (->> (search.legacy/add-collection-join-and-where-clauses - (assoc (search.index/search-query search-term) - :select [:legacy_input]) - ;; we just need this to not be "collection" - "__search_index__" - search-ctx) + (->> (let [base-query (search.index/search-query search-term search-ctx [:legacy_input])] + (search.permissions/add-collection-join-and-where-clauses base-query "search-index" search-ctx)) + (search.scoring/with-scores search-ctx) + (search.filter/with-filters search-ctx) (t2/query) - (map :legacy_input) - (map #(json/parse-string % keyword)) - (map #(-> % - (update :created_at parse-datetime) - (update :updated_at parse-datetime) - (update :last_edited_at parse-datetime))))) + (map rehydrate))) -(def ^:private default-engine hybrid-multi) +(def ^:private default-engine fulltext) (defn- search-fn [search-engine] (case search-engine :search.engine/hybrid hybrid - :search.engine/hybrid-multi hybrid-multi - :search.engine/minimal minimal - :search.engine/minimal-with-perms minimal-with-perms - :search.engine/fulltext default-engine + :search.engine/fulltext fulltext default-engine)) (defn search @@ -148,23 +108,20 @@ (dissoc search-ctx :search-string)))) (defn model-set - "Return a set of the models which have at least one result for the given query. - TODO: consider filters and permissions." + "Return a set of the models which have at least one result for the given query." [search-ctx] - (set - (filter - ;; TODO use a single query - (fn [m] - (t2/exists? :search_index - (-> (search.index/search-query (:search-string search-ctx)) - (sql.helpers/where [:= :model m])))) - ;; TODO use only the models that apply to the given filters - (:models search-ctx search.config/all-models)))) + ;; We ignore any current models filter + (let [search-ctx (assoc search-ctx :models search.config/all-models)] + (->> (-> (search.index/search-query (:search-string search-ctx) search-ctx [[[:distinct :model] :model]]) + (search.permissions/add-collection-join-and-where-clauses "search-index" search-ctx)) + (search.filter/with-filters search-ctx) + t2/query + (into #{} (map :model))))) (defn no-scoring "Do no scoring, whatsoever" [result _scoring-ctx] - {:score 1 + {:score (:total_score result 1) :result (assoc result :all-scores [] :relevant-scores [])}) (defn init! diff --git a/src/metabase/search/postgres/index.clj b/src/metabase/search/postgres/index.clj index 726b3a238741de87a72f71cf1bef8fa2ad61fd28..bfe35def584465439fe773e4942b5b715bcaa855 100644 --- a/src/metabase/search/postgres/index.clj +++ b/src/metabase/search/postgres/index.clj @@ -3,6 +3,7 @@ [cheshire.core :as json] [clojure.string :as str] [honey.sql.helpers :as sql.helpers] + [metabase.search.spec :as search.spec] [metabase.util :as u] [toucan2.core :as t2])) @@ -16,7 +17,7 @@ (defonce ^:private reindexing? (atom false)) -(def ^:private tsv-language "english") +(def ^:private tsv-language "simple") (defn- exists? [table-name] (t2/exists? :information_schema.tables :table_name (name table-name))) @@ -43,31 +44,48 @@ ;; entity [:model_id :int :not-null] [:model [:varchar 254] :not-null] ;; TODO We could shrink this to just what we need. + [:name :text] ;; search [:search_vector :tsvector :not-null] + [:with_native_query_vector :tsvector :not-null] ;; results - [:display_data :text] - [:legacy_input :text] + [:display_data :text :not-null] + [:legacy_input :text :not-null] ;; scoring related + [:dashboardcard_count :int] [:model_rank :int :not-null] + [:pinned :boolean] + [:verified :boolean] ;; permission related entities [:collection_id :int] [:database_id :int] [:table_id :int] ;; filter related - [:archived :boolean] + [:archived :boolean :not-null [:default false]] + [:creator_id :int] + [:last_edited_at :timestamp] + [:last_editor_id :int] + [:model_created_at :timestamp] + [:model_updated_at :timestamp] ;; useful for tracking the speed and age of the index [:created_at :timestamp [:default [:raw "CURRENT_TIMESTAMP"]] - :not-null]]) + :not-null] + [:updated_at :timestamp :not-null]]) t2/query) ;; TODO I strongly suspect that there are more indexes that would help performance, we should examine EXPLAIN. - (t2/query - (format "CREATE INDEX IF NOT EXISTS %s_tsvector_idx ON %s USING gin (search_vector)" - (str/replace (str (name active-table) "_" (random-uuid)) #"-" "_") - (name pending-table)))) + (let [idx-prefix (str/replace (str (name active-table) "_" (random-uuid)) #"-" "_") + table-name (name pending-table)] + (doseq [stmt ["CREATE UNIQUE INDEX IF NOT EXISTS %s_identity_idx ON %s (model, model_id)" + "CREATE INDEX IF NOT EXISTS %s_tsvector_idx ON %s USING gin (search_vector)" + "CREATE INDEX IF NOT EXISTS %s_native_tsvector_idx ON %s USING gin (with_native_query_vector)" + ;; Spam all the indexes for now, let's see if they get used on Stats / Ephemeral envs. + "CREATE INDEX IF NOT EXISTS %s_model_archived_idx ON %s (model, archived)" + "CREATE INDEX IF NOT EXISTS %s_archived_idx ON %s (archived)"]] + (t2/query (format stmt idx-prefix table-name))))) + (reset! reindexing? true))) (defn activate-pending! @@ -86,50 +104,97 @@ (defn- entity->entry [entity] (-> entity (select-keys - [:model - :model_rank - :collection_id - :database_id - :display_data - :legacy_input - :table_id - :archived]) + ;; remove attrs that get aliased + (remove #{:id :created_at :updated_at :native_query} + (conj search.spec/attr-columns + :model :model_rank + :display_data :legacy_input))) (update :display_data json/generate-string) (update :legacy_input json/generate-string) (assoc - :model_id (:id entity) - :search_vector [:to_tsvector - [:inline tsv-language] - [:cast - (:searchable_text entity) - :text]]))) + :updated_at :%now + :model_id (:id entity) + :model_created_at (:created_at entity) + :model_updated_at (:updated_at entity) + :search_vector [:|| + [:setweight [:to_tsvector [:inline tsv-language] [:cast (:name entity) :text]] + [:inline "A"]] + [:setweight [:to_tsvector + [:inline tsv-language] + [:cast + (:searchable_text entity "") + :text]] + [:inline "B"]]] + :with_native_query_vector [:|| + [:setweight [:to_tsvector [:inline tsv-language] [:cast (:name entity) :text]] + [:inline "A"]] + [:setweight [:to_tsvector + [:inline tsv-language] + [:cast + (str (:searchable_text entity) " " (:native_query entity)) + :text]] + [:inline "B"]]]))) + +(defn- upsert! [table entry] + (t2/query + {:insert-into table + :values [entry] + :on-conflict [:model :model_id] + :do-update-set entry})) + +(defn- batch-upsert! [table entries] + (when (seq entries) + (t2/query + ;; The cost of dynamically calculating these keys should be small compared to the IO cost, so unoptimized. + (let [update-keys (vec (disj (set (keys (first entries))) :id :model :model_id)) + excluded-kw (fn [column] (keyword (str "excluded." (name column))))] + {:insert-into table + :values entries + :on-conflict [:model :model_id] + :do-update-set (zipmap update-keys (map excluded-kw update-keys))})))) (defn update! "Create the given search index entries" [entity] (let [entry (entity->entry entity)] (when @initialized? - (t2/insert! active-table entry)) + (upsert! active-table entry)) + (when @reindexing? + (upsert! pending-table entry)))) + +(defn delete! + "Remove any entries corresponding directly to a given model instance." + [id search-models] + ;; In practice, we expect this to be 1-1, but the data model does not preclude it. + (when (seq search-models) + (when @initialized? + (t2/delete! active-table :model_id id :model [:in search-models])) (when @reindexing? - (t2/insert! pending-table entry)))) + (t2/delete! pending-table :model_id id :model [:in search-models])))) -(defn- process-negation [term] - (if (str/starts-with? term "-") - (str "!" (subs term 1)) - term)) +(defn- quote* [s] + (str "'" (str/replace s "'" "''") "'")) (defn- process-phrase [word-or-phrase] ;; a phrase is quoted even if the closing quotation mark has not been typed yet - (if (str/starts-with? word-or-phrase "\"") + (cond ;; quoted phrases must be matched sequentially + (str/starts-with? word-or-phrase "\"") (as-> word-or-phrase <> ;; remove the quote mark(s) (str/replace <> #"^\"|\"$" "") (str/trim <>) (str/split <> #"\s+") + (map quote* <>) (str/join " <-> " <>)) + + ;; negation + (str/starts-with? word-or-phrase "-") + (str "!" (quote* (subs word-or-phrase 1))) + ;; just a regular word - word-or-phrase)) + :else + (quote* word-or-phrase))) (defn- split-preserving-quotes "Break up the words in the search input, preserving quoted and partially quoted segments." @@ -139,8 +204,7 @@ (defn- process-clause [words-and-phrases] (->> words-and-phrases (remove #{"and"}) - (map (comp process-phrase - process-negation)) + (map process-phrase) (str/join " & "))) (defn- complete-last-word @@ -151,44 +215,55 @@ (defn- to-tsquery-expr "Given the user input, construct a query in the Postgres tsvector query language." [input] - (let [trimmed (str/trim input) - complete? (not (str/ends-with? trimmed "\"")) - ;; TODO also only complete if search-typeahead-enabled and the context is the search palette - maybe-complete (if complete? complete-last-word identity)] - (->> (split-preserving-quotes trimmed) - (remove str/blank?) - (partition-by #{"or"}) - (remove #(= (first %) "or")) - (map process-clause) - (str/join " | ") - maybe-complete))) + (str + (when input + (let [trimmed (str/trim input) + complete? (not (str/ends-with? trimmed "\"")) + ;; TODO also only complete if search-typeahead-enabled and the context is the search palette + maybe-complete (if complete? complete-last-word identity)] + (->> (split-preserving-quotes trimmed) + (remove str/blank?) + (partition-by #{"or"}) + (remove #(= (first %) "or")) + (map process-clause) + (str/join " | ") + maybe-complete))))) (defn batch-update! "Create the given search index entries in bulk" [entities] (let [entries (map entity->entry entities)] (when @initialized? - (t2/insert! active-table entries)) + (batch-upsert! active-table entries)) (when @reindexing? - (t2/insert! pending-table entries)))) + (batch-upsert! pending-table entries)))) (defn search-query "Query fragment for all models corresponding to a query parameter `:search-term`." - [search-term] - {:select [:model_id :model] - :from [active-table] - :where (if-not search-term - [:= [:inline 1] [:inline 1]] - [:raw - "search_vector @@ to_tsquery('" - tsv-language "', " - [:lift (to-tsquery-expr search-term)] ")"])}) + ([search-term search-ctx] + (search-query search-term search-ctx [:model_id :model])) + ([search-term search-ctx select-items] + {:select select-items + :from [active-table] + ;; Using a join allows us to share the query expression between our SELECT and WHERE clauses. + :join [[[:raw "to_tsquery('" + tsv-language "', " + [:lift (to-tsquery-expr search-term)] ")"] + :query] [:= 1 1]] + :where (if (str/blank? search-term) + [:= [:inline 1] [:inline 1]] + [:raw + (str + (if (:search-native-query search-ctx) + "with_native_query_vector" + "search_vector") + " @@ query")])})) (defn search "Use the index table to search for records." - [search-term] + [search-term & [search-ctx]] (map (juxt :model_id :model) - (t2/query (search-query search-term)))) + (t2/query (search-query search-term search-ctx)))) (defn reset-index! "Ensure we have a blank slate; in case the table schema or stored data format has changed." diff --git a/src/metabase/search/postgres/ingestion.clj b/src/metabase/search/postgres/ingestion.clj index c7ba83b5711e70eff33846727fd34ecbc68827f1..2c7dc309a9224abd656646607b148ce4dd711198 100644 --- a/src/metabase/search/postgres/ingestion.clj +++ b/src/metabase/search/postgres/ingestion.clj @@ -1,17 +1,18 @@ (ns metabase.search.postgres.ingestion - "Use to populate the search. For now it leverage the legacy search code, to avoid duplication. - Unfortunately, this makes it difficult to share logic for re-indexing individual models efficiently, - and to determine when changes to related entities should cause an item to be re-indexed. - For this reason we'll want to move to using a spec-based approach next." (:require [clojure.string :as str] + [honey.sql.helpers :as sql.helpers] + [medley.core :as m] [metabase.search.config :as search.config] - [metabase.search.legacy :as search.legacy] [metabase.search.postgres.index :as search.index] + [metabase.search.spec :as search.spec] + [metabase.util :as u] [toucan2.core :as t2] [toucan2.realize :as t2.realize])) -(def ^:private insert-batch-size 50) +(set! *warn-on-reflection* true) + +(def ^:private insert-batch-size 150) (def ^:private model-rankings (zipmap search.config/models-search-order (range))) @@ -22,51 +23,123 @@ (defn- searchable-text [m] ;; For now, we never index the native query content - (->> (search.config/searchable-columns (:model m) false) + (->> (:search-terms (search.spec/spec (:model m))) (map m) (str/join " "))) (defn- display-data [m] - (select-keys m [:name :display_name :description])) + (select-keys m [:name :display_name :description :collection_name])) (defn- ->entry [m] (-> m (select-keys - [:id - :model - :archived - :collection_id - :database_id - :table_id]) + (into [:id :model] search.spec/attr-columns)) (update :archived boolean) (assoc - :display_data (display-data m) - :legacy_input m + :display_data (display-data m) + :legacy_input m :searchable_text (searchable-text m) - :model_rank (model-rank (:model m))))) + :model_rank (model-rank (:model m))))) + +(defn- attrs->select-items [attrs] + (for [[k v] attrs :when v] + (let [as (keyword (u/->snake_case_en (name k)))] + (if (true? v) as [v as])))) + +(defn- spec-index-query* + [search-model] + (let [spec (search.spec/spec search-model)] + (u/remove-nils + {:select (search.spec/qualify-columns :this + (concat + (:search-terms spec) + (mapcat (fn [k] (attrs->select-items (get spec k))) + [:attrs :render-terms]))) + :from [[(t2/table-name (:model spec)) :this]] + :where (:where spec [:inline [:= 1 1]]) + :left-join (when (:joins spec) + (into [] + cat + (for [[join-alias [join-model join-condition]] (:joins spec)] + [[(t2/table-name join-model) join-alias] + join-condition])))}))) + +(def ^{:private true, :arglists '([search-model])} spec-index-query + (memoize spec-index-query*)) + +(defn- spec-index-query-where [search-model where-clause] + (-> (spec-index-query search-model) + (sql.helpers/where where-clause))) + +(defn- spec-index-reducible [search-model & [where-clause]] + (->> (spec-index-query-where search-model where-clause) + t2/reducible-query + (eduction (map #(assoc % :model search-model))))) (defn- search-items-reducible [] - (-> {:search-string nil - :models (disj search.config/all-models "indexed-entity") - ;; we want to see everything - :is-superuser? true - :current-user-id (t2/select-one-pk :model/User :is_superuser true) - :current-user-perms #{"/"} - ;; include both achived and non-archived items. - :archived? nil - ;; only need this for display data - :model-ancestors? false} - search.legacy/full-search-query - (dissoc :limit) - t2/reducible-query)) + (reduce u/rconcat [] (map spec-index-reducible (keys (methods search.spec/spec))))) -(defn populate-index! - "Go over all searchable items and populate the index with them." - [] - (->> (search-items-reducible) +(defn- batch-update! [search-items-reducible] + (->> search-items-reducible (eduction (comp (map t2.realize/realize) + ;; It's possible to get redundant entries from the indexed-entities table. + ;; We remove duplicates to avoid creating invalid insert statements. + (m/distinct-by (juxt :id :model)) (map ->entry) (partition-all insert-batch-size))) (run! search.index/batch-update!))) + +(defn populate-index! + "Go over all searchable items and populate the index with them." + [] + (batch-update! (search-items-reducible))) + +(def ^:dynamic *force-sync* + "Force ingestion to happen immediately, on the same thread." + false) + +(defmacro ^:private run-on-thread [& body] + `(if *force-sync* + (do ~@body) + (doto (Thread. ^Runnable (fn [] ~@body)) + (.start)))) + +(defn update-index! + "Given a new or updated instance, create or update all the corresponding search entries if needed." + [instance] + (when-let [updates (seq (search.spec/search-models-to-update instance))] + ;; We need to delay execution to handle deletes, which alert us *before* updating the database. + ;; TODO It's dangerous to simply unleash threads on the world, this should use a queue in future. + (run-on-thread + (Thread/sleep 100) + (->> (for [[search-model where-clause] updates] + (spec-index-reducible search-model where-clause)) + ;; init collection is only for clj-kondo, as we know that the list is non-empty + (reduce u/rconcat []) + (batch-update!))) + nil)) + +;; TODO think about how we're going to handle cascading deletes. +;; Ideas: +;; - Queue full re-index (rather expensive) +;; - Queue "purge" (empty left join to the model) - needs special case for indexed-entity +;; - Pre-delete hook using pre-calculated PK-based graph +(defn delete-model! + "Given a deleted instance, delete all the corresponding search entries." + [instance] + (let [model (t2/model instance) + id (:id instance) + ;; TODO this could use some precalculation into a look-up map + search-models (->> (methods search.spec/spec) + (map (fn [[search-model spec-fn]] (spec-fn search-model))) + (filter #(= model (:model %))) + (map :name) + seq)] + (when search-models + (search.index/delete! id search-models)))) + +(comment + (t2/query + (spec-index-query-where "table" [:= 1 :this.db_id]))) diff --git a/src/metabase/search/postgres/scoring.clj b/src/metabase/search/postgres/scoring.clj new file mode 100644 index 0000000000000000000000000000000000000000..16d7e09af97e8190657604ab2085189088067ff9 --- /dev/null +++ b/src/metabase/search/postgres/scoring.clj @@ -0,0 +1,95 @@ +(ns metabase.search.postgres.scoring + (:require + [honey.sql.helpers :as sql.helpers] + [metabase.search.config :as search.config])) + +(def ^:private seconds-in-a-day 86400) + +(defn- truthy + "Prefer it when a (potentially nullable) boolean is true." + [column] + [:coalesce [:cast column :integer] [:inline 0]]) + +(defn- size + "Prefer items whose value is larger, up to some saturation point. Items beyond that point are equivalent." + [column ceiling] + [:least [:/ [:coalesce column [:inline 0]] [:inline (double ceiling)]] [:inline 1]]) + +(defn- inverse-duration + "Score at item based on the duration between two dates, where less is better." + [from-column to-column ceiling-in-days] + (let [ceiling [:inline ceiling-in-days]] + [:/ + [:greatest + [:- ceiling + [:/ + ;; Use seconds for granularity in the fraction. + [[:raw "EXTRACT(epoch FROM (" [:- to-column from-column] [:raw "))"]]] + [:inline (double seconds-in-a-day)]]] + [:inline 0]] + ceiling])) + +(defn- idx-rank + "Prefer items whose value is earlier in some list." + [idx-col len] + (if (pos? len) + [:/ [:- [:inline (dec len)] idx-col] [:inline len]] + [:inline 1])) + +(defn- sum-columns [column-names] + (if (seq column-names) + (reduce (fn [expr col] [:+ expr col]) + (first column-names) + (rest column-names)) + [:inline 1])) + +(defn- weighted-score [[column-alias expr]] + [:* [:inline (search.config/weights column-alias 0)] expr]) + +(defn- select-items [scorers] + (concat + (for [[column-alias expr] scorers] + [expr column-alias]) + [[(sum-columns (map weighted-score scorers)) + :total_score]])) + +;; Divides rank by log(len(doc)) +;; See https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-RANKING +(def ^:private ts-rank-normalization 1) + +;; TODO move these to the spec definitions +(def ^:private bookmarked-models [:card :collection :dashboard]) + +(def ^:private bookmark-score-expr + (let [match-clause (fn [m] [[:and [:= :model m] [:!= nil (keyword (str m "_bookmark." m "_id"))]] + [:inline 1]])] + (into [:case] (concat (mapcat (comp match-clause name) bookmarked-models) [:else [:inline 0]])))) + +(def ^:private scorers + {:text [:ts_rank :search_vector :query [:inline ts-rank-normalization]] + :pinned (truthy :pinned) + :bookmarked bookmark-score-expr + :recency (inverse-duration :model_updated_at [:now] search.config/stale-time-in-days) + :dashboard (size :dashboardcard_count search.config/dashboard-count-ceiling) + :model (idx-rank :model_rank (count search.config/all-models))}) + +(def ^:private precalculated-select-items (select-items scorers)) + +(defn- bookmark-join [model user-id] + (let [model-name (name model) + table-name (str model-name "_bookmark")] + [(keyword table-name) + [:and + [:= :model [:inline model-name]] + [:= (keyword (str table-name ".user_id")) user-id] + [:= :search_index.model_id (keyword (str table-name "." model-name "_id"))]]])) + +(defn- join-bookmarks [qry user-id] + (apply sql.helpers/left-join qry (mapcat #(bookmark-join % user-id) bookmarked-models))) + +(defn with-scores + "Add a bunch of SELECT columns for the individual and total scores, and a corresponding ORDER BY." + [search-ctx qry] + (-> (apply sql.helpers/select qry precalculated-select-items) + (join-bookmarks (:current-user-id search-ctx)) + (sql.helpers/order-by [:total_score :desc]))) diff --git a/src/metabase/search/spec.clj b/src/metabase/search/spec.clj new file mode 100644 index 0000000000000000000000000000000000000000..74014434e70abc79b0b14765c3c4e8fabab5d09c --- /dev/null +++ b/src/metabase/search/spec.clj @@ -0,0 +1,268 @@ +(ns metabase.search.spec + (:require + [clojure.set :as set] + [clojure.string :as str] + [clojure.walk :as walk] + [malli.core :as mc] + [malli.error :as me] + [metabase.config :as config] + [metabase.util :as u] + [toucan2.core :as t2])) + +(def ^:private SearchModel + [:enum "dashboard" "table" "dataset" "segment" "collection" "database" "action" "indexed-entity" "metric" "card"]) + +(def ^:private AttrValue + "Key must be present, to show it's been explicitly considered. + + - false: not present [not: consider making the nil instead, since it implies writing NULL to the column] + - true: given by a column with the same name (snake case) [note: consider removing this sugar, just repeat the column] + - keyword: given by the corresponding column + - vector: calculated by the given expression + - map: a sub-select" + [:union :boolean :keyword vector? :map]) + +(def ^:private explicit-attrs + "These attributes must be explicitly defined, omitting them could be a source of bugs." + [:archived + :collection-id + :database-id + :table-id]) + +(def ^:private optional-attrs + "These attributes may be omitted (for now) in the interest of brevity in the definitions." + [:id + :name + :created-at + :creator-id + :native-query + :dashboardcard-count + :last-edited-at + :last-editor-id + :pinned + :verified + :updated-at]) + +(def ^:private default-attrs + {:id true + :name true}) + +(def ^:private attr-keys + "Keys of a search-model that correspond to concrete columns in the index" + (into explicit-attrs optional-attrs)) + +(def attr-columns + "Columns of an ingestion query that correspond to concrete columns in the index" + (mapv (comp keyword u/->snake_case_en name) attr-keys)) + +(assert (not-any? (set explicit-attrs) optional-attrs) "Attribute must only be mentioned in one list") + +(def ^:private Attrs + (into [:map {:closed true}] + (concat (for [k explicit-attrs] [k AttrValue]) + (for [k optional-attrs] [k {:optional true} AttrValue])))) + +(def ^:private NonAttrKey + ;; This is rather slow, not great for REPL development. + (if config/is-dev? + :keyword + [:and :keyword [:not (into [:enum] attr-columns)]])) + +(def ^:private JoinMap + "We use our own schema instead of raw HoneySQL, so that we can invert it to calculate the update hooks." + [:map-of :keyword [:tuple :keyword vector?]]) + +(def ^:private Specification + [:map {:closed true} + [:name SearchModel] + [:model :keyword] + [:attrs Attrs] + [:search-terms [:sequential {:min 1} :keyword]] + [:render-terms [:map-of NonAttrKey AttrValue]] + [:where {:optional true} vector?] + [:bookmark {:optional true} vector?] + [:joins {:optional true} JoinMap]]) + +(defn- qualify-column* [table column] + (if (str/includes? (name column) ".") + column + (keyword (str (name table) "." (name column))))) + +(defn- qualify-column + "Given a select-item, qualify the (potentially nested) column reference if it is naked." + [table select-item] + (cond + (keyword? select-item) + (let [qualified (qualify-column* table select-item)] + (if (= select-item qualified) + select-item + [qualified select-item])) + + (and (vector? select-item) (keyword? (first select-item))) + (assoc select-item 0 (qualify-column* table (first select-item))) + + :else + select-item)) + +(defn- has-table? [table kw] + (and (not (namespace kw)) + (if table + (str/starts-with? (name kw) (str (name table) ".")) + (not (str/includes? (name kw) "."))))) + +(defn- get-table [kw] + (let [parts (str/split (name kw) #"\.")] + (when (> (count parts) 1) + (keyword (first parts))))) + +(defn- remove-table [table kw] + (if (and table (not (namespace kw))) + (keyword (subs (name kw) (inc (count (name table))))) + kw)) + +(defn- find-fields-kw [kw] + ;; Filter out SQL functions + (when-not (str/starts-with? (name kw) "%") + (let [table (get-table kw)] + (list [(or table :this) (remove-table table kw)])))) + +(defn- find-fields-expr [expr] + (cond + (keyword? expr) + (find-fields-kw expr) + + (vector? expr) + (mapcat find-fields-expr (rest expr)))) + +(defn- find-fields-attr [[k v]] + (when v + (if (true? v) + [[:this (keyword (u/->snake_case_en (name k)))]] + (find-fields-expr v)))) + +(defn- find-fields-select-item [x] + (cond + (keyword? x) + (find-fields-kw x) + + (vector? x) + (find-fields-expr (first x)))) + +(defn- find-fields-top [x] + (cond + (map? x) + (mapcat find-fields-attr x) + + (sequential? x) + (mapcat find-fields-select-item x) + + :else + (throw (ex-info "Unexpected format for fields" {:x x})))) + +(defn- find-fields + "Search within a definition for all the fields referenced on the given table alias." + [spec] + (u/group-by first second conj #{} + (concat + (mapcat + find-fields-top + ;; Remove the keys with special meanings (should probably switch this to an allowlist rather) + (vals (dissoc spec :name :native-query :where :joins :bookmark :model))) + (find-fields-expr (:where spec))))) + +(defn- replace-qualification [expr from to] + (cond + (and (keyword? expr) (has-table? from expr)) + (keyword (str/replace (name expr) (str (name from) ".") (str (name to) "."))) + + (sequential? expr) + (into (empty expr) (map #(replace-qualification % from to) expr)) + + :else + expr)) + +(defn- insert-values [expr table m] + (walk/postwalk + (fn [x] + (if (and (keyword? x) (has-table? table x)) + (get m (remove-table table x)) + x)) + expr)) + +(defn- search-model-hooks + "Generate a map indicating which search-models to update based on which fields are modified for a given model." + [spec] + (let [s (:name spec) + fields (find-fields spec)] + (into {} + (cons + [(:model spec) #{{:search-model s + :fields (:this (find-fields spec)) + :where [:= :updated.id :this.id]}}] + (for [[table-alias [model join-condition]] (:joins spec)] + (let [table-fields (fields table-alias)] + [model #{{:search-model s + :fields table-fields + :where (replace-qualification join-condition table-alias :updated)}}])))))) + +(defn- merge-hooks + "Combine the search index hooks corresponding to different search models." + [hooks] + (reduce (partial merge-with set/union) {} hooks)) + +(defn qualify-columns + "Given a list of select-item, qualify all naked column references to refer to the given table." + [table select-item] + (for [column select-item + :when (and column (or (not (vector? column)) + (some? (first column))))] + (qualify-column table column))) + +(defmulti spec + "Register a metabase model as a search-model. + Once we're trying up the fulltext search project, we can inline a detailed explanation. + For now, see its schema, and the existing definitions that use it." + (fn [search-model] search-model)) + +(defn specifications + "A mapping from each search-model to its specification." + [] + (into {} (for [[s spec-fn] (methods spec)] [s (spec-fn s)]))) + +(defn validate-spec! + "Check whether a given specification is valid" + [spec] + (when-let [info (mc/explain Specification spec)] + (throw (ex-info (str "Invalid search specification for " (:name spec) ": " (me/humanize info)) info))) + (doseq [table (keys (find-fields spec)) + :when (not= :this table)] + (assert (contains? (:joins spec) table) (str "Reference to table without a join: " table)))) + +(defmacro define-spec + "Define a spec for a search model." + [search-model spec] + `(let [spec# (-> ~spec + (assoc :name ~search-model) + (update :attrs #(merge ~default-attrs %)))] + (validate-spec! spec#) + (defmethod spec ~search-model [~'_] spec#))) + +;; TODO we should memoize this for production (based on spec values) +(defn model-hooks + "Return an inverted map of data dependencies to search models, used for updating them based on underlying models." + [] + (merge-hooks + (for [[search-model spec-fn] (methods spec)] + (search-model-hooks (spec-fn search-model))))) + +(defn search-models-to-update + "Given an updated or created instance, return a description of which search-models to (re)index." + [instance] + (into #{} + (keep + (fn [{:keys [search-model fields where]}] + ;; If there are no changes, treat it as if everything has changed. + ;; Likewise, if there are no field dependencies, always do it - this is a hack for dashcards to cards. + (when (or (not fields) (some fields (keys (or (t2/changes instance) instance)))) + [search-model (insert-values where :updated instance)]))) + (get (model-hooks) (t2/model instance)))) diff --git a/src/metabase/util.cljc b/src/metabase/util.cljc index 3d60ad1ca6c99c607d9a5bdab324263fa4b95797..4c4936711a800fc8827788a7f74560c1fb17e30e 100644 --- a/src/metabase/util.cljc +++ b/src/metabase/util.cljc @@ -2,7 +2,8 @@ "Common utility functions useful throughout the codebase." (:refer-clojure :exclude [group-by]) (:require - #?@(:clj ([clojure.math.numeric-tower :as math] + #?@(:clj ([clojure.core.protocols :as core.protocols] + [clojure.math.numeric-tower :as math] [me.flowthing.pp :as pp] [metabase.config :as config] #_{:clj-kondo/ignore [:discouraged-namespace]} @@ -27,6 +28,7 @@ [weavejester.dependency :as dep]) #?(:clj (:import (clojure.lang Reflector) + (clojure.core.protocols CollReduce) (java.text Normalizer Normalizer$Form) (java.util Locale) (org.apache.commons.validator.routines RegexValidator UrlValidator))) @@ -1118,3 +1120,28 @@ "Return first item from Reducible" [reducible] (reduce (fn [_ fst] (reduced fst)) nil reducible)) + +(defn rconcat + "Concatenate two Reducibles" + [r1 r2] + #?(:clj + (reify CollReduce + (coll-reduce [_ f] + #_{:clj-kondo/ignore [:reduce-without-init]} + (let [acc1 (reduce f r1) + acc2 (reduce f acc1 r2)] + acc2)) + (coll-reduce [_ f init] + (let [acc1 (reduce f init r1) + acc2 (reduce f acc1 r2)] + acc2))) + :cljs + (reify IReduce + (-reduce [_ f] + (let [acc1 (reduce f r1) + acc2 (reduce f acc1 r2)] + acc2)) + (-reduce [_ f init] + (let [acc1 (reduce f init r1) + acc2 (reduce f acc1 r2)] + acc2))))) diff --git a/test/metabase/api/search_test.clj b/test/metabase/api/search_test.clj index b6807088cf01d027240675a9a981abfd3b99c73d..7e0f89cc5e55091e90f41afbdd582e705ea3b83b 100644 --- a/test/metabase/api/search_test.clj +++ b/test/metabase/api/search_test.clj @@ -23,7 +23,7 @@ [metabase.search :as search] [metabase.search.config :as search.config] [metabase.search.fulltext :as search.fulltext] - [metabase.search.scoring :as scoring] + [metabase.search.in-place.scoring :as scoring] [metabase.test :as mt] [metabase.util :as u] [toucan2.core :as t2] @@ -842,7 +842,7 @@ (mt/with-temp [Table _ {:name "RoundTable"}] (do-test-users [user [:crowberto :rasta]] (is (= [(default-table-search-row "RoundTable")] - (search-request-data user :q "RoundTable"))))))) + (search-request-data user :q "RoundTable" :models "table"))))))) (deftest table-test-2 (testing "You should not see hidden tables" diff --git a/test/metabase/search/filter_test.clj b/test/metabase/search/filter_test.clj index 4a6adbb7b7a1f08f85f2ed47f424a85c5f5c3b05..410ef6e971c287ceb16cf59e88aeceb8f90e33fc 100644 --- a/test/metabase/search/filter_test.clj +++ b/test/metabase/search/filter_test.clj @@ -1,441 +1,79 @@ -(ns ^:mb/once metabase.search.filter-test +(ns metabase.search.filter-test (:require + [clojure.math.combinatorics :as math.combo] [clojure.test :refer :all] - [metabase.audit :as audit] + [metabase.models] [metabase.search.config :as search.config] [metabase.search.filter :as search.filter] - [metabase.test :as mt])) - -(def default-search-ctx - {:search-string nil - :archived? false - :models search.config/all-models - :model-ancestors? false - :current-user-id 1 - :is-superuser? true - :current-user-perms #{"/"} - :calculate-available-models? false}) - -(deftest ^:parallel ->applicable-models-test - (testing "without optional filters" - (testing "return :models as is" - (is (= search.config/all-models - (search.filter/search-context->applicable-models - default-search-ctx))) - (is (= #{} - (search.filter/search-context->applicable-models - (assoc default-search-ctx :models #{})))) - (is (= search.config/all-models - (search.filter/search-context->applicable-models - (merge default-search-ctx - {:archived? true}))))))) - -(deftest ^:parallel ->applicable-models-test-2 - (testing "optional filters will return intersection of support models and provided models\n" - (testing "created by" - (is (= #{"dashboard" "dataset" "action" "card" "metric"} - (search.filter/search-context->applicable-models - (merge default-search-ctx - {:created-by #{1}})))) - - (is (= #{"dashboard" "dataset"} - (search.filter/search-context->applicable-models - (merge default-search-ctx - {:models #{"dashboard" "dataset" "table"} - :created-by #{1}}))))) - - (testing "created at" - (is (= #{"dashboard" "table" "dataset" "collection" "database" "action" "card" "metric"} - (search.filter/search-context->applicable-models - (merge default-search-ctx - {:created-at "past3days"})))) - - (is (= #{"dashboard" "table" "dataset"} - (search.filter/search-context->applicable-models - (merge default-search-ctx - {:models #{"dashboard" "dataset" "table"} - :created-at "past3days"}))))) - - (testing "verified" - (is (= #{"dataset" "card" "metric"} - (search.filter/search-context->applicable-models - (merge default-search-ctx - {:verified true})))) - - (is (= #{"dataset"} - (search.filter/search-context->applicable-models - (merge default-search-ctx - {:models #{"dashboard" "dataset" "table"} - :verified true}))))) - - (testing "last edited by" - (is (= #{"dashboard" "dataset" "card" "metric"} - (search.filter/search-context->applicable-models - (merge default-search-ctx - {:last-edited-by #{1}})))) - - (is (= #{"dashboard" "dataset"} - (search.filter/search-context->applicable-models - (merge default-search-ctx - {:models #{"dashboard" "dataset" "table"} - :last-edited-by #{1}}))))) - - (testing "last edited at" - (is (= #{"dashboard" "dataset" "action" "metric" "card"} - (search.filter/search-context->applicable-models - (merge default-search-ctx - {:last-edited-at "past3days"})))) - - (is (= #{"dashboard" "dataset"} - (search.filter/search-context->applicable-models - (merge default-search-ctx - {:models #{"dashboard" "dataset" "table"} - :last-edited-at "past3days"}))))) - - (testing "search native query" - (is (= #{"dataset" "action" "card" "metric"} - (search.filter/search-context->applicable-models - (merge default-search-ctx - {:search-native-query true}))))))) - -(deftest joined-with-table?-test - (are [expected args] - (= expected (apply #'search.filter/joined-with-table? args)) - - false - [{} :join :a] - - true - [{:join [:a [:= :a.b :c.d]]} :join :a] - - false - [{:join [:a [:= :a.b :c.d]]} :join :d] - - ;; work with multiple join types - false - [{:join [:a [:= :a.b :c.d]]} :left-join :d] - - ;; do the same with other join types too - true - [{:left-join [:a [:= :a.b :c.d]]} :left-join :a] - - false - [{:left-join [:a [:= :a.b :c.d]]} :left-join :d])) - -(def ^:private base-search-query - {:select [:*] - :from [:table]}) - -(deftest ^:parallel build-archived-filter-test - (testing "archived filters" - (is (= [:= :card.archived false] - (:where (search.filter/build-filters - base-search-query "card" default-search-ctx)))) - - (is (= [:and - [:= :table.active true] - [:= :table.visibility_type nil] - [:not [:= :table.db_id audit/audit-db-id]]] - (:where (search.filter/build-filters - base-search-query "table" default-search-ctx)))))) - -(deftest ^:parallel build-table-filter-always-ignores-audit-tables - (is (contains? - (set (:where (search.filter/build-filters - base-search-query "table" default-search-ctx))) - [:not [:= :table.db_id audit/audit-db-id]]))) - -(deftest ^:parallel build-filter-with-search-string-test - (testing "with search string" - (is (= [:and - [:or - [:like [:lower :card.name] "%a%"] - [:like [:lower :card.name] "%string%"] - [:like [:lower :card.description] "%a%"] - [:like [:lower :card.description] "%string%"]] - [:= :card.archived false]] - (:where (search.filter/build-filters - base-search-query "card" - (merge default-search-ctx {:search-string "a string"}))))))) - -(deftest date-range-filter-clause-test - (mt/with-clock #t "2023-05-04T10:02:05Z[UTC]" - (are [created-at expected-where] - (= expected-where (#'search.filter/date-range-filter-clause :card.created_at created-at)) - ;; absolute datetime - "Q1-2023" [:and [:>= [:cast :card.created_at :date] #t "2023-01-01"] - [:< [:cast :card.created_at :date] #t "2023-04-01"]] - "2016-04-18~2016-04-23" [:and [:>= [:cast :card.created_at :date] #t "2016-04-18"] - [:< [:cast :card.created_at :date] #t "2016-04-24"]] - "2016-04-18" [:and [:>= [:cast :card.created_at :date] #t "2016-04-18"] - [:< [:cast :card.created_at :date] #t "2016-04-19"]] - "2023-05-04~" [:> [:cast :card.created_at :date] #t "2023-05-04"] - "~2023-05-04" [:< [:cast :card.created_at :date] #t "2023-05-05"] - "2016-04-18T10:30:00~2016-04-23T11:30:00" [:and [:>= :card.created_at #t "2016-04-18T10:30"] - [:< :card.created_at #t "2016-04-23T11:31:00"]] - "2016-04-23T10:00:00" [:and [:>= :card.created_at #t "2016-04-23T10:00"] - [:< :card.created_at #t "2016-04-23T10:01"]] - "2016-04-18T10:30:00~" [:> :card.created_at #t "2016-04-18T10:30"] - "~2016-04-18T10:30:00" [:< :card.created_at #t "2016-04-18T10:31"] - ;; relative datetime - "past3days" [:and [:>= [:cast :card.created_at :date] #t "2023-05-01"] - [:< [:cast :card.created_at :date] #t "2023-05-04"]] - "past3days~" [:and [:>= [:cast :card.created_at :date] #t "2023-05-01"] - [:< [:cast :card.created_at :date] #t "2023-05-05"]] - "past3hours~" [:and [:>= :card.created_at #t "2023-05-04T07:00"] - [:< :card.created_at #t "2023-05-04T11:00"]] - "next3days" [:and [:>= [:cast :card.created_at :date] #t "2023-05-05"] - [:< [:cast :card.created_at :date] #t "2023-05-08"]] - "thisminute" [:and [:>= :card.created_at #t "2023-05-04T10:02"] - [:< :card.created_at #t "2023-05-04T10:03"]] - "lasthour" [:and [:>= :card.created_at #t "2023-05-04T09:00"] - [:< :card.created_at #t "2023-05-04T10:00"]] - "past1months-from-36months" [:and [:>= [:cast :card.created_at :date] #t "2020-04-01"] - [:< [:cast :card.created_at :date] #t "2020-05-01"]] - "today" [:and [:>= [:cast :card.created_at :date] #t "2023-05-04"] - [:< [:cast :card.created_at :date] #t "2023-05-05"]] - "yesterday" [:and [:>= [:cast :card.created_at :date] #t "2023-05-03"] - [:< [:cast :card.created_at :date] #t "2023-05-04"]]))) - -;; both created at and last-edited-at use [[search.filter/date-range-filter-clause]] -;; to generate the filter clause so for the full test cases, check [[date-range-filter-clause-test]] -;; these 2 tests are for checking the shape of the query -(deftest ^:parallel created-at-filter-test - (testing "created-at filter" - (is (= {:select [:*] - :from [:table] - :where [:and - [:= :card.archived false] - [:>= [:cast :card.created_at :date] #t "2016-04-18"] - [:< [:cast :card.created_at :date] #t "2016-04-24"]]} - (search.filter/build-filters - base-search-query "card" - (merge default-search-ctx {:created-at "2016-04-18~2016-04-23"})))))) - -(deftest ^:parallel last-edited-at-filter-test - (testing "last edited at filter" - (is (= {:select [:*] - :from [:table] - :join [:revision [:= :revision.model_id :card.id]] - :where [:and - [:= :card.archived false] - [:= :revision.most_recent true] - [:= :revision.model "Card"] - [:>= [:cast :revision.timestamp :date] #t "2016-04-18"] - [:< [:cast :revision.timestamp :date] #t "2016-04-24"]]} - (search.filter/build-filters - base-search-query "dataset" - (merge default-search-ctx {:last-edited-at "2016-04-18~2016-04-23"})))) - - (testing "do not join twice if has both last-edited-at and last-edited-by" - (is (= {:select [:*] - :from [:table] - :join [:revision [:= :revision.model_id :card.id]] - :where [:and - [:= :card.archived false] - [:= :revision.most_recent true] - [:= :revision.model "Card"] - [:>= [:cast :revision.timestamp :date] #t "2016-04-18"] - [:< [:cast :revision.timestamp :date] #t "2016-04-24"] - [:= :revision.user_id 1]]} - (search.filter/build-filters - base-search-query "dataset" - (merge default-search-ctx {:last-edited-at "2016-04-18~2016-04-23" - :last-edited-by #{1}}))))) - - (testing "for actiion" - (is (= {:select [:*] - :from [:table] - :where [:and [:= :action.archived false] - [:>= [:cast :action.updated_at :date] #t "2016-04-18"] - [:< [:cast :action.updated_at :date] #t "2016-04-24"]]} - (search.filter/build-filters - base-search-query "action" - (merge default-search-ctx {:last-edited-at "2016-04-18~2016-04-23"}))))))) - -(deftest ^:parallel build-created-by-filter-test - (testing "created-by filter" - (is (= [:and [:= :card.archived false] [:= :card.creator_id 1]] - (:where (search.filter/build-filters - base-search-query "card" - (merge default-search-ctx - {:created-by #{1}}))))) - (is (= [:and [:= :card.archived false] [:in :card.creator_id #{1 2}]] - (:where (search.filter/build-filters - base-search-query "card" - (merge default-search-ctx - {:created-by #{1 2}}))))))) - -(deftest ^:parallel build-last-edited-by-filter-test - (testing "last edited by filter" - (is (= {:select [:*] - :from [:table] - :where [:and - [:= :card.archived false] - [:= :revision.most_recent true] - [:= :revision.model "Card"] - [:= :revision.user_id 1]] - :join [:revision [:= :revision.model_id :card.id]]} - (search.filter/build-filters - base-search-query "dataset" - (merge default-search-ctx - {:last-edited-by #{1}})))))) - -(deftest ^:parallel build-last-edited-by-filter-test-2 - (testing "last edited by filter" - (is (= {:select [:*] - :from [:table] - :where [:and - [:= :card.archived false] - [:= :revision.most_recent true] - [:= :revision.model "Card"] - [:in :revision.user_id #{1 2}]] - :join [:revision [:= :revision.model_id :card.id]]} - (search.filter/build-filters - base-search-query "dataset" - (merge default-search-ctx - {:last-edited-by #{1 2}})))))) - -(deftest ^:parallel build-verified-filter-test - (testing "verified filter" - (mt/with-premium-features #{:content-verification} - (testing "for cards" - (is (= (merge - base-search-query - {:where [:and - [:= :card.archived false] - [:= :moderation_review.status "verified"] - [:= :moderation_review.moderated_item_type "card"] - [:= :moderation_review.most_recent true]] - :join [:moderation_review [:= :moderation_review.moderated_item_id :card.id]]}) - (search.filter/build-filters - base-search-query "card" - (merge default-search-ctx {:verified true})))))))) - -(deftest ^:parallel build-verified-filter-test-1b - (testing "verified filter" - (mt/with-premium-features #{:content-verification} - (testing "for models" - (is (= (merge - base-search-query - {:where [:and - [:= :card.archived false] - [:= :moderation_review.status "verified"] - [:= :moderation_review.moderated_item_type "card"] - [:= :moderation_review.most_recent true]] - :join [:moderation_review [:= :moderation_review.moderated_item_id :card.id]]}) - (search.filter/build-filters - base-search-query "dataset" - (merge default-search-ctx {:verified true})))))))) - -(deftest ^:parallel build-verified-filter-test-2 - (testing "verified filter" - (mt/with-premium-features #{} - (testing "for cards without ee features" - (is (= (merge - base-search-query - {:where [:and - [:= :card.archived false] - [:inline [:= 0 1]]]}) - (search.filter/build-filters - base-search-query "card" - (merge default-search-ctx {:verified true})))))))) - -(deftest ^:parallel build-verified-filter-test-2b - (testing "verified filter" - (mt/with-premium-features #{} - (testing "for models without ee features" - (is (= (merge - base-search-query - {:where [:and - [:= :card.archived false] - [:inline [:= 0 1]]]}) - (search.filter/build-filters - base-search-query "dataset" - (merge default-search-ctx {:verified true})))))))) - -(deftest ^:parallel build-filter-throw-error-for-unsuported-filters-test - (testing "throw error for filtering with unsupport models" - (is (thrown-with-msg? - clojure.lang.ExceptionInfo - #":created-by filter for database is not supported" - (search.filter/build-filters - base-search-query - "database" - (merge default-search-ctx - {:created-by #{1}})))))) - -(deftest build-filters-indexed-entity-test - (testing "users that are not sandboxed or impersonated can search for indexed entity" - (with-redefs [search.filter/sandboxed-or-impersonated-user? (constantly false)] - (is (= [:and - [:or [:like [:lower :model-index-value.name] "%foo%"]] - [:inline [:= 1 1]]] - (:where (search.filter/build-filters - base-search-query - "indexed-entity" - (merge default-search-ctx {:search-string "foo"})))))))) - -(deftest build-filters-indexed-entity-test-2 - (testing "otherwise search result is empty" - (with-redefs [search.filter/sandboxed-or-impersonated-user? (constantly true)] - (is (= [:and - [:or [:= 0 1]] - [:inline [:= 1 1]]] - (:where (search.filter/build-filters - base-search-query - "indexed-entity" - (merge default-search-ctx {:search-string "foo"})))))))) - -(deftest ^:parallel build-filters-search-native-query - (doseq [model ["dataset" "card"]] - (testing model - (testing "do not search for native query by default" - (is (= [:and - [:or [:like [:lower :card.name] "%foo%"] [:like [:lower :card.description] "%foo%"]] - [:= :card.archived false]] - (:where (search.filter/build-filters - base-search-query - model - (merge default-search-ctx {:search-string "foo"}))))))))) - -(deftest ^:parallel build-filters-search-native-query-2 - (doseq [model ["dataset" "card"]] - (testing model - (testing "search in both name, description and dataset_query if is enabled" - (is (= [:and [:or - [:like [:lower :card.name] "%foo%"] - [:like [:lower :card.description] "%foo%"] - [:and - [:= :card.query_type "native"] - [:like [:lower :card.dataset_query] "%foo%"]]] - [:= :card.archived false]] - (:where (search.filter/build-filters - base-search-query - model - (merge default-search-ctx {:search-string "foo" :search-native-query true}))))))))) - -(deftest ^:parallel build-filters-search-native-query-3 - (testing "action" - (testing "do not search for native query by default" - (is (= [:and - [:or [:like [:lower :action.name] "%foo%"] [:like [:lower :action.description] "%foo%"]] - [:= :action.archived false]] - (:where (search.filter/build-filters - base-search-query - "action" - (merge default-search-ctx {:search-string "foo"})))))))) - -(deftest ^:parallel build-filters-search-native-query-4 - (testing "action" - (testing "search in both name, description and dataset_query if is enabled" - (is (= [:and - [:or - [:like [:lower :action.name] "%foo%"] - [:like [:lower :action.description] "%foo%"] - [:like [:lower :query_action.dataset_query] "%foo%"]] - [:= :action.archived false]] - (:where (search.filter/build-filters - base-search-query - "action" - (merge default-search-ctx {:search-string "foo" :search-native-query true})))))))) + [metabase.search.in-place.filter :as search.in-place.filter])) + +(comment + ;; We load this to ensure all the search-models are registered + metabase.models/keep-me) + +(defn- filter-keys [] + (keys @#'search.filter/context-key->attr)) + +(defn- active-filter-combinations [] + ;; We ignore :archived? as we've moved some of these filters to the `:where` clause as a simplifying optimization. + ;; We ignore :card-db-id as legacy search implements this filter sneakily inside the models themselves. + (math.combo/subsets (remove #{:archived? :table-db-id} (filter-keys)))) + +(defn with-all-models [search-ctx] + (assoc search-ctx :models search.config/all-models)) + +(deftest search-context->applicable-models-test + (testing "All models are relevant if we're not looking in the trash" + (is (= search.config/all-models + (search.filter/search-context->applicable-models (with-all-models {:archived? false}))))) + + (testing "We only search for certain models in the trash" + (is (= #{"dashboard" "dataset" "segment" "collection" "action" "metric" "card"} + (search.filter/search-context->applicable-models (with-all-models {:archived? true}))))) + + (doseq [active-filters (active-filter-combinations)] + (testing (str "Consistent models included when filtering on " (vec active-filters)) + (let [search-ctx (with-all-models (zipmap active-filters (repeat true)))] + (is (= (search.in-place.filter/search-context->applicable-models search-ctx) + (search.filter/search-context->applicable-models search-ctx))))))) + +(def kitchen-sink-filter-context + {:archived? true + :created-at "2024-10-01" + :created-by [123] + :table-db-id 231 + :last-edited-by [321] + :last-edited-at "2024-10-02" + :search-native-query true + :verified true + :ids [1 2 3 4] + :models (disj search.config/all-models "dataset")}) + +(deftest with-filters-test + (testing "The kitchen sink context is complete" + (is (empty? (remove kitchen-sink-filter-context (filter-keys))))) + (testing "We leave the query alone if there are no filters" + (is (= {:select [:some :stuff] + :from :somewhere} + (search.filter/with-filters {} {:select [:some :stuff], :from :somewhere})))) + (testing "We can insert appropriate constraints for all the filters" + (is (= {:select [:some :stuff] + :from :somewhere + ;; This :where clause is a set to avoid flakes, since the clause order will be non-deterministic. + :where #{:and + [:in :model #{"dashboard" "table" "segment" "collection" "database" "action" "indexed-entity" "metric" "card"}] + [:in :model_id [1 2 3 4]] + [:in :model ["card" "dataset" "metric" "dashboard" "action"]] + [:= :search_index.archived true] + [:>= [:cast :search_index.model_created_at :date] #t"2024-10-01"] + [:< [:cast :search_index.model_created_at :date] #t"2024-10-02"] + ;; depends on whether :content-verification is enabled + #_[:= :search_index.verified true] + [:inline [:= 1 1]] + [:in :search_index.creator_id [123]] + [:= :search_index.database_id 231] + [:>= [:cast :search_index.last_edited_at :date] #t"2024-10-02"] + [:< [:cast :search_index.last_edited_at :date] #t"2024-10-03"] + [:in :search_index.last_editor_id [321]]}} + (-> (search.filter/with-filters kitchen-sink-filter-context {:select [:some :stuff], :from :somewhere}) + (update :where set)))))) diff --git a/test/metabase/search/impl_test.clj b/test/metabase/search/impl_test.clj index 73554a5180f813b2946633b0c86d973faaf26c08..3ce1354e513180edcf2f91b439af32d41da520e2 100644 --- a/test/metabase/search/impl_test.clj +++ b/test/metabase/search/impl_test.clj @@ -28,8 +28,7 @@ (is (= :search.engine/fulltext (#'search.impl/parse-engine "fulltext"))))) (when (search/supports-index?) (testing "Subclasses" - (is (= :search.engine/hybrid (#'search.impl/parse-engine "hybrid"))) - (is (= :search.engine/minimal (#'search.impl/parse-engine "minimal")))))) + (is (= :search.engine/hybrid (#'search.impl/parse-engine "hybrid")))))) (deftest ^:parallel order-clause-test (testing "it includes all columns and normalizes the query" diff --git a/test/metabase/search/in_place/filter_test.clj b/test/metabase/search/in_place/filter_test.clj new file mode 100644 index 0000000000000000000000000000000000000000..79e0764327954fa1451a1af961e8c97c3faa2397 --- /dev/null +++ b/test/metabase/search/in_place/filter_test.clj @@ -0,0 +1,445 @@ +(ns ^:mb/once metabase.search.in-place.filter-test + ;; Left renaming search.filter out of this PR to save a ton of noise. + ;; This comment is way up here, because cljfmt doesn't like it in the middle of the :require. + #_{:clj-kondo/ignore [:consistent-alias]} + (:require + [clojure.test :refer :all] + [metabase.audit :as audit] + [metabase.search.config :as search.config] + [metabase.search.in-place.filter :as search.filter] + [metabase.search.permissions :as search.permissions] + [metabase.test :as mt])) + +(def default-search-ctx + {:search-string nil + :archived? false + :models search.config/all-models + :model-ancestors? false + :current-user-id 1 + :is-superuser? true + :current-user-perms #{"/"} + :calculate-available-models? false}) + +(deftest ^:parallel ->applicable-models-test + (testing "without optional filters" + (testing "return :models as is" + (is (= search.config/all-models + (search.filter/search-context->applicable-models + default-search-ctx))) + (is (= #{} + (search.filter/search-context->applicable-models + (assoc default-search-ctx :models #{})))) + (is (= search.config/all-models + (search.filter/search-context->applicable-models + (merge default-search-ctx + {:archived? true}))))))) + +(deftest ^:parallel ->applicable-models-test-2 + (testing "optional filters will return intersection of support models and provided models\n" + (testing "created by" + (is (= #{"dashboard" "dataset" "action" "card" "metric"} + (search.filter/search-context->applicable-models + (merge default-search-ctx + {:created-by #{1}})))) + + (is (= #{"dashboard" "dataset"} + (search.filter/search-context->applicable-models + (merge default-search-ctx + {:models #{"dashboard" "dataset" "table"} + :created-by #{1}}))))) + + (testing "created at" + (is (= #{"dashboard" "table" "dataset" "collection" "database" "action" "card" "metric"} + (search.filter/search-context->applicable-models + (merge default-search-ctx + {:created-at "past3days"})))) + + (is (= #{"dashboard" "table" "dataset"} + (search.filter/search-context->applicable-models + (merge default-search-ctx + {:models #{"dashboard" "dataset" "table"} + :created-at "past3days"}))))) + + (testing "verified" + (is (= #{"dataset" "card" "metric"} + (search.filter/search-context->applicable-models + (merge default-search-ctx + {:verified true})))) + + (is (= #{"dataset"} + (search.filter/search-context->applicable-models + (merge default-search-ctx + {:models #{"dashboard" "dataset" "table"} + :verified true}))))) + + (testing "last edited by" + (is (= #{"dashboard" "dataset" "card" "metric"} + (search.filter/search-context->applicable-models + (merge default-search-ctx + {:last-edited-by #{1}})))) + + (is (= #{"dashboard" "dataset"} + (search.filter/search-context->applicable-models + (merge default-search-ctx + {:models #{"dashboard" "dataset" "table"} + :last-edited-by #{1}}))))) + + (testing "last edited at" + (is (= #{"dashboard" "dataset" "action" "metric" "card"} + (search.filter/search-context->applicable-models + (merge default-search-ctx + {:last-edited-at "past3days"})))) + + (is (= #{"dashboard" "dataset"} + (search.filter/search-context->applicable-models + (merge default-search-ctx + {:models #{"dashboard" "dataset" "table"} + :last-edited-at "past3days"}))))) + + (testing "search native query" + (is (= #{"dataset" "action" "card" "metric"} + (search.filter/search-context->applicable-models + (merge default-search-ctx + {:search-native-query true}))))))) + +(deftest joined-with-table?-test + (are [expected args] + (= expected (apply #'search.filter/joined-with-table? args)) + + false + [{} :join :a] + + true + [{:join [:a [:= :a.b :c.d]]} :join :a] + + false + [{:join [:a [:= :a.b :c.d]]} :join :d] + + ;; work with multiple join types + false + [{:join [:a [:= :a.b :c.d]]} :left-join :d] + + ;; do the same with other join types too + true + [{:left-join [:a [:= :a.b :c.d]]} :left-join :a] + + false + [{:left-join [:a [:= :a.b :c.d]]} :left-join :d])) + +(def ^:private base-search-query + {:select [:*] + :from [:table]}) + +(deftest ^:parallel build-archived-filter-test + (testing "archived filters" + (is (= [:= :card.archived false] + (:where (search.filter/build-filters + base-search-query "card" default-search-ctx)))) + + (is (= [:and + [:= :table.active true] + [:= :table.visibility_type nil] + [:not [:= :table.db_id audit/audit-db-id]]] + (:where (search.filter/build-filters + base-search-query "table" default-search-ctx)))))) + +(deftest ^:parallel build-table-filter-always-ignores-audit-tables + (is (contains? + (set (:where (search.filter/build-filters + base-search-query "table" default-search-ctx))) + [:not [:= :table.db_id audit/audit-db-id]]))) + +(deftest ^:parallel build-filter-with-search-string-test + (testing "with search string" + (is (= [:and + [:or + [:like [:lower :card.name] "%a%"] + [:like [:lower :card.name] "%string%"] + [:like [:lower :card.description] "%a%"] + [:like [:lower :card.description] "%string%"]] + [:= :card.archived false]] + (:where (search.filter/build-filters + base-search-query "card" + (merge default-search-ctx {:search-string "a string"}))))))) + +(deftest date-range-filter-clause-test + (mt/with-clock #t "2023-05-04T10:02:05Z[UTC]" + (are [created-at expected-where] + (= expected-where (#'search.filter/date-range-filter-clause :card.created_at created-at)) + ;; absolute datetime + "Q1-2023" [:and [:>= [:cast :card.created_at :date] #t "2023-01-01"] + [:< [:cast :card.created_at :date] #t "2023-04-01"]] + "2016-04-18~2016-04-23" [:and [:>= [:cast :card.created_at :date] #t "2016-04-18"] + [:< [:cast :card.created_at :date] #t "2016-04-24"]] + "2016-04-18" [:and [:>= [:cast :card.created_at :date] #t "2016-04-18"] + [:< [:cast :card.created_at :date] #t "2016-04-19"]] + "2023-05-04~" [:> [:cast :card.created_at :date] #t "2023-05-04"] + "~2023-05-04" [:< [:cast :card.created_at :date] #t "2023-05-05"] + "2016-04-18T10:30:00~2016-04-23T11:30:00" [:and [:>= :card.created_at #t "2016-04-18T10:30"] + [:< :card.created_at #t "2016-04-23T11:31:00"]] + "2016-04-23T10:00:00" [:and [:>= :card.created_at #t "2016-04-23T10:00"] + [:< :card.created_at #t "2016-04-23T10:01"]] + "2016-04-18T10:30:00~" [:> :card.created_at #t "2016-04-18T10:30"] + "~2016-04-18T10:30:00" [:< :card.created_at #t "2016-04-18T10:31"] + ;; relative datetime + "past3days" [:and [:>= [:cast :card.created_at :date] #t "2023-05-01"] + [:< [:cast :card.created_at :date] #t "2023-05-04"]] + "past3days~" [:and [:>= [:cast :card.created_at :date] #t "2023-05-01"] + [:< [:cast :card.created_at :date] #t "2023-05-05"]] + "past3hours~" [:and [:>= :card.created_at #t "2023-05-04T07:00"] + [:< :card.created_at #t "2023-05-04T11:00"]] + "next3days" [:and [:>= [:cast :card.created_at :date] #t "2023-05-05"] + [:< [:cast :card.created_at :date] #t "2023-05-08"]] + "thisminute" [:and [:>= :card.created_at #t "2023-05-04T10:02"] + [:< :card.created_at #t "2023-05-04T10:03"]] + "lasthour" [:and [:>= :card.created_at #t "2023-05-04T09:00"] + [:< :card.created_at #t "2023-05-04T10:00"]] + "past1months-from-36months" [:and [:>= [:cast :card.created_at :date] #t "2020-04-01"] + [:< [:cast :card.created_at :date] #t "2020-05-01"]] + "today" [:and [:>= [:cast :card.created_at :date] #t "2023-05-04"] + [:< [:cast :card.created_at :date] #t "2023-05-05"]] + "yesterday" [:and [:>= [:cast :card.created_at :date] #t "2023-05-03"] + [:< [:cast :card.created_at :date] #t "2023-05-04"]]))) + +;; both created at and last-edited-at use [[search.filter/date-range-filter-clause]] +;; to generate the filter clause so for the full test cases, check [[date-range-filter-clause-test]] +;; these 2 tests are for checking the shape of the query +(deftest ^:parallel created-at-filter-test + (testing "created-at filter" + (is (= {:select [:*] + :from [:table] + :where [:and + [:= :card.archived false] + [:>= [:cast :card.created_at :date] #t "2016-04-18"] + [:< [:cast :card.created_at :date] #t "2016-04-24"]]} + (search.filter/build-filters + base-search-query "card" + (merge default-search-ctx {:created-at "2016-04-18~2016-04-23"})))))) + +(deftest ^:parallel last-edited-at-filter-test + (testing "last edited at filter" + (is (= {:select [:*] + :from [:table] + :join [:revision [:= :revision.model_id :card.id]] + :where [:and + [:= :card.archived false] + [:= :revision.most_recent true] + [:= :revision.model "Card"] + [:>= [:cast :revision.timestamp :date] #t "2016-04-18"] + [:< [:cast :revision.timestamp :date] #t "2016-04-24"]]} + (search.filter/build-filters + base-search-query "dataset" + (merge default-search-ctx {:last-edited-at "2016-04-18~2016-04-23"})))) + + (testing "do not join twice if has both last-edited-at and last-edited-by" + (is (= {:select [:*] + :from [:table] + :join [:revision [:= :revision.model_id :card.id]] + :where [:and + [:= :card.archived false] + [:= :revision.most_recent true] + [:= :revision.model "Card"] + [:>= [:cast :revision.timestamp :date] #t "2016-04-18"] + [:< [:cast :revision.timestamp :date] #t "2016-04-24"] + [:= :revision.user_id 1]]} + (search.filter/build-filters + base-search-query "dataset" + (merge default-search-ctx {:last-edited-at "2016-04-18~2016-04-23" + :last-edited-by #{1}}))))) + + (testing "for actiion" + (is (= {:select [:*] + :from [:table] + :where [:and [:= :action.archived false] + [:>= [:cast :action.updated_at :date] #t "2016-04-18"] + [:< [:cast :action.updated_at :date] #t "2016-04-24"]]} + (search.filter/build-filters + base-search-query "action" + (merge default-search-ctx {:last-edited-at "2016-04-18~2016-04-23"}))))))) + +(deftest ^:parallel build-created-by-filter-test + (testing "created-by filter" + (is (= [:and [:= :card.archived false] [:= :card.creator_id 1]] + (:where (search.filter/build-filters + base-search-query "card" + (merge default-search-ctx + {:created-by #{1}}))))) + (is (= [:and [:= :card.archived false] [:in :card.creator_id #{1 2}]] + (:where (search.filter/build-filters + base-search-query "card" + (merge default-search-ctx + {:created-by #{1 2}}))))))) + +(deftest ^:parallel build-last-edited-by-filter-test + (testing "last edited by filter" + (is (= {:select [:*] + :from [:table] + :where [:and + [:= :card.archived false] + [:= :revision.most_recent true] + [:= :revision.model "Card"] + [:= :revision.user_id 1]] + :join [:revision [:= :revision.model_id :card.id]]} + (search.filter/build-filters + base-search-query "dataset" + (merge default-search-ctx + {:last-edited-by #{1}})))))) + +(deftest ^:parallel build-last-edited-by-filter-test-2 + (testing "last edited by filter" + (is (= {:select [:*] + :from [:table] + :where [:and + [:= :card.archived false] + [:= :revision.most_recent true] + [:= :revision.model "Card"] + [:in :revision.user_id #{1 2}]] + :join [:revision [:= :revision.model_id :card.id]]} + (search.filter/build-filters + base-search-query "dataset" + (merge default-search-ctx + {:last-edited-by #{1 2}})))))) + +(deftest ^:parallel build-verified-filter-test + (testing "verified filter" + (mt/with-premium-features #{:content-verification} + (testing "for cards" + (is (= (merge + base-search-query + {:where [:and + [:= :card.archived false] + [:= :moderation_review.status "verified"] + [:= :moderation_review.moderated_item_type "card"] + [:= :moderation_review.most_recent true]] + :join [:moderation_review [:= :moderation_review.moderated_item_id :card.id]]}) + (search.filter/build-filters + base-search-query "card" + (merge default-search-ctx {:verified true})))))))) + +(deftest ^:parallel build-verified-filter-test-1b + (testing "verified filter" + (mt/with-premium-features #{:content-verification} + (testing "for models" + (is (= (merge + base-search-query + {:where [:and + [:= :card.archived false] + [:= :moderation_review.status "verified"] + [:= :moderation_review.moderated_item_type "card"] + [:= :moderation_review.most_recent true]] + :join [:moderation_review [:= :moderation_review.moderated_item_id :card.id]]}) + (search.filter/build-filters + base-search-query "dataset" + (merge default-search-ctx {:verified true})))))))) + +(deftest ^:parallel build-verified-filter-test-2 + (testing "verified filter" + (mt/with-premium-features #{} + (testing "for cards without ee features" + (is (= (merge + base-search-query + {:where [:and + [:= :card.archived false] + [:inline [:= 0 1]]]}) + (search.filter/build-filters + base-search-query "card" + (merge default-search-ctx {:verified true})))))))) + +(deftest ^:parallel build-verified-filter-test-2b + (testing "verified filter" + (mt/with-premium-features #{} + (testing "for models without ee features" + (is (= (merge + base-search-query + {:where [:and + [:= :card.archived false] + [:inline [:= 0 1]]]}) + (search.filter/build-filters + base-search-query "dataset" + (merge default-search-ctx {:verified true})))))))) + +(deftest ^:parallel build-filter-throw-error-for-unsuported-filters-test + (testing "throw error for filtering with unsupport models" + (is (thrown-with-msg? + clojure.lang.ExceptionInfo + #":created-by filter for database is not supported" + (search.filter/build-filters + base-search-query + "database" + (merge default-search-ctx + {:created-by #{1}})))))) + +(deftest build-filters-indexed-entity-test + (testing "users that are not sandboxed or impersonated can search for indexed entity" + (with-redefs [search.permissions/sandboxed-or-impersonated-user? (constantly false)] + (is (= [:and + [:or [:like [:lower :model-index-value.name] "%foo%"]] + [:inline [:= 1 1]]] + (:where (search.filter/build-filters + base-search-query + "indexed-entity" + (merge default-search-ctx {:search-string "foo"})))))))) + +(deftest build-filters-indexed-entity-test-2 + (testing "otherwise search result is empty" + (with-redefs [search.permissions/sandboxed-or-impersonated-user? (constantly true)] + (is (= [:and + [:or [:= 0 1]] + [:inline [:= 1 1]]] + (:where (search.filter/build-filters + base-search-query + "indexed-entity" + (merge default-search-ctx {:search-string "foo"})))))))) + +(deftest ^:parallel build-filters-search-native-query + (doseq [model ["dataset" "card"]] + (testing model + (testing "do not search for native query by default" + (is (= [:and + [:or [:like [:lower :card.name] "%foo%"] [:like [:lower :card.description] "%foo%"]] + [:= :card.archived false]] + (:where (search.filter/build-filters + base-search-query + model + (merge default-search-ctx {:search-string "foo"}))))))))) + +(deftest ^:parallel build-filters-search-native-query-2 + (doseq [model ["dataset" "card"]] + (testing model + (testing "search in both name, description and dataset_query if is enabled" + (is (= [:and [:or + [:like [:lower :card.name] "%foo%"] + [:like [:lower :card.description] "%foo%"] + [:and + [:= :card.query_type "native"] + [:like [:lower :card.dataset_query] "%foo%"]]] + [:= :card.archived false]] + (:where (search.filter/build-filters + base-search-query + model + (merge default-search-ctx {:search-string "foo" :search-native-query true}))))))))) + +(deftest ^:parallel build-filters-search-native-query-3 + (testing "action" + (testing "do not search for native query by default" + (is (= [:and + [:or [:like [:lower :action.name] "%foo%"] [:like [:lower :action.description] "%foo%"]] + [:= :action.archived false]] + (:where (search.filter/build-filters + base-search-query + "action" + (merge default-search-ctx {:search-string "foo"})))))))) + +(deftest ^:parallel build-filters-search-native-query-4 + (testing "action" + (testing "search in both name, description and dataset_query if is enabled" + (is (= [:and + [:or + [:like [:lower :action.name] "%foo%"] + [:like [:lower :action.description] "%foo%"] + [:like [:lower :query_action.dataset_query] "%foo%"]] + [:= :action.archived false]] + (:where (search.filter/build-filters + base-search-query + "action" + (merge default-search-ctx {:search-string "foo" :search-native-query true})))))))) diff --git a/test/metabase/search/scoring_test.clj b/test/metabase/search/in_place/scoring_test.clj similarity index 98% rename from test/metabase/search/scoring_test.clj rename to test/metabase/search/in_place/scoring_test.clj index e23129f0e9eceb75d839333127b414c57e75b78d..0b4ecb67fa0ab3c271565cfcb7f0b87fa021de92 100644 --- a/test/metabase/search/scoring_test.clj +++ b/test/metabase/search/in_place/scoring_test.clj @@ -1,11 +1,11 @@ -(ns metabase.search.scoring-test +(ns metabase.search.in-place.scoring-test (:require [clojure.test :refer :all] [java-time.api :as t] [metabase.search.config :as search.config] - [metabase.search.filter-test :as search.filter-test] + [metabase.search.in-place.filter-test :as search.filter-test] + [metabase.search.in-place.scoring :as scoring] [metabase.search.legacy :as search.legacy] - [metabase.search.scoring :as scoring] [metabase.test :as mt] [toucan2.core :as t2])) diff --git a/test/metabase/search/postgres/core_test.clj b/test/metabase/search/postgres/core_test.clj index 2a362ef83796bf8698576df29882adb3dd8d2888..fd6f1011625f42ab802af03ad78f8de96f99f0ed 100644 --- a/test/metabase/search/postgres/core_test.clj +++ b/test/metabase/search/postgres/core_test.clj @@ -1,14 +1,20 @@ (ns metabase.search.postgres.core-test (:require - [clojure.string :as str] [clojure.test :refer [deftest is testing]] + [macaw.util :as u] [metabase.db :as mdb] + [metabase.models] [metabase.search :as search] [metabase.search.postgres.core :as search.postgres] [metabase.search.postgres.index-test :refer [legacy-results]] [metabase.test :as mt] + [toucan2.core :as t2] [toucan2.realize :as t2.realize])) +(comment + ;; We load this to ensure all the search-models are registered + metabase.models/keep-me) + (def ^:private hybrid (comp t2.realize/realize #'search.postgres/hybrid)) @@ -17,8 +23,10 @@ `(when (= :postgres (mdb/db-type)) ;; TODO add more extensive data to search (mt/dataset ~'test-data - (search.postgres/init! true) - ~@body))) + (mt/with-temp [:model/User {user-id# :id} {:email "someone@somewhere.com"}] + (t2/insert! :model/Collection {:name "Some Collection" :personal_owner_id user-id#}) + (search.postgres/init! true) + ~@body)))) (def ^:private example-terms "Search queries which should give consistent, non-trivial results across engines, for the test data." @@ -34,49 +42,36 @@ (deftest permissions-test (with-setup - ;; Lucky Pidgeon, Crowberto Corv, Rasta Toucan + ;; Rasta Toucan has friends, like Lucky Pidgeon ;; ... plus any additional ones that leaked in from dev or other tests - (is (<= 3 (count (hybrid "collection")))) + (is (< 1 (count (hybrid "collection")))) (testing "Rasta can only see his own collections" - (is (= ["Rasta Toucan's Personal Collection"] - (->> {:current-user-id (mt/user->id :rasta) - :is-superuser? false - :current-user-perms #{"/none/"}} - (hybrid "collection") - (map :name) - ;; These can seep in from other tests T_T - (remove #(str/includes? % "trash")))))))) - -(deftest hybrid-multi-test - (with-setup - (testing "consistent results between both hybrid implementations\n" - (doseq [term example-terms] - (testing term - (is (= (hybrid term) - (#'search.postgres/hybrid-multi term)))))))) + (is (->> {:current-user-id (mt/user->id :rasta) + :is-superuser? false + :current-user-perms #{"/none/"}} + (hybrid "collection") + (map :name) + (not-any? #{"Some Collection"})))))) -(deftest minimal-test - (with-setup - (testing "consistent results with minimal implementations\n" - (doseq [term example-terms] - (testing term - ;; there is no ranking, so order is non-deterministic - (is (= (set (hybrid term)) - (set (#'search.postgres/minimal term))))))))) +(defn- normalize* [xs] + (into #{} + (map (comp #(dissoc % :bookmark :pinned :total_score) + u/strip-nils + #(update % :archived boolean))) + xs)) -(deftest minimal-with-perms-test +(deftest fulltext-test (with-setup - (testing "consistent results with minimal implementations\n" + (testing "consistent results with index-based implementations\n" (doseq [term (take 1 example-terms)] (testing term - ;; there is no ranking, so order is non-deterministic - (is (= (set (hybrid term)) - (set (#'search.postgres/minimal-with-perms - term - {:current-user-id (mt/user->id :crowberto) - :is-superuser? true - :archived? false - :current-user-perms #{"/"} - :model-ancestors? false - :models search/all-models - :search-string term}))))))))) + (is (= (normalize* (hybrid term)) + (normalize* (#'search.postgres/fulltext + term + {:current-user-id (mt/user->id :crowberto) + :is-superuser? true + :archived? false + :current-user-perms #{"/"} + :model-ancestors? false + :models search/all-models + :search-string term}))))))))) diff --git a/test/metabase/search/postgres/index_test.clj b/test/metabase/search/postgres/index_test.clj index 0721354eb19b5c00d297ba05559164e4b406cb09..c06232ba1a403d35dce3a3d08d0e2c76924d3576 100644 --- a/test/metabase/search/postgres/index_test.clj +++ b/test/metabase/search/postgres/index_test.clj @@ -1,5 +1,6 @@ (ns metabase.search.postgres.index-test (:require + [cheshire.core :as json] [clojure.test :refer [deftest is testing]] [metabase.db :as mdb] [metabase.search.postgres.core :as search.postgres] @@ -8,10 +9,14 @@ [metabase.test :as mt] [toucan2.core :as t2])) +(set! *warn-on-reflection* true) + (defn legacy-results "Use the source tables directly to search for records." [search-term & {:as opts}] - (t2/query (#'search.postgres/in-place-query (assoc opts :search-engine :search.engine/in-place :search-term search-term)))) + (-> (assoc opts :search-engine :search.engine/in-place :search-term search-term) + (#'search.postgres/in-place-query) + t2/query)) (def legacy-models "Just the identity of the matches" @@ -23,20 +28,70 @@ (defn- index-hits [term] (count (search.index/search term))) +;; These helpers only mutate the temp local AppDb. #_{:clj-kondo/ignore [:metabase/test-helpers-use-non-thread-safe-functions]} (defmacro with-index "Ensure a clean, small index." [& body] `(when (= :postgres (mdb/db-type)) - (mt/dataset ~(symbol "test-data") - (mt/with-temp [:model/Card {} {:name "Customer Satisfaction" :collection_id 1} - :model/Card {} {:name "The Latest Revenue Projections" :collection_id 1} - :model/Card {} {:name "Projected Revenue" :collection_id 1} - :model/Card {} {:name "Employee Satisfaction" :collection_id 1} - :model/Card {} {:name "Projected Satisfaction" :collection_id 1}] - (search.index/reset-index!) - (search.ingestion/populate-index!) - ~@body)))) + (binding [search.ingestion/*force-sync* true] + (mt/dataset ~(symbol "test-data") + (mt/with-temp [:model/Card {} {:name "Customer Satisfaction" :collection_id 1} + :model/Card {} {:name "The Latest Revenue Projections" :collection_id 1} + :model/Card {} {:name "Projected Revenue" :collection_id 1} + :model/Card {} {:name "Employee Satisfaction" :collection_id 1} + :model/Card {} {:name "Projected Satisfaction" :collection_id 1} + :model/Database {db-id# :id} {:name "Indexed Database"} + :model/Table {} {:name "Indexed Table", :db_id db-id#}] + (search.index/reset-index!) + (search.ingestion/populate-index!) + ~@body))))) + +(deftest idempotent-test + (with-index + (let [count-rows (fn [] (t2/count @#'search.index/active-table)) + rows-before (count-rows)] + (search.ingestion/populate-index!) + (is (= rows-before (count-rows)))))) + +(deftest incremental-update-test + (with-index + (testing "The index is updated when models change" + ;; Has a second entry is "Revenue Project(ions)", when using English dictionary + (is (= 1 #_2 (count (search.index/search "Projected Revenue")))) + (is (= 0 (count (search.index/search "Protected Avenue")))) + + (t2/update! :model/Card {:name "Projected Revenue"} {:name "Protected Avenue"}) + ;; TODO wire up an actual hook + (search.ingestion/update-index! (t2/select-one :model/Card :name "Protected Avenue")) + + ;; wait for the background thread + (is (= 0 #_1 (count (search.index/search "Projected Revenue")))) + (is (= 1 (count (search.index/search "Protected Avenue")))) + + ;; TODO wire up the actual hook, and actually delete it + (search.ingestion/delete-model! (t2/select-one :model/Card :name "Protected Avenue")) + + (is (= 0 #_1 (count (search.index/search "Projected Revenue")))) + (is (= 0 (count (search.index/search "Protected Avenue"))))))) + +(deftest related-update-test + (with-index + (testing "The index is updated when model dependencies change" + (let [index-table @#'search.index/active-table + table-id (t2/select-one-pk :model/Table :name "Indexed Table") + legacy-input #(-> (t2/select-one [index-table :legacy_input] :model "table" :model_id table-id) + :legacy_input + (json/parse-string true)) + db-id (t2/select-one-fn :db_id :model/Table table-id) + db-name-fn (comp :database_name legacy-input) + alternate-name (str (random-uuid))] + + (t2/update! :model/Database db-id {:name alternate-name}) + ;; TODO wire up an actual hook + (search.ingestion/update-index! (t2/select-one :model/Database :id db-id)) + + (is (= alternate-name (db-name-fn))))))) (deftest consistent-subset-test (with-index @@ -54,18 +109,20 @@ ;; but this one does (legacy-hits "venue")))) - (testing "Unless their lexemes are matching" - (doseq [[a b] [["revenue" "revenues"] - ["collect" "collection"]]] - (is (= (search.index/search a) - (search.index/search b))))) + ;; no longer works without english dictionary + #_(testing "Unless their lexemes are matching" + (doseq [[a b] [["revenue" "revenues"] + ["collect" "collection"]]] + (is (= (search.index/search a) + (search.index/search b))))) (testing "Or we match a completion of the final word" - (is (seq (search.index/search "ras"))) - (is (seq (search.index/search "rasta coll"))) - (is (seq (search.index/search "collection ras"))) - (is (empty? (search.index/search "coll rasta"))) - (is (empty? (search.index/search "ras collection")))))) + (is (seq (search.index/search "sat"))) + (is (seq (search.index/search "satisf"))) + (is (seq (search.index/search "employee sat"))) + (is (seq (search.index/search "satisfaction empl"))) + (is (empty? (search.index/search "sat employee"))) + (is (empty? (search.index/search "emp satisfaction")))))) (deftest either-test (with-index @@ -82,7 +139,8 @@ (is (<= 1 (index-hits "user")))) (testing "But stop words are skipped" (is (= 0 (index-hits "or"))) - (is (= 3 (index-hits "its the satisfaction of it")))) + ;; stop words depend on a dictionary + (is (= 0 #_3 (index-hits "its the satisfaction of it")))) (testing "We can combine the individual results" (is (= (+ (index-hits "satisfaction") (index-hits "user")) @@ -98,9 +156,10 @@ (deftest phrase-test (with-index - (is (= 3 (index-hits "projected"))) + ;; Less matches without an english dictionary + (is (= 2 #_3 (index-hits "projected"))) (is (= 2 (index-hits "revenue"))) - (is (= 2 (index-hits "projected revenue"))) + (is (= 1 #_2 (index-hits "projected revenue"))) (testing "only sometimes do these occur sequentially in a phrase" (is (= 1 (index-hits "\"projected revenue\"")))) (testing "legacy search has a bunch of results" @@ -112,29 +171,41 @@ (def search-expr #'search.index/to-tsquery-expr) (deftest to-tsquery-expr-test - (is (= "a & b & c:*" + (is (= "'a' & 'b' & 'c':*" (search-expr "a b c"))) - (is (= "a & b & c:*" + (is (= "'a' & 'b' & 'c':*" (search-expr "a AND b AND c"))) - (is (= "a & b & c" + (is (= "'a' & 'b' & 'c'" (search-expr "a b \"c\""))) - (is (= "a & b | c:*" + (is (= "'a' & 'b' | 'c':*" (search-expr "a b or c"))) - (is (= "this & !that:*" + (is (= "'this' & !'that':*" (search-expr "this -that"))) - (is (= "a & b & c <-> d & e | b & e:*" + (is (= "'a' & 'b' & 'c' <-> 'd' & 'e' | 'b' & 'e':*" (search-expr "a b \" c d\" e or b e"))) - (is (= "ab <-> and <-> cde <-> f | !abc & def & ghi | jkl <-> mno <-> or <-> pqr" + (is (= "'ab' <-> 'and' <-> 'cde' <-> 'f' | !'abc' & 'def' & 'ghi' | 'jkl' <-> 'mno' <-> 'or' <-> 'pqr'" (search-expr "\"ab and cde f\" or -abc def AND ghi OR \"jkl mno OR pqr\""))) - (is (= "big & data | business <-> intelligence | data & wrangling:*" + (is (= "'big' & 'data' | 'business' <-> 'intelligence' | 'data' & 'wrangling':*" (search-expr "Big Data oR \"Business Intelligence\" OR data and wrangling"))) - (is (= "partial <-> quoted <-> and <-> or <-> -split:*" - (search-expr "\"partial quoted AND OR -split")))) + (testing "unbalanced quotes" + (is (= "'big' <-> 'data' & 'big' <-> 'mistake':*" + (search-expr "\"Big Data\" \"Big Mistake")))) + + (is (= "'partial' <-> 'quoted' <-> 'and' <-> 'or' <-> '-split':*" + (search-expr "\"partial quoted AND OR -split"))) + + (testing "dangerous characters" + (is (= "'you' & '<-' & 'pointing':*" + (search-expr "you <- pointing")))) + + (testing "single quotes" + (is (= "'you''re':*" + (search-expr "you're"))))) diff --git a/test/metabase/search/spec_test.clj b/test/metabase/search/spec_test.clj new file mode 100644 index 0000000000000000000000000000000000000000..399521754e01343400a24d4aea43664179202c3e --- /dev/null +++ b/test/metabase/search/spec_test.clj @@ -0,0 +1,125 @@ +(ns ^:mb/once metabase.search.spec-test + (:require + [clojure.test :refer :all] + [metabase.models] + [metabase.search.spec :as search.spec] + [toucan2.core :as t2])) + +(comment + ;; Making sure we load the real specs for each model + (metabase.models/keep-me)) + +(deftest test-qualify-column + (is (= [:table.column :column] (#'search.spec/qualify-column :table :column))) + (is (= :qualified.column (#'search.spec/qualify-column :table :qualified.column))) + (is (= [:table.column :alias] (#'search.spec/qualify-column :table [:column :alias]))) + (is (= [:qualified.column :alias] (#'search.spec/qualify-column :table [:qualified.column :alias])))) + +(deftest test-qualify-columns + (is (= [[:table.column :column] + :qualified.column + [:table.column :alias] + [:qualified.column :alias]] + (search.spec/qualify-columns :table + [:column + :qualified.column + [:column :alias] + [:qualified.column :alias]])))) + +(deftest test-has-table? + (is (#'search.spec/has-table? :table :table.column)) + (is (not (#'search.spec/has-table? :table :column))) + (is (#'search.spec/has-table? nil :column)) + (is (not (#'search.spec/has-table? nil :table.column))) + (is (not (#'search.spec/has-table? :table :qualified.column)))) + +(def ^:private example-spec + {:model :model/MadeUp + :attrs {:collection-id true + :db-id :this.db_id + :table-id :related.table_id} + :search-terms [:this.name :description] + :render-terms {:related-field [:lower :related.field] + :funky-field :%now}}) + +(deftest test-find-fields + (is (= {:this #{:name :description :collection_id :db_id} + :related #{:field :table_id}} + (#'search.spec/find-fields example-spec)))) + +(deftest replace-qualification-test + (is (= :column (#'search.spec/replace-qualification :column :table :sable))) + (is (= :sable.column (#'search.spec/replace-qualification :table.column :table :sable))) + (is (= :table.column (#'search.spec/replace-qualification :table.column :cable :sable))) + (is (= [:and :c.x [:or :c.y [:= :%now :b.z :c.xx]]] + (#'search.spec/replace-qualification [:and :a.x [:or :a.y [:= :%now :b.z :a.xx]]] :a :c)))) + +(deftest search-model-hooks-test + ;; TODO replace real specs with frozen test ones once things have stabilized + + (is (= #:model{:Card #{{:search-model "card", + :fields #{:id + :description + :archived + :archived_directly + :collection_position + :collection_id + :creator_id + :dataset_query + :display + :name + :query_type + :type + :created_at + :updated_at}, + :where [:= :updated.id :this.id]}}, + :Collection #{{:search-model "card", + :fields #{:authority_level :name :namespace :type :location}, + :where [:= :updated.id :this.collection_id]}}, + :Revision #{{:search-model "card", + :fields #{:user_id :timestamp}, + :where [:and + [:= :updated.model_id :this.id] + [:= :updated.most_recent true] + [:= :updated.model "Card"]]}}, + :ModerationReview #{{:search-model "card", + :fields #{:status}, + :where [:and + [:= :updated.moderated_item_type "card"] + [:= :updated.moderated_item_id :this.id] + [:= :updated.most_recent true]]}} + :DashboardCard #{{:search-model "card" + :fields nil + :where [:= :updated.card_id :this.id]}}} + (#'search.spec/search-model-hooks (search.spec/spec "card")))) + + (is (= #:model{:Table #{{:search-model "segment", + :fields #{:description :schema :name :db_id} + :where [:= :updated.id :this.table_id]} + {:search-model "table", + :fields + #{:active :description :schema :name :id :db_id :initial_sync_status :display_name + :visibility_type :created_at :updated_at} + :where [:= :updated.id :this.id]}}, + :Database #{{:search-model "table", :fields #{:name}, :where [:= :updated.id :this.db_id]}} + :Segment #{{:search-model "segment" + :fields #{:description :archived :table_id :name :id :updated_at} + :where [:= :updated.id :this.id]}} + :Collection #{{:search-model "collection" + :fields #{:authority_level :archived :description :name :type :id + :archived_directly :location :namespace :created_at} + :where [:= :updated.id :this.id]}}} + (#'search.spec/merge-hooks + [(#'search.spec/search-model-hooks (search.spec/spec "table")) + (#'search.spec/search-model-hooks (search.spec/spec "segment")) + (#'search.spec/search-model-hooks (search.spec/spec "collection"))])))) + +(deftest search-models-to-update-test + (is (= #{} + (search.spec/search-models-to-update (t2/instance :model/Database {})))) + (is (= #{["table" [:= 123 :this.db_id]] + ["database" [:= 123 :this.id]]} + (search.spec/search-models-to-update (t2/instance :model/Database {:id 123 :name "databass"})))) + (is (= #{["segment" [:= 321 :this.table_id]] + ["table" [:= 321 :this.id]]} + (search.spec/search-models-to-update (t2/instance :model/Table {:id 321 :name "turn-tables"}))))) diff --git a/test/metabase/util_test.cljc b/test/metabase/util_test.cljc index ad7740ac8d5946564ffe3a866171899c59b09bae..36ad263dbcf48ee92909b8a2468a8eaeb449600e 100644 --- a/test/metabase/util_test.cljc +++ b/test/metabase/util_test.cljc @@ -551,3 +551,15 @@ (testing (pr-str (list `lib.util/truncate-string-to-byte-count s max-length)) (is (= expected (truncate-string-to-byte-count s max-length))))))) + +(deftest ^:parallel rconcat-test + (is (= [2 4 6 18 16 14 12 10 8 6 4 2 0 50] + (transduce + (map (partial * 2)) + conj + [] + (u/rconcat + (u/rconcat + (eduction (map inc) (range 3)) + (eduction (map dec) (range 10 0 -1))) + [25])))))