From 87e3b95b1ba98bb0bcd00f51d4b47e4c7078899f Mon Sep 17 00:00:00 2001 From: Chris Truter <crisptrutski@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:49:16 +0200 Subject: [PATCH] Search clean-up: consolidate Filter specifications (#50019) --- src/metabase/search/config.clj | 50 +++++++++++++++++++ src/metabase/search/filter.clj | 72 ++++++++-------------------- src/metabase/search/spec.clj | 28 +++++------ src/metabase/task/search_index.clj | 34 ++++++++----- test/metabase/search/filter_test.clj | 3 +- 5 files changed, 108 insertions(+), 79 deletions(-) diff --git a/src/metabase/search/config.clj b/src/metabase/search/config.clj index dd4d055492e..c8c77fbb56c 100644 --- a/src/metabase/search/config.clj +++ b/src/metabase/search/config.clj @@ -5,6 +5,7 @@ [metabase.models.setting :refer [defsetting]] [metabase.permissions.util :as perms.u] [metabase.public-settings :as public-settings] + [metabase.util :as u] [metabase.util.i18n :refer [deferred-tru]] [metabase.util.malli :as mu] [metabase.util.malli.schema :as ms])) @@ -90,6 +91,55 @@ :view-count 2 :text 10}) +(def ^:private FilterDef + "A relaxed definition, capturing how we can write the filter - with some fields omitted." + [:map {:closed true} + [:key :keyword] + [:type :keyword] + [:field {:optional true} :string] + [:context-key {:optional true} :keyword] + [:supported-value? {:optional true} ifn?] + [:required-feature {:optional true} :keyword]]) + +(def ^:private Filter + "A normalized representation, for the application to leverage." + [:map {:closed true} + [:key :keyword] + [:type :keyword] + [:field :string] + [:context-key :keyword] + [:supported-value? ifn?] + [:required-feature [:maybe :keyword]]]) + +(mu/defn- build-filter :- Filter + [{k :key t :type :keys [context-key field supported-value? required-feature]} :- FilterDef] + {:key k + :type (keyword "metabase.search.filter" (name t)) + :field (or field (u/->snake_case_en (name k))) + :context-key (or context-key k) + :supported-value? (or supported-value? (constantly true)) + :required-feature required-feature}) + +(mu/defn- build-filters :- [:map-of :keyword Filter] + [m] + (-> (reduce #(assoc-in %1 [%2 :key] %2) m (keys m)) + (update-vals build-filter))) + +(def filters + "Specifications for the optional search filters." + (build-filters + {:archived {:type :single-value, :context-key :archived?} + ;; TODO dry this alias up with the index hydration code + :created-at {:type :date-range, :field "model_created_at"} + :creator-id {:type :list, :context-key :created-by} + ;; This actually has nothing to do with tables, as we also filter cards, it would be good to rename the context key. + :database-id {:type :single-value, :context-key :table-db-id} + :id {:type :list, :context-key :ids, :field "model_id"} + :last-edited-at {:type :date-range} + :last-editor-id {:type :list, :context-key :last-edited-by} + :native-query {:type :native-query, :context-key :search-native-query} + :verified {:type :single-value, :supported-value? #{true}, :required-feature :content-verification}})) + (defn weights "Strength of the various scorers. Copied from metabase.search.in-place.scoring, but allowing divergence." [] diff --git a/src/metabase/search/filter.clj b/src/metabase/search/filter.clj index 5eb01ad97eb..a0bdc477d17 100644 --- a/src/metabase/search/filter.clj +++ b/src/metabase/search/filter.clj @@ -3,42 +3,15 @@ [honey.sql.helpers :as sql.helpers] [metabase.driver.common.parameters.dates :as params.dates] [metabase.public-settings.premium-features :as premium-features] + [metabase.search.config :as search.config] [metabase.search.in-place.filter :as search.in-place.filter] [metabase.search.permissions :as search.permissions] [metabase.search.spec :as search.spec] - [metabase.util :as u] [metabase.util.date-2 :as u.date] [metabase.util.i18n :refer [tru]]) (:import (java.time LocalDate))) -(def ^:private filter->type - {:archived? ::single-value - :created-at ::date-range - :created-by ::list - :last-edited-at ::date-range - :last-edited-by ::list - :table-db-id ::single-value - :verified ::verified}) - -(def ^:private context-key->attr - {:archived? :archived - :created-at :created-at - :created-by :creator-id - :last-edited-at :last-edited-at - :last-edited-by :last-editor-id - :search-native-query :native-query - ;; this actually has nothing to do with tables anymore, as we also filter cards. - :table-db-id :database-id - :verified :verified}) - -;; TODO dry this alias up with the index hydration code -(def ^:private field-alias {:created-at :model-created-at}) - -(def ^:private attr->index-key - (into {} (for [k (vals context-key->attr)] - [k (keyword (str "search_index." (u/->snake_case_en (name (get field-alias k k)))))]))) - (defn- remove-if-falsey [m k] (if (m k) m (dissoc m k))) @@ -47,6 +20,14 @@ :all true :app-user (not (search.permissions/sandboxed-or-impersonated-user? search-ctx)))) +(def ^:private context-key->filter + "Map the context keys to their corresponding filters" + (reduce-kv (fn [m k {:keys [context-key]}] + (assoc m context-key k)) + {} + ;; TODO remove special handling of :id + (dissoc search.config/filters :id))) + (defn search-context->applicable-models "Returns a set of models that are applicable given the search context. @@ -54,8 +35,9 @@ [search-ctx] (if (= :search.engine/in-place (:search-engine search-ctx)) (search.in-place.filter/search-context->applicable-models search-ctx) - ;; Archived is an eccentric one - we treat it as false for models that don't map it - (let [required (->> (remove-if-falsey search-ctx :archived?) keys (keep context-key->attr))] + ;; Archived is an eccentric one - we treat it as false for models that don't map it, rather than removing them. + ;; TODO move this behavior to the spec somehow + (let [required (->> (remove-if-falsey search-ctx :archived?) keys (keep context-key->filter))] (into #{} (remove nil?) (for [search-model (:models search-ctx) @@ -95,18 +77,6 @@ (defmethod where-clause* ::list [_ k v] [:in k v]) -(defmethod where-clause* ::verified [_ k v] - (assert (true? v) "filter for non-verified cards is not supported") - (when (premium-features/has-feature? :content-verification) - [:= k v])) - -(def ^:private true-clause - [:inline [:= 1 1]]) - -(assert (= (disj (set (keys context-key->attr)) :search-native-query) - (set (keys filter->type))) - "All filters have been implemented.") - (defn with-filters "Return a HoneySQL clause corresponding to all the optional search filters." [search-context qry] @@ -117,14 +87,14 @@ [:and [:in :model_id ids] ;; NOTE: we limit id-based search to only a subset of the models - ;; TODO this should just become part of the spec e.g. :search-by-id? + ;; TODO this should just become part of the model spec e.g. :search-by-id? [:in :model ["card" "dataset" "metric" "dashboard" "action"]]])) - (reduce (fn [qry [ctx-key attr-key]] - (let [v (get search-context ctx-key)] - (if (some? v) - (sql.helpers/where qry (or (where-clause* (filter->type ctx-key) - (attr->index-key attr-key) v) - true-clause)) - qry))) + (reduce (fn [qry {t :type :keys [context-key required-feature supported-value? field]}] + (or (when-some [v (get search-context context-key)] + (assert (supported-value? v) (str "Unsupported value for " context-key " - " v)) + (when (or (nil? required-feature) (premium-features/has-feature? required-feature)) + (when-some [c (where-clause* t (keyword (str "search_index." field)) v)] + (sql.helpers/where qry c)))) + qry)) qry - (dissoc context-key->attr :search-native-query)))) + (vals (dissoc search.config/filters :id :native-query))))) diff --git a/src/metabase/search/spec.clj b/src/metabase/search/spec.clj index 973a5366c69..8a8e69984e1 100644 --- a/src/metabase/search/spec.clj +++ b/src/metabase/search/spec.clj @@ -6,6 +6,7 @@ [malli.core :as mc] [malli.error :as me] [metabase.config :as config] + [metabase.search.config :as search.config] [metabase.util :as u] [toucan2.core :as t2] [toucan2.tools.transformed :as t2.transformed])) @@ -30,20 +31,19 @@ (def ^:private optional-attrs "These attributes may be omitted (for now) in the interest of brevity in the definitions." - [:id - :name - :created-at - :creator-id - :database-id - :native-query - :official-collection - :dashboardcard-count - :last-edited-at - :last-editor-id - :pinned - :verified - :view-count - :updated-at]) + (->> (keys (apply dissoc search.config/filters explicit-attrs)) + ;; identifiers and rankers + (into + [:id ;; in addition to being a filter, this is a key property + :name + :official-collection + :dashboardcard-count + :pinned + :verified ;; in addition to being a filter, this is also a ranker + :view-count + :updated-at]) + distinct + vec)) (def ^:private default-attrs {:id true diff --git a/src/metabase/task/search_index.clj b/src/metabase/task/search_index.clj index 75ee1b024e1..0dd319f4b5a 100644 --- a/src/metabase/task/search_index.clj +++ b/src/metabase/task/search_index.clj @@ -28,9 +28,9 @@ "Key used to define and trigger a job that makes incremental updates to the search index." (jobs/key (str update-stem ".job"))) -(jobs/defjob ^{DisallowConcurrentExecution true - :doc "Populate Search Index"} - SearchIndexReindex [_ctx] +;; We define the job bodies outside the defrecord, so that we can redefine them live from the REPL + +(defn- reindex! [] (when (search/supports-index?) (if (not @recreated?) (do (log/info "Recreating search index from the latest schema") @@ -38,12 +38,21 @@ ;; the job, resulting in a momentary lack of search results. ;; One solution to this would be to store metadata about the index in another table, which we can use to ;; determine whether it was built by another version of Metabase and should be rebuilt. + (search/init-index! {:force-reset? (not @recreated?)}) (reset! recreated? true)) (do (log/info "Reindexing searchable entities") (search/reindex!))) + ;; It would be nice to output how many entries were updated. (log/info "Done indexing."))) +(defn- update-index! [] + (when (search/supports-index?) + (while true + (let [updated-entry-count (search.ingestion/process-next-batch Long/MAX_VALUE 100)] + (when (pos? updated-entry-count) + (log/infof "Updated %d search index entries" updated-entry-count)))))) + (defn- force-scheduled-task! [^JobDetail job ^Trigger trigger] ;; For some reason, using the schedule-task! with a non-durable job causes it to only fire on the first trigger. #_(task/schedule-task! job trigger) @@ -51,6 +60,16 @@ (task/add-job! job) (task/add-trigger! trigger)) +(jobs/defjob ^{DisallowConcurrentExecution true + :doc "Populate Search Index"} + SearchIndexReindex [_ctx] + (reindex!)) + +(jobs/defjob ^{DisallowConcurrentExecution true + :doc "Keep Search Index updated"} + SearchIndexUpdate [_ctx] + (update-index!)) + (defmethod task/init! ::SearchIndexReindex [_] (let [job (jobs/build (jobs/of-type SearchIndexReindex) @@ -65,15 +84,6 @@ (simple/schedule (simple/with-interval-in-hours 1))))] (force-scheduled-task! job trigger))) -(jobs/defjob ^{DisallowConcurrentExecution true - :doc "Keep Search Index updated"} - SearchIndexUpdate [_ctx] - (when (search/supports-index?) - (while true - (let [updated-entry-count (search.ingestion/process-next-batch Long/MAX_VALUE 100)] - (when (pos? updated-entry-count) - (log/infof "Updated %d search index entries" updated-entry-count)))))) - (defmethod task/init! ::SearchIndexUpdate [_] (let [job (jobs/build (jobs/of-type SearchIndexUpdate) diff --git a/test/metabase/search/filter_test.clj b/test/metabase/search/filter_test.clj index d9ddd9481a6..514a4415895 100644 --- a/test/metabase/search/filter_test.clj +++ b/test/metabase/search/filter_test.clj @@ -12,7 +12,7 @@ metabase.models/keep-me) (defn- filter-keys [] - (keys @#'search.filter/context-key->attr)) + (remove #{:ids} (map :context-key (vals search.config/filters)))) (defn- active-filter-combinations [] ;; We ignore :archived? as we've moved some of these filters to the `:where` clause as a simplifying optimization. @@ -79,7 +79,6 @@ [:< [:cast :search_index.model_created_at :date] #t"2024-10-02"] ;; depends on whether :content-verification is enabled #_[:= :search_index.verified true] - [:inline [:= 1 1]] [:in :search_index.creator_id [123]] [:= :search_index.database_id 231] [:>= [:cast :search_index.last_edited_at :date] #t"2024-10-02"] -- GitLab