From 87e3b95b1ba98bb0bcd00f51d4b47e4c7078899f Mon Sep 17 00:00:00 2001
From: Chris Truter <crisptrutski@users.noreply.github.com>
Date: Thu, 14 Nov 2024 14:49:16 +0200
Subject: [PATCH] Search clean-up: consolidate Filter specifications (#50019)

---
 src/metabase/search/config.clj       | 50 +++++++++++++++++++
 src/metabase/search/filter.clj       | 72 ++++++++--------------------
 src/metabase/search/spec.clj         | 28 +++++------
 src/metabase/task/search_index.clj   | 34 ++++++++-----
 test/metabase/search/filter_test.clj |  3 +-
 5 files changed, 108 insertions(+), 79 deletions(-)

diff --git a/src/metabase/search/config.clj b/src/metabase/search/config.clj
index dd4d055492e..c8c77fbb56c 100644
--- a/src/metabase/search/config.clj
+++ b/src/metabase/search/config.clj
@@ -5,6 +5,7 @@
    [metabase.models.setting :refer [defsetting]]
    [metabase.permissions.util :as perms.u]
    [metabase.public-settings :as public-settings]
+   [metabase.util :as u]
    [metabase.util.i18n :refer [deferred-tru]]
    [metabase.util.malli :as mu]
    [metabase.util.malli.schema :as ms]))
@@ -90,6 +91,55 @@
    :view-count          2
    :text                10})
 
+(def ^:private FilterDef
+  "A relaxed definition, capturing how we can write the filter - with some fields omitted."
+  [:map {:closed true}
+   [:key                               :keyword]
+   [:type                              :keyword]
+   [:field            {:optional true} :string]
+   [:context-key      {:optional true} :keyword]
+   [:supported-value? {:optional true} ifn?]
+   [:required-feature {:optional true} :keyword]])
+
+(def ^:private Filter
+  "A normalized representation, for the application to leverage."
+  [:map {:closed true}
+   [:key              :keyword]
+   [:type             :keyword]
+   [:field            :string]
+   [:context-key      :keyword]
+   [:supported-value? ifn?]
+   [:required-feature [:maybe :keyword]]])
+
+(mu/defn- build-filter :- Filter
+  [{k :key t :type :keys [context-key field supported-value? required-feature]} :- FilterDef]
+  {:key              k
+   :type             (keyword "metabase.search.filter" (name t))
+   :field            (or field (u/->snake_case_en (name k)))
+   :context-key      (or context-key k)
+   :supported-value? (or supported-value? (constantly true))
+   :required-feature required-feature})
+
+(mu/defn- build-filters :- [:map-of :keyword Filter]
+  [m]
+  (-> (reduce #(assoc-in %1 [%2 :key] %2) m (keys m))
+      (update-vals build-filter)))
+
+(def filters
+  "Specifications for the optional search filters."
+  (build-filters
+   {:archived       {:type :single-value, :context-key :archived?}
+    ;; TODO dry this alias up with the index hydration code
+    :created-at     {:type :date-range, :field "model_created_at"}
+    :creator-id     {:type :list, :context-key :created-by}
+    ;; This actually has nothing to do with tables, as we also filter cards, it would be good to rename the context key.
+    :database-id    {:type :single-value, :context-key :table-db-id}
+    :id             {:type :list, :context-key :ids, :field "model_id"}
+    :last-edited-at {:type :date-range}
+    :last-editor-id {:type :list, :context-key :last-edited-by}
+    :native-query   {:type :native-query, :context-key :search-native-query}
+    :verified       {:type :single-value, :supported-value? #{true}, :required-feature :content-verification}}))
+
 (defn weights
   "Strength of the various scorers. Copied from metabase.search.in-place.scoring, but allowing divergence."
   []
diff --git a/src/metabase/search/filter.clj b/src/metabase/search/filter.clj
index 5eb01ad97eb..a0bdc477d17 100644
--- a/src/metabase/search/filter.clj
+++ b/src/metabase/search/filter.clj
@@ -3,42 +3,15 @@
    [honey.sql.helpers :as sql.helpers]
    [metabase.driver.common.parameters.dates :as params.dates]
    [metabase.public-settings.premium-features :as premium-features]
+   [metabase.search.config :as search.config]
    [metabase.search.in-place.filter :as search.in-place.filter]
    [metabase.search.permissions :as search.permissions]
    [metabase.search.spec :as search.spec]
-   [metabase.util :as u]
    [metabase.util.date-2 :as u.date]
    [metabase.util.i18n :refer [tru]])
   (:import
    (java.time LocalDate)))
 
-(def ^:private filter->type
-  {:archived?      ::single-value
-   :created-at     ::date-range
-   :created-by     ::list
-   :last-edited-at ::date-range
-   :last-edited-by ::list
-   :table-db-id    ::single-value
-   :verified       ::verified})
-
-(def ^:private context-key->attr
-  {:archived?           :archived
-   :created-at          :created-at
-   :created-by          :creator-id
-   :last-edited-at      :last-edited-at
-   :last-edited-by      :last-editor-id
-   :search-native-query :native-query
-   ;; this actually has nothing to do with tables anymore, as we also filter cards.
-   :table-db-id         :database-id
-   :verified            :verified})
-
-;; TODO dry this alias up with the index hydration code
-(def ^:private field-alias {:created-at :model-created-at})
-
-(def ^:private attr->index-key
-  (into {} (for [k (vals context-key->attr)]
-             [k (keyword (str "search_index." (u/->snake_case_en (name (get field-alias k k)))))])))
-
 (defn- remove-if-falsey [m k]
   (if (m k) m (dissoc m k)))
 
@@ -47,6 +20,14 @@
     :all      true
     :app-user (not (search.permissions/sandboxed-or-impersonated-user? search-ctx))))
 
+(def ^:private context-key->filter
+  "Map the context keys to their corresponding filters"
+  (reduce-kv (fn [m k {:keys [context-key]}]
+               (assoc m context-key k))
+             {}
+             ;; TODO remove special handling of :id
+             (dissoc search.config/filters :id)))
+
 (defn search-context->applicable-models
   "Returns a set of models that are applicable given the search context.
 
@@ -54,8 +35,9 @@
   [search-ctx]
   (if (= :search.engine/in-place (:search-engine search-ctx))
     (search.in-place.filter/search-context->applicable-models search-ctx)
-    ;; Archived is an eccentric one - we treat it as false for models that don't map it
-    (let [required (->> (remove-if-falsey search-ctx :archived?) keys (keep context-key->attr))]
+    ;; Archived is an eccentric one - we treat it as false for models that don't map it, rather than removing them.
+    ;; TODO move this behavior to the spec somehow
+    (let [required (->> (remove-if-falsey search-ctx :archived?) keys (keep context-key->filter))]
       (into #{}
             (remove nil?)
             (for [search-model (:models search-ctx)
@@ -95,18 +77,6 @@
 
 (defmethod where-clause* ::list [_ k v] [:in k v])
 
-(defmethod where-clause* ::verified [_ k v]
-  (assert (true? v) "filter for non-verified cards is not supported")
-  (when (premium-features/has-feature? :content-verification)
-    [:= k v]))
-
-(def ^:private true-clause
-  [:inline [:= 1 1]])
-
-(assert (= (disj (set (keys context-key->attr)) :search-native-query)
-           (set (keys filter->type)))
-        "All filters have been implemented.")
-
 (defn with-filters
   "Return a HoneySQL clause corresponding to all the optional search filters."
   [search-context qry]
@@ -117,14 +87,14 @@
                              [:and
                               [:in :model_id ids]
                               ;; NOTE: we limit id-based search to only a subset of the models
-                              ;; TODO this should just become part of the spec e.g. :search-by-id?
+                              ;; TODO this should just become part of the model spec e.g. :search-by-id?
                               [:in :model ["card" "dataset" "metric" "dashboard" "action"]]]))
-    (reduce (fn [qry [ctx-key attr-key]]
-              (let [v (get search-context ctx-key)]
-                (if (some? v)
-                  (sql.helpers/where qry (or (where-clause* (filter->type ctx-key)
-                                                            (attr->index-key attr-key) v)
-                                             true-clause))
-                  qry)))
+    (reduce (fn [qry {t :type :keys [context-key required-feature supported-value? field]}]
+              (or (when-some [v (get search-context context-key)]
+                    (assert (supported-value? v) (str "Unsupported value for " context-key " - " v))
+                    (when (or (nil? required-feature) (premium-features/has-feature? required-feature))
+                      (when-some [c (where-clause* t (keyword (str "search_index." field)) v)]
+                        (sql.helpers/where qry c))))
+                  qry))
             qry
-            (dissoc context-key->attr :search-native-query))))
+            (vals (dissoc search.config/filters :id :native-query)))))
diff --git a/src/metabase/search/spec.clj b/src/metabase/search/spec.clj
index 973a5366c69..8a8e69984e1 100644
--- a/src/metabase/search/spec.clj
+++ b/src/metabase/search/spec.clj
@@ -6,6 +6,7 @@
    [malli.core :as mc]
    [malli.error :as me]
    [metabase.config :as config]
+   [metabase.search.config :as search.config]
    [metabase.util :as u]
    [toucan2.core :as t2]
    [toucan2.tools.transformed :as t2.transformed]))
@@ -30,20 +31,19 @@
 
 (def ^:private optional-attrs
   "These attributes may be omitted (for now) in the interest of brevity in the definitions."
-  [:id
-   :name
-   :created-at
-   :creator-id
-   :database-id
-   :native-query
-   :official-collection
-   :dashboardcard-count
-   :last-edited-at
-   :last-editor-id
-   :pinned
-   :verified
-   :view-count
-   :updated-at])
+  (->> (keys (apply dissoc search.config/filters explicit-attrs))
+       ;; identifiers and rankers
+       (into
+        [:id                                                ;;  in addition to being a filter, this is a key property
+         :name
+         :official-collection
+         :dashboardcard-count
+         :pinned
+         :verified                                          ;;  in addition to being a filter, this is also a ranker
+         :view-count
+         :updated-at])
+       distinct
+       vec))
 
 (def ^:private default-attrs
   {:id   true
diff --git a/src/metabase/task/search_index.clj b/src/metabase/task/search_index.clj
index 75ee1b024e1..0dd319f4b5a 100644
--- a/src/metabase/task/search_index.clj
+++ b/src/metabase/task/search_index.clj
@@ -28,9 +28,9 @@
   "Key used to define and trigger a job that makes incremental updates to the search index."
   (jobs/key (str update-stem ".job")))
 
-(jobs/defjob ^{DisallowConcurrentExecution true
-               :doc                        "Populate Search Index"}
-  SearchIndexReindex [_ctx]
+;; We define the job bodies outside the defrecord, so that we can redefine them live from the REPL
+
+(defn- reindex! []
   (when (search/supports-index?)
     (if (not @recreated?)
       (do (log/info "Recreating search index from the latest schema")
@@ -38,12 +38,21 @@
           ;; the job, resulting in a momentary lack of search results.
           ;; One solution to this would be to store metadata about the index in another table, which we can use to
           ;; determine whether it was built by another version of Metabase and should be rebuilt.
+
           (search/init-index! {:force-reset? (not @recreated?)})
           (reset! recreated? true))
       (do (log/info "Reindexing searchable entities")
           (search/reindex!)))
+    ;; It would be nice to output how many entries were updated.
     (log/info "Done indexing.")))
 
+(defn- update-index! []
+  (when (search/supports-index?)
+    (while true
+      (let [updated-entry-count (search.ingestion/process-next-batch Long/MAX_VALUE 100)]
+        (when (pos? updated-entry-count)
+          (log/infof "Updated %d search index entries" updated-entry-count))))))
+
 (defn- force-scheduled-task! [^JobDetail job ^Trigger trigger]
   ;; For some reason, using the schedule-task! with a non-durable job causes it to only fire on the first trigger.
   #_(task/schedule-task! job trigger)
@@ -51,6 +60,16 @@
   (task/add-job! job)
   (task/add-trigger! trigger))
 
+(jobs/defjob ^{DisallowConcurrentExecution true
+               :doc                        "Populate Search Index"}
+  SearchIndexReindex [_ctx]
+  (reindex!))
+
+(jobs/defjob ^{DisallowConcurrentExecution true
+               :doc                        "Keep Search Index updated"}
+  SearchIndexUpdate [_ctx]
+  (update-index!))
+
 (defmethod task/init! ::SearchIndexReindex [_]
   (let [job         (jobs/build
                      (jobs/of-type SearchIndexReindex)
@@ -65,15 +84,6 @@
                       (simple/schedule (simple/with-interval-in-hours 1))))]
     (force-scheduled-task! job trigger)))
 
-(jobs/defjob ^{DisallowConcurrentExecution true
-               :doc                        "Keep Search Index updated"}
-  SearchIndexUpdate [_ctx]
-  (when (search/supports-index?)
-    (while true
-      (let [updated-entry-count (search.ingestion/process-next-batch Long/MAX_VALUE 100)]
-        (when (pos? updated-entry-count)
-          (log/infof "Updated %d search index entries" updated-entry-count))))))
-
 (defmethod task/init! ::SearchIndexUpdate [_]
   (let [job         (jobs/build
                      (jobs/of-type SearchIndexUpdate)
diff --git a/test/metabase/search/filter_test.clj b/test/metabase/search/filter_test.clj
index d9ddd9481a6..514a4415895 100644
--- a/test/metabase/search/filter_test.clj
+++ b/test/metabase/search/filter_test.clj
@@ -12,7 +12,7 @@
   metabase.models/keep-me)
 
 (defn- filter-keys []
-  (keys @#'search.filter/context-key->attr))
+  (remove #{:ids} (map :context-key (vals search.config/filters))))
 
 (defn- active-filter-combinations []
   ;; We ignore :archived? as we've moved some of these filters to the `:where` clause as a simplifying optimization.
@@ -79,7 +79,6 @@
                       [:< [:cast :search_index.model_created_at :date] #t"2024-10-02"]
                       ;; depends on whether :content-verification is enabled
                       #_[:= :search_index.verified true]
-                      [:inline [:= 1 1]]
                       [:in :search_index.creator_id [123]]
                       [:= :search_index.database_id 231]
                       [:>= [:cast :search_index.last_edited_at :date] #t"2024-10-02"]
-- 
GitLab