From 17bb910200c64bdc6051ea5152505ad1aca489ed Mon Sep 17 00:00:00 2001 From: Chris Truter <crisptrutski@users.noreply.github.com> Date: Tue, 19 Nov 2024 10:22:14 +0200 Subject: [PATCH] Various tweaks for experimental search (#50142) --- src/metabase/api/search.clj | 14 ++++++++++++++ src/metabase/models/card.clj | 2 ++ src/metabase/search/config.clj | 6 +++--- src/metabase/search/postgres/core.clj | 10 +++++++++- src/metabase/search/postgres/index.clj | 2 +- src/metabase/search/postgres/scoring.clj | 2 +- test/metabase/search/in_place/scoring_test.clj | 2 +- test/metabase/search/postgres/core_test.clj | 2 +- test/metabase/search/postgres/index_test.clj | 6 +++--- test/metabase/search/spec_test.clj | 2 ++ 10 files changed, 37 insertions(+), 11 deletions(-) diff --git a/src/metabase/api/search.clj b/src/metabase/api/search.clj index f739d886446..c907b9a6bef 100644 --- a/src/metabase/api/search.clj +++ b/src/metabase/api/search.clj @@ -45,6 +45,20 @@ raise))) (meta handler))) +(api/defendpoint POST "/re-init" + "If fulltext search is enabled, this will blow away the index table, re-create it, and re-populate it." + [] + (api/check-superuser) + (cond + (not (public-settings/experimental-fulltext-search-enabled)) + (throw (ex-info "Search index is not enabled." {:status-code 501})) + + (search/supports-index?) + (do (search/init-index! {:force-reset? true}) {:message "done"}) + + :else + (throw (ex-info "Search index is not supported for this installation." {:status-code 501})))) + (api/defendpoint POST "/force-reindex" "If fulltext search is enabled, this will trigger a synchronous reindexing operation." [] diff --git a/src/metabase/models/card.clj b/src/metabase/models/card.clj index 024f5da17ce..3703e18c29c 100644 --- a/src/metabase/models/card.clj +++ b/src/metabase/models/card.clj @@ -1017,6 +1017,8 @@ :dashboardcard-count {:select [:%count.*] :from [:report_dashboardcard] :where [:= :report_dashboardcard.card_id :this.id]} + :database-id :database_id + :last-viewed-at :last_used_at :native-query [:case [:= "native" :query_type] :dataset_query] :official-collection [:= "official" :collection.authority_level] :last-edited-at :r.timestamp diff --git a/src/metabase/search/config.clj b/src/metabase/search/config.clj index 59e04107116..6a697c9c4c0 100644 --- a/src/metabase/search/config.clj +++ b/src/metabase/search/config.clj @@ -34,7 +34,7 @@ (def ^:const stale-time-in-days "Results older than this number of days are all considered to be equally old. In other words, there is a ranking bonus for results newer than this (scaled to just how recent they are). c.f. `search.scoring/recency-score`" - 180) + 30) (def ^:const dashboard-count-ceiling "Results in more dashboards than this are all considered to be equally popular." @@ -48,7 +48,7 @@ (def ^:const view-count-scaling-percentile "The percentile of the given search model's view counts, to be multiplied by [[view-count-scaling]]. The larger this value, the longer it will take for the score to approach 1.0. It will never quite reach it." - 0.9) + 0.99) (def ^:const surrounding-match-context "Show this many words of context before/after matches in long search results" @@ -87,7 +87,7 @@ (assert (= all-models (set models-search-order)) "The models search order has to include all models") (def ^:private default-weights - {:pinned 2 + {:pinned 0 :bookmarked 2 :recency 1.5 :dashboard 1 diff --git a/src/metabase/search/postgres/core.clj b/src/metabase/search/postgres/core.clj index 5e188656019..7854f42ef61 100644 --- a/src/metabase/search/postgres/core.clj +++ b/src/metabase/search/postgres/core.clj @@ -77,6 +77,14 @@ (-> (merge (json/parse-string (:legacy_input index-row) keyword) (select-keys index-row [:total_score :pinned])) + (assoc :scores (mapv (fn [k] + (let [score (get index-row k) + weight (search.config/weight k)] + {:score score + :name k + :weight weight + :contribution (* weight score)})) + (keys (search.scoring/scorers)))) (update :created_at parse-datetime) (update :updated_at parse-datetime) (update :last_edited_at parse-datetime))) @@ -136,7 +144,7 @@ "Do no scoring, whatsoever" [result _scoring-ctx] {:score (:total_score result 1) - :result (assoc result :all-scores [] :relevant-scores [])}) + :result (assoc result :all-scores (:scores result))}) (defn init! "Ensure that the search index exists, and has been populated with all the entities." diff --git a/src/metabase/search/postgres/index.clj b/src/metabase/search/postgres/index.clj index 9dd5c6bff39..3773c25158b 100644 --- a/src/metabase/search/postgres/index.clj +++ b/src/metabase/search/postgres/index.clj @@ -17,7 +17,7 @@ (defonce ^:private reindexing? (atom false)) -(def ^:private tsv-language "simple") +(def ^:private tsv-language "english") (defn- exists? [table-name] (t2/exists? :information_schema.tables :table_name (name table-name))) diff --git a/src/metabase/search/postgres/scoring.clj b/src/metabase/search/postgres/scoring.clj index 0f1b1ddcc1b..a58d5487fb1 100644 --- a/src/metabase/search/postgres/scoring.clj +++ b/src/metabase/search/postgres/scoring.clj @@ -45,7 +45,7 @@ "Prefer items whose value is earlier in some list." [idx-col len] (if (pos? len) - [:/ [:- [:inline (dec len)] idx-col] [:inline len]] + [:/ [:- [:inline (dec len)] idx-col] [:inline (double len)]] [:inline 1])) (defn- sum-columns [column-names] diff --git a/test/metabase/search/in_place/scoring_test.clj b/test/metabase/search/in_place/scoring_test.clj index 0b4ecb67fa0..c22652b0d87 100644 --- a/test/metabase/search/in_place/scoring_test.clj +++ b/test/metabase/search/in_place/scoring_test.clj @@ -214,7 +214,7 @@ (is (= [1 2 3 4] (->> [(item 1 (days-ago 0)) (item 2 (days-ago 1)) - (item 3 (days-ago 50)) + (item 3 (days-ago 20)) (item 4 nil)] shuffle (sort-by score) diff --git a/test/metabase/search/postgres/core_test.clj b/test/metabase/search/postgres/core_test.clj index fd6f1011625..ea4a68c6c09 100644 --- a/test/metabase/search/postgres/core_test.clj +++ b/test/metabase/search/postgres/core_test.clj @@ -55,7 +55,7 @@ (defn- normalize* [xs] (into #{} - (map (comp #(dissoc % :bookmark :pinned :total_score) + (map (comp #(dissoc % :bookmark :pinned :total_score :scores) u/strip-nils #(update % :archived boolean))) xs)) diff --git a/test/metabase/search/postgres/index_test.clj b/test/metabase/search/postgres/index_test.clj index 062216e416f..f76299ddbc1 100644 --- a/test/metabase/search/postgres/index_test.clj +++ b/test/metabase/search/postgres/index_test.clj @@ -133,7 +133,7 @@ (testing "But stop words are skipped" (is (= 0 (index-hits "or"))) ;; stop words depend on a dictionary - (is (= 0 #_3 (index-hits "its the satisfaction of it")))) + (is (= #_0 3 (index-hits "its the satisfaction of it")))) (testing "We can combine the individual results" (is (= (+ (index-hits "satisfaction") (index-hits "user")) @@ -150,9 +150,9 @@ (deftest phrase-test (with-index ;; Less matches without an english dictionary - (is (= 2 #_3 (index-hits "projected"))) + (is (= #_2 3 (index-hits "projected"))) (is (= 2 (index-hits "revenue"))) - (is (= 1 #_2 (index-hits "projected revenue"))) + (is (= #_1 2 (index-hits "projected revenue"))) (testing "only sometimes do these occur sequentially in a phrase" (is (= 1 (index-hits "\"projected revenue\"")))) (testing "legacy search has a bunch of results" diff --git a/test/metabase/search/spec_test.clj b/test/metabase/search/spec_test.clj index 5a02dc16ddc..b4648fe627c 100644 --- a/test/metabase/search/spec_test.clj +++ b/test/metabase/search/spec_test.clj @@ -65,8 +65,10 @@ :collection_position :collection_id :creator_id + :database_id :dataset_query :display + :last_used_at :name :query_type :type -- GitLab