From 17bb910200c64bdc6051ea5152505ad1aca489ed Mon Sep 17 00:00:00 2001
From: Chris Truter <crisptrutski@users.noreply.github.com>
Date: Tue, 19 Nov 2024 10:22:14 +0200
Subject: [PATCH] Various tweaks for experimental search (#50142)

---
 src/metabase/api/search.clj                    | 14 ++++++++++++++
 src/metabase/models/card.clj                   |  2 ++
 src/metabase/search/config.clj                 |  6 +++---
 src/metabase/search/postgres/core.clj          | 10 +++++++++-
 src/metabase/search/postgres/index.clj         |  2 +-
 src/metabase/search/postgres/scoring.clj       |  2 +-
 test/metabase/search/in_place/scoring_test.clj |  2 +-
 test/metabase/search/postgres/core_test.clj    |  2 +-
 test/metabase/search/postgres/index_test.clj   |  6 +++---
 test/metabase/search/spec_test.clj             |  2 ++
 10 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/src/metabase/api/search.clj b/src/metabase/api/search.clj
index f739d886446..c907b9a6bef 100644
--- a/src/metabase/api/search.clj
+++ b/src/metabase/api/search.clj
@@ -45,6 +45,20 @@
                 raise)))
    (meta handler)))
 
+(api/defendpoint POST "/re-init"
+  "If fulltext search is enabled, this will drop the index table, re-create it, and re-populate it. Superusers only."
+  []
+  (api/check-superuser)
+  ;; Guard clauses: both refusals are reported with a 501 status code.
+  (when-not (public-settings/experimental-fulltext-search-enabled)
+    (throw (ex-info "Search index is not enabled." {:status-code 501})))
+  (when-not (search/supports-index?)
+    (throw (ex-info "Search index is not supported for this installation." {:status-code 501})))
+  ;; Reaching this point means the feature is both enabled and supported.
+  (let [reply {:message "done"}]
+    (search/init-index! {:force-reset? true})
+    reply))
+
 (api/defendpoint POST "/force-reindex"
   "If fulltext search is enabled, this will trigger a synchronous reindexing operation."
   []
diff --git a/src/metabase/models/card.clj b/src/metabase/models/card.clj
index 024f5da17ce..3703e18c29c 100644
--- a/src/metabase/models/card.clj
+++ b/src/metabase/models/card.clj
@@ -1017,6 +1017,8 @@
                   :dashboardcard-count {:select [:%count.*]
                                         :from   [:report_dashboardcard]
                                         :where  [:= :report_dashboardcard.card_id :this.id]}
+                  :database-id         :database_id
+                  :last-viewed-at      :last_used_at
                   :native-query        [:case [:= "native" :query_type] :dataset_query]
                   :official-collection [:= "official" :collection.authority_level]
                   :last-edited-at      :r.timestamp
diff --git a/src/metabase/search/config.clj b/src/metabase/search/config.clj
index 59e04107116..6a697c9c4c0 100644
--- a/src/metabase/search/config.clj
+++ b/src/metabase/search/config.clj
@@ -34,7 +34,7 @@
 (def ^:const stale-time-in-days
   "Results older than this number of days are all considered to be equally old. In other words, there is a ranking
   bonus for results newer than this (scaled to just how recent they are). c.f. `search.scoring/recency-score`"
-  180)
+  30)
 
 (def ^:const dashboard-count-ceiling
   "Results in more dashboards than this are all considered to be equally popular."
@@ -48,7 +48,7 @@
 (def ^:const view-count-scaling-percentile
   "The percentile of the given search model's view counts, to be multiplied by [[view-count-scaling]].
   The larger this value, the longer it will take for the score to approach 1.0. It will never quite reach it."
-  0.9)
+  0.99)
 
 (def ^:const surrounding-match-context
   "Show this many words of context before/after matches in long search results"
@@ -87,7 +87,7 @@
 (assert (= all-models (set models-search-order)) "The models search order has to include all models")
 
 (def ^:private default-weights
-  {:pinned              2
+  {:pinned              0
    :bookmarked          2
    :recency             1.5
    :dashboard           1
diff --git a/src/metabase/search/postgres/core.clj b/src/metabase/search/postgres/core.clj
index 5e188656019..7854f42ef61 100644
--- a/src/metabase/search/postgres/core.clj
+++ b/src/metabase/search/postgres/core.clj
@@ -77,6 +77,14 @@
   (-> (merge
        (json/parse-string (:legacy_input index-row) keyword)
        (select-keys index-row [:total_score :pinned]))
+      ;; Per-scorer breakdown: the raw score, its configured weight, and their product.
+      (assoc :scores (mapv (fn [k]
+                             ;; Default a missing/NULL score or weight to 0 so `*` cannot throw on nil.
+                             (let [score  (or (get index-row k) 0)
+                                   weight (or (search.config/weight k) 0)]
+                               {:score score, :name k, :weight weight
+                                :contribution (* weight score)}))
+                           (keys (search.scoring/scorers))))
       (update :created_at parse-datetime)
       (update :updated_at parse-datetime)
       (update :last_edited_at parse-datetime)))
@@ -136,7 +144,7 @@
   "Do no scoring, whatsoever"
   [result _scoring-ctx]
   {:score  (:total_score result 1)
-   :result (assoc result :all-scores [] :relevant-scores [])})
+   :result (assoc result :all-scores (:scores result))})
 
 (defn init!
   "Ensure that the search index exists, and has been populated with all the entities."
diff --git a/src/metabase/search/postgres/index.clj b/src/metabase/search/postgres/index.clj
index 9dd5c6bff39..3773c25158b 100644
--- a/src/metabase/search/postgres/index.clj
+++ b/src/metabase/search/postgres/index.clj
@@ -17,7 +17,7 @@
 
 (defonce ^:private reindexing? (atom false))
 
-(def ^:private tsv-language "simple")
+(def ^:private tsv-language "english")
 
 (defn- exists? [table-name]
   (t2/exists? :information_schema.tables :table_name (name table-name)))
diff --git a/src/metabase/search/postgres/scoring.clj b/src/metabase/search/postgres/scoring.clj
index 0f1b1ddcc1b..a58d5487fb1 100644
--- a/src/metabase/search/postgres/scoring.clj
+++ b/src/metabase/search/postgres/scoring.clj
@@ -45,7 +45,7 @@
   "Prefer items whose value is earlier in some list."
   [idx-col len]
   (if (pos? len)
-    [:/ [:- [:inline (dec len)] idx-col] [:inline len]]
+    [:/ [:- [:inline (dec len)] idx-col] [:inline (double len)]]
     [:inline 1]))
 
 (defn- sum-columns [column-names]
diff --git a/test/metabase/search/in_place/scoring_test.clj b/test/metabase/search/in_place/scoring_test.clj
index 0b4ecb67fa0..c22652b0d87 100644
--- a/test/metabase/search/in_place/scoring_test.clj
+++ b/test/metabase/search/in_place/scoring_test.clj
@@ -214,7 +214,7 @@
       (is (= [1 2 3 4]
              (->> [(item 1 (days-ago 0))
                    (item 2 (days-ago 1))
-                   (item 3 (days-ago 50))
+                   (item 3 (days-ago 20))
                    (item 4 nil)]
                   shuffle
                   (sort-by score)
diff --git a/test/metabase/search/postgres/core_test.clj b/test/metabase/search/postgres/core_test.clj
index fd6f1011625..ea4a68c6c09 100644
--- a/test/metabase/search/postgres/core_test.clj
+++ b/test/metabase/search/postgres/core_test.clj
@@ -55,7 +55,7 @@
 
 (defn- normalize* [xs]
   (into #{}
-        (map (comp #(dissoc % :bookmark :pinned :total_score)
+        (map (comp #(dissoc % :bookmark :pinned :total_score :scores)
                    u/strip-nils
                    #(update % :archived boolean)))
         xs))
diff --git a/test/metabase/search/postgres/index_test.clj b/test/metabase/search/postgres/index_test.clj
index 062216e416f..f76299ddbc1 100644
--- a/test/metabase/search/postgres/index_test.clj
+++ b/test/metabase/search/postgres/index_test.clj
@@ -133,7 +133,7 @@
     (testing "But stop words are skipped"
       (is (= 0 (index-hits "or")))
       ;; stop words depend on a dictionary
-      (is (= 0 #_3 (index-hits "its the satisfaction of it"))))
+      (is (= #_0 3 (index-hits "its the satisfaction of it"))))
     (testing "We can combine the individual results"
       (is (= (+ (index-hits "satisfaction")
                 (index-hits "user"))
@@ -150,9 +150,9 @@
 (deftest phrase-test
   (with-index
-    ;; Less matches without an english dictionary
+    ;; More matches now that the index uses the english dictionary
-    (is (= 2 #_3 (index-hits "projected")))
+    (is (= #_2 3 (index-hits "projected")))
     (is (= 2 (index-hits "revenue")))
-    (is (= 1 #_2 (index-hits "projected revenue")))
+    (is (= #_1 2 (index-hits "projected revenue")))
     (testing "only sometimes do these occur sequentially in a phrase"
       (is (= 1 (index-hits "\"projected revenue\""))))
     (testing "legacy search has a bunch of results"
diff --git a/test/metabase/search/spec_test.clj b/test/metabase/search/spec_test.clj
index 5a02dc16ddc..b4648fe627c 100644
--- a/test/metabase/search/spec_test.clj
+++ b/test/metabase/search/spec_test.clj
@@ -65,8 +65,10 @@
                                                       :collection_position
                                                       :collection_id
                                                       :creator_id
+                                                      :database_id
                                                       :dataset_query
                                                       :display
+                                                      :last_used_at
                                                       :name
                                                       :query_type
                                                       :type
-- 
GitLab