Skip to content
Snippets Groups Projects
Unverified Commit 17bb9102 authored by Chris Truter's avatar Chris Truter Committed by GitHub
Browse files

Various tweaks for experimental search (#50142)

parent ceba6e40
No related branches found
No related tags found
No related merge requests found
......@@ -45,6 +45,20 @@
raise)))
(meta handler)))
(api/defendpoint POST "/re-init"
  "If fulltext search is enabled, this will blow away the index table, re-create it, and re-populate it."
  []
  ;; Run the superuser check before looking at any search settings.
  (api/check-superuser)
  ;; Guard 1: the experimental fulltext search setting must be switched on; otherwise respond with a 501.
  (when-not (public-settings/experimental-fulltext-search-enabled)
    (throw (ex-info "Search index is not enabled." {:status-code 501})))
  ;; Guard 2: the installation must report index support; otherwise respond with a 501.
  (when-not (search/supports-index?)
    (throw (ex-info "Search index is not supported for this installation." {:status-code 501})))
  ;; Both guards passed: (re)initialise the index with a forced reset, then acknowledge.
  (search/init-index! {:force-reset? true})
  {:message "done"})
(api/defendpoint POST "/force-reindex"
"If fulltext search is enabled, this will trigger a synchronous reindexing operation."
[]
......
......@@ -1017,6 +1017,8 @@
:dashboardcard-count {:select [:%count.*]
:from [:report_dashboardcard]
:where [:= :report_dashboardcard.card_id :this.id]}
:database-id :database_id
:last-viewed-at :last_used_at
:native-query [:case [:= "native" :query_type] :dataset_query]
:official-collection [:= "official" :collection.authority_level]
:last-edited-at :r.timestamp
......
......@@ -34,7 +34,7 @@
(def ^:const stale-time-in-days
"Results older than this number of days are all considered to be equally old. In other words, there is a ranking
bonus for results newer than this (scaled to just how recent they are). c.f. `search.scoring/recency-score`"
180)
30)
(def ^:const dashboard-count-ceiling
"Results in more dashboards than this are all considered to be equally popular."
......@@ -48,7 +48,7 @@
(def ^:const view-count-scaling-percentile
"The percentile of the given search model's view counts, to be multiplied by [[view-count-scaling]].
The larger this value, the longer it will take for the score to approach 1.0. It will never quite reach it."
0.9)
0.99)
(def ^:const surrounding-match-context
"Show this many words of context before/after matches in long search results"
......@@ -87,7 +87,7 @@
(assert (= all-models (set models-search-order)) "The models search order has to include all models")
(def ^:private default-weights
{:pinned 2
{:pinned 0
:bookmarked 2
:recency 1.5
:dashboard 1
......
......@@ -77,6 +77,14 @@
(-> (merge
(json/parse-string (:legacy_input index-row) keyword)
(select-keys index-row [:total_score :pinned]))
(assoc :scores (mapv (fn [k]
(let [score (get index-row k)
weight (search.config/weight k)]
{:score score
:name k
:weight weight
:contribution (* weight score)}))
(keys (search.scoring/scorers))))
(update :created_at parse-datetime)
(update :updated_at parse-datetime)
(update :last_edited_at parse-datetime)))
......@@ -136,7 +144,7 @@
"Do no scoring, whatsoever"
[result _scoring-ctx]
{:score (:total_score result 1)
:result (assoc result :all-scores [] :relevant-scores [])})
:result (assoc result :all-scores (:scores result))})
(defn init!
"Ensure that the search index exists, and has been populated with all the entities."
......
......@@ -17,7 +17,7 @@
(defonce ^:private reindexing? (atom false))
(def ^:private tsv-language "simple")
(def ^:private tsv-language "english")
;; True when `information_schema.tables` has a row whose `table_name` equals the name of `table-name`.
;; `table-name` may be anything `name` accepts (keyword, symbol or string).
;; NOTE(review): the lookup is not restricted to a schema -- confirm that is intended.
(defn- exists? [table-name]
  (->> table-name
       name
       (t2/exists? :information_schema.tables :table_name)))
......
......@@ -45,7 +45,7 @@
"Prefer items whose value is earlier in some list."
[idx-col len]
(if (pos? len)
[:/ [:- [:inline (dec len)] idx-col] [:inline len]]
[:/ [:- [:inline (dec len)] idx-col] [:inline (double len)]]
[:inline 1]))
(defn- sum-columns [column-names]
......
......@@ -214,7 +214,7 @@
(is (= [1 2 3 4]
(->> [(item 1 (days-ago 0))
(item 2 (days-ago 1))
(item 3 (days-ago 50))
(item 3 (days-ago 20))
(item 4 nil)]
shuffle
(sort-by score)
......
......@@ -55,7 +55,7 @@
(defn- normalize* [xs]
(into #{}
(map (comp #(dissoc % :bookmark :pinned :total_score)
(map (comp #(dissoc % :bookmark :pinned :total_score :scores)
u/strip-nils
#(update % :archived boolean)))
xs))
......
......@@ -133,7 +133,7 @@
(testing "But stop words are skipped"
(is (= 0 (index-hits "or")))
;; stop words depend on a dictionary
(is (= 0 #_3 (index-hits "its the satisfaction of it"))))
(is (= #_0 3 (index-hits "its the satisfaction of it"))))
(testing "We can combine the individual results"
(is (= (+ (index-hits "satisfaction")
(index-hits "user"))
......@@ -150,9 +150,9 @@
(deftest phrase-test
(with-index
;; Fewer matches without an English dictionary
(is (= 2 #_3 (index-hits "projected")))
(is (= #_2 3 (index-hits "projected")))
(is (= 2 (index-hits "revenue")))
(is (= 1 #_2 (index-hits "projected revenue")))
(is (= #_1 2 (index-hits "projected revenue")))
(testing "only sometimes do these occur sequentially in a phrase"
(is (= 1 (index-hits "\"projected revenue\""))))
(testing "legacy search has a bunch of results"
......
......@@ -65,8 +65,10 @@
:collection_position
:collection_id
:creator_id
:database_id
:dataset_query
:display
:last_used_at
:name
:query_type
:type
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment