diff --git a/src/metabase/models/model_index.clj b/src/metabase/models/model_index.clj index a6826bd82f852e50b2b22f112cfaf781b55a8239..bd52a6959e724d4b86ede5bf2a771f2e5c2edfd1 100644 --- a/src/metabase/models/model_index.clj +++ b/src/metabase/models/model_index.clj @@ -46,7 +46,7 @@ (def max-indexed-values "Maximum number of values we will index. Actually take one more than this to test if there are more than the threshold." - 5000) + 25000) ;;;; indexing functions @@ -60,34 +60,35 @@ Expression refs (`[:expression \"full-name\"]`) are how the _query_ refers to a custom column. But nested queries don't, (and shouldn't) care that those are expressions. They are just another field. The field type is always `:type/Text` enforced by the endpoint to create model indexes." - [field-ref :- mbql.s/Field] + [field-ref :- mbql.s/Field base-type] (case (first field-ref) :field field-ref :expression (let [[_ expression-name] field-ref] ;; api validated that this is a text field when the model-index was created. When selecting the ;; expression we treat it as a field. - [:field expression-name {:base-type :type/Text}]) - (throw (ex-info (trs "Invalid field ref for indexing: {0}" field-ref) + [:field expression-name {:base-type base-type}]) + (throw (ex-info (format "Invalid field ref for indexing: %s" field-ref) {:field-ref field-ref :valid-clauses [:field :expression]})))) (defn- fetch-values [model-index] (let [model (t2/select-one Card :id (:model_id model-index)) - value-ref (-> model-index - :value_ref - mbql.normalize/normalize-field-ref - fix-expression-refs)] - (try [nil (->> (qp/process-query - {:database (:database_id model) - :type :query - :query {:source-table (format "card__%d" (:id model)) - :breakout [(:pk_ref model-index) value-ref] - :limit (inc max-indexed-values)}}) - :data :rows (filter valid-tuples?))] - (catch Exception e - (log/warn (trs "Error fetching indexed values for model {0}" (:id model)) e) - [(ex-message e) []])))) + fix (fn [field-ref base-type] (-> field-ref mbql.normalize/normalize-field-ref (fix-expression-refs base-type))) + ;; :type/Text and :type/Integer are ensured at creation time on the api. + value-ref (-> model-index :value_ref (fix :type/Text)) + pk-ref (-> model-index :pk_ref (fix :type/Integer))] + (try + [nil (->> (qp/process-query + {:database (:database_id model) + :type :query + :query {:source-table (format "card__%d" (:id model)) + :breakout [pk-ref value-ref] + :limit (inc max-indexed-values)}}) + :data :rows (filter valid-tuples?))] + (catch Exception e + (log/warn (trs "Error fetching indexed values for model {0}" (:id model)) e) + [(ex-message e) []])))) (defn find-changes "Find additions and deletions in indexed values. `source-values` are from the db, `indexed-values` are what we @@ -107,38 +108,43 @@ don't, (and shouldn't) care that those are expressions. They are just another fi "Add indexed values to the model_index_value table." [model-index] (let [[error-message values-to-index] (fetch-values model-index) - current-index-values (into #{} - (map (juxt :model_pk :name)) - (t2/select ModelIndexValue - :model_index_id (:id model-index)))] + current-index-values (into #{} + (map (juxt :model_pk :name)) + (t2/select ModelIndexValue + :model_index_id (:id model-index)))] (if-not (str/blank? error-message) - (t2/update! ModelIndex (:id model-index) {:state "error" - :error error-message + (t2/update! ModelIndex (:id model-index) {:state "error" + :error error-message :indexed_at :%now}) (try (t2/with-transaction [_conn] (let [{:keys [additions deletions]} (find-changes {:current-index current-index-values :source-values values-to-index})] (when (seq deletions) - (t2/delete! ModelIndexValue - :model_index_id (:id model-index) - :pk_ref [:in (->> deletions (map first))])) + (doseq [deletions-part (partition-all 10000 deletions)] + (t2/delete! ModelIndexValue + :model_index_id (:id model-index) + :model_pk [:in (->> deletions-part (map first))]))) (when (seq additions) - (t2/insert! ModelIndexValue - (map (fn [[id v]] - {:name v - :model_pk id - :model_index_id (:id model-index)}) - additions)))) + (doseq [additions-part (partition-all 10000 additions)] + (t2/insert! ModelIndexValue + (map (fn [[id v]] + {:name v + :model_pk id + :model_index_id (:id model-index)}) + additions-part))))) (t2/update! ModelIndex (:id model-index) {:indexed_at :%now - :state (if (> (count values-to-index) max-indexed-values) - "overflow" - "indexed")})) + :state (if (> (count values-to-index) max-indexed-values) + "overflow" + "indexed")})) (catch Exception e + (log/error (format "Error saving model-index values for model-index: %d, model: %d" + (:id model-index) (:model-id model-index)) + e) (t2/update! ModelIndex (:id model-index) - {:state "error" - :error (ex-message e) + {:state "error" + :error (ex-message e) :indexed_at :%now})))))) diff --git a/test/metabase/models/model_index_test.clj b/test/metabase/models/model_index_test.clj index 7ea98584ed54b0e583b71c65a7f4eb066dd96f3b..9e7a1a417032d15578198d9c3ca9279d07b873e7 100644 --- a/test/metabase/models/model_index_test.clj +++ b/test/metabase/models/model_index_test.clj @@ -40,13 +40,22 @@ (deftest quick-run-through (with-scheduler-setup (mt/dataset test-data - (let [query (mt/mbql-query products) + (let [query (mt/mbql-query products + {:filter [:and + [:> $id 0] + [:< $id 10]]}) pk_ref (mt/$ids $products.id) value_ref (mt/$ids $products.title)] - (t2.with-temp/with-temp [Card model (assoc (mt/card-with-source-metadata-for-query query) - :type :model - :name "model index test")] - (let [model-index (mt/user-http-request :rasta :post 200 "/model-index" + (mt/with-model-cleanup [:model/Card] + ;; with-temp doesn't let us update the model to simulate different values returned + (let [response (mt/user-http-request :rasta :post 200 "/card" + (assoc (mt/card-with-source-metadata-for-query query) + :type :model + :name "model index test" + :visualization_settings {} + :display "table")) + model (t2/select-one :model/Card :id (:id response)) + model-index (mt/user-http-request :rasta :post 200 "/model-index" {:model_id (:id model) :pk_ref pk_ref :value_ref value_ref}) @@ -59,10 +68,32 @@ (mt/user-http-request :rasta :get 200 (str "/model-index/" (:id model-index)))))) (testing "We can invoke the task ourself manually" (model-index/add-values! model-index) - (is (= 200 (count (t2/select ModelIndexValue :model_index_id (:id model-index))))) + (is (= 9 (count (t2/select ModelIndexValue :model_index_id (:id model-index))))) (is (= (into #{} cat (mt/rows (qp/process-query - (mt/mbql-query products {:fields [$title]})))) + (mt/mbql-query products {:fields [$title] + :filter [:and + [:> $id 0] + [:< $id 10]]})))) (t2/select-fn-set :name ModelIndexValue :model_index_id (:id model-index))))) + (testing "When the values change the indexed values change" + ;; update the filter on the model to simulate different values indexed + (t2/update! :model/Card + (:id model) + {:dataset_query (mt/mbql-query products + {:filter [:and + [:> $id 10] + [:< $id 20]]})}) + (model-index/add-values! model-index) + (is (= 9 (count (t2/select ModelIndexValue :model_index_id (:id model-index))))) + (is (= (into #{} cat (mt/rows (qp/process-query + (mt/mbql-query products {:fields [$title] + :filter [:and + [:> $id 10] + [:< $id 20]]})))) + (t2/select-fn-set :name ModelIndexValue :model_index_id (:id model-index)))) + (is (=? {:error nil + :state "indexed"} + (t2/select-one :model/ModelIndex :id (:id model-index))))) (let [index-trigger! #(->> (task/scheduler-info) :jobs (by-key "metabase.task.IndexValues.job") @@ -146,10 +177,13 @@ (doseq [[scenario query [field-refs]] (remove nil? [[:mbql (mt/mbql-query products {:fields [$id $title]})] - [:mbql-custom-column (mt/mbql-query products {:expressions - {"full-name" - [:concat $title "custom"]}}) - [(mt/$ids [$products.id [:expression "full-name"]])]] + [:mbql-custom-column (mt/mbql-query products + {:expressions + {"inc-id" + [:+ $id 1] + "full-name" + [:concat $title "custom"]}}) + [[[:expression "inc-id"] [:expression "full-name"]]]] [:native (mt/native-query (qp.compile/compile (mt/mbql-query products {:fields [$id $title]})))] @@ -180,9 +214,10 @@ :pk_ref pk-ref :value_ref value-ref})] (is (nil? error)) + (is (> (count values) 0)) ;; oracle returns BigDecimal ids so need `number?` rather than `int?` (is (mc/validate [:sequential [:tuple number? string?]] values) - (-> (mc/validate [:sequential [:tuple int? string?]] values) + (-> (mc/explain [:sequential [:tuple number? string?]] values) (me/humanize)))))))))) (defn- test-index