Skip to content
Snippets Groups Projects
Unverified Commit a685affd authored by Cam Saul's avatar Cam Saul Committed by GitHub
Browse files

Fix #16299 MongoDB empty results when filtering against array (#16480)

* Fix #16299 MongoDB empty results when filtering against array

* Ok, we still need to use $expr if the VALUE in the expression isn't simple

* More test fixes :wrench:

* More fixes :wrench:
parent edb896f9
No related branches found
No related tags found
No related merge requests found
......@@ -295,11 +295,6 @@
{:arglists '([clause])}
mbql.u/dispatch-by-clause-name-or-class)
(def ^:private ^:dynamic *top-level-filter?*
"Whether we are compiling a top-level filter clause. This means we can generate somewhat simpler `$match` clauses that
don't need to use `$expr` (see below)."
true)
(defmethod compile-filter :between
[[_ field min-val max-val]]
(compile-filter [:and
......@@ -318,23 +313,39 @@
(defmethod compile-filter :starts-with [[_ field v opts]] {(->lvalue field) (str-match-pattern opts \^ v nil)})
(defmethod compile-filter :ends-with [[_ field v opts]] {(->lvalue field) (str-match-pattern opts nil v \$)})
(defn- simple-rvalue? [rvalue]
(defn- rvalue-is-field? [rvalue]
(and (string? rvalue)
(str/starts-with? rvalue "$")))
(defn- rvalue-can-be-compared-directly?
"Whether `rvalue` is something simple that can be compared directly e.g.
{$match {$field {$eq rvalue}}}
as opposed to
{$match {$expr {$eq [$field rvalue]}}}"
[rvalue]
(or (rvalue-is-field? rvalue)
(and (not (map? rvalue))
(not (instance? java.util.regex.Pattern rvalue)))))
(defn- filter-expr [operator field value]
(let [field-rvalue (->rvalue field)
value-rvalue (->rvalue value)]
(if (and (simple-rvalue? field-rvalue) *top-level-filter?*)
(if (and (rvalue-is-field? field-rvalue)
(rvalue-can-be-compared-directly? value-rvalue))
;; if we don't need to do anything fancy with field we can generate a clause like
;;
;; {field {$eq 100}}
;;
;; this only works at the top level. Doesn't work inside compound expressions
{(str/replace-first field-rvalue #"^\$" "") {operator value-rvalue}}
;; {field {$lte 100}}
{(str/replace-first field-rvalue #"^\$" "")
;; for the $eq operator we actually don't need to do {field {$eq 100}}, we can just do {field 100}
(if (= (name operator) "$eq")
value-rvalue
{operator value-rvalue})}
;; if we need to do something fancy then we have to use `$expr` e.g.
;;
;; {$expr {$eq [{$add [$field 1]} 100]}}
;; {$expr {$lte [{$add [$field 1]} 100]}}
{:$expr {operator [field-rvalue value-rvalue]}})))
(defmethod compile-filter := [[_ field value]] (filter-expr $eq field value))
......@@ -346,13 +357,11 @@
(defmethod compile-filter :and
[[_ & args]]
(binding [*top-level-filter?* false]
{$and (mapv compile-filter args)}))
{$and (mapv compile-filter args)})
(defmethod compile-filter :or
[[_ & args]]
(binding [*top-level-filter?* false]
{$or (mapv compile-filter args)}))
{$or (mapv compile-filter args)})
;; MongoDB doesn't support negating top-level filter clauses. So we can leverage the MBQL lib's `negate-filter-clause`
......@@ -405,9 +414,9 @@
(defmethod compile-cond :ends-with
[[_ field value opts]]
(let [strcmp (fn [a b]
(if (get opts :case-sensitive true)
{$eq [a b]}
{$eq [{$strcasecmp [a b]} 0]}))]
{$eq (if (get opts :case-sensitive true)
[a b]
[{$strcasecmp [a b]} 0])})]
(strcmp {:$substrCP [(->rvalue field)
{$subtract [{:$strLenCP (->rvalue field)}
{:$strLenCP (->rvalue value)}]}
......
[
{
"json_field": {
"key_1": "value_jf_a",
"key_2": "value_jf_z"
},
"list_field": [
"value_lf_a",
"value_lf_b"
],
"metas": {
"group_field": "value_gf_a"
}
},
{
"json_field": {
"key_1": "value_jf_b",
"key_2": "value_jf_z"
},
"list_field": [
"value_lf_a",
"value_lf_b"
],
"metas": {
"group_field": "value_gf_b"
}
},
{
"json_field": {
"key_1": "value_jf_c",
"key_2": "value_jf_z"
},
"list_field": [
"value_lf_a",
"value_lf_b"
],
"metas": {
"group_field": "value_gf_c"
}
},
{
"json_field": {
"key_1": "value_jf_a",
"key_2": "value_jf_z"
},
"list_field": [
"value_lf_c",
"value_lf_d"
],
"metas": {
"group_field": "value_gf_a"
}
},
{
"json_field": {
"key_1": "value_jf_b",
"key_2": "value_jf_z"
},
"list_field": [
"value_lf_c",
"value_lf_d"
],
"metas": {
"group_field": "value_gf_b"
}
},
{
"json_field": {
"key_1": "value_jf_c",
"key_2": "value_jf_z"
},
"list_field": [
"value_lf_c",
"value_lf_d"
],
"metas": {
"group_field": "value_gf_c"
}
},
{
"json_field": {
"key_1": "value_jf_a",
"key_2": "value_jf_z"
},
"list_field": [
"value_lf_c",
"value_lf_d"
],
"metas": {
"group_field": "value_gf_a"
}
},
{
"json_field": {
"key_1": "value_jf_b",
"key_2": "value_jf_z"
},
"list_field": [
"value_lf_c",
"value_lf_d"
],
"metas": {
"group_field": "value_gf_b"
}
},
{
"json_field": {
"key_1": "value_jf_c",
"key_2": "value_jf_z"
},
"list_field": [
"value_lf_c",
"value_lf_d"
],
"metas": {
"group_field": "value_gf_c"
}
}
]
......@@ -169,7 +169,7 @@
[:string/ends-with {"$regex" "foo$"} ["foo"]]
[:string/contains {"$regex" "foo"} ["foo"]]
[:string/does-not-contain {"$not" {"$regex" "foo"}} ["foo"]]
[:string/= {"$eq" "foo"} ["foo"]]]]
[:string/= "foo" ["foo"]]]]
(testing operator
(is (= (strip (to-bson [{:$match {"description" form}}]))
(strip
......@@ -179,10 +179,10 @@
(doseq [[operator form input] [[:number/<= {"price" {"$lte" 42}} [42]]
[:number/>= {"price" {"$gte" 42}} [42]]
[:number/!= {"price" {"$ne" 42}} [42]]
[:number/= {"price" {"$eq" 42}} [42]]
;; i don't understand the $ on price here
[:number/between {"$and" [{"$expr" {"$gte" ["$price" 42]}}
{"$expr" {"$lte" ["$price" 142]}}]} [42 142]]]]
[:number/= {"price" 42} [42]]
[:number/between {"$and" [{"price" {"$gte" 42}}
{"price" {"$lte" 142}}]}
[42 142]]]]
(testing operator
(is (= (strip (to-bson [{:$match form}]))
(strip
......@@ -191,8 +191,8 @@
(testing "variadic operators"
(testing :string/=
;; this could be optimized to {:description {:in ["foo" "bar"}}?
(is (= (strip (to-bson [{:$match {"$or" [{"$expr" {"$eq" ["$description" "foo"]}}
{"$expr" {"$eq" ["$description" "bar"]}}]}}]))
(is (= (strip (to-bson [{:$match {"$or" [{"description" "foo"}
{"description" "bar"}]}}]))
(strip
(substitute {:desc (field-filter "description" :type/Text :string/= ["foo" "bar"])}
["[{$match: " (param :desc) "}]"])))))
......@@ -201,20 +201,20 @@
;; desugar middleware that does this [:= 1 2] -> [:or [:= 1] [:= 2]] which makes for more complicated (or just
;; verbose?) query where. perhaps we can introduce some notion of what is sugar and what isn't. I bet the line
;; between what the desugar "optimizes" and what the query processors optimize might be a bit blurry
(is (= (strip (to-bson [{:$match {"$and" [{"$expr" {"$ne" ["$description" "foo"]}}
{"$expr" {"$ne" ["$description" "bar"]}}]}}]))
(is (= (strip (to-bson [{:$match {"$and" [{"description" {"$ne" "foo"}}
{"description" {"$ne" "bar"}}]}}]))
(strip
(substitute {:desc (field-filter "description" :type/Text :string/!= ["foo" "bar"])}
["[{$match: " (param :desc) "}]"])))))
(testing :number/=
(is (= (strip (to-bson [{:$match {"$or" [{"$expr" {"$eq" ["$price" 1]}}
{"$expr" {"$eq" ["$price" 2]}}]}}]))
(is (= (strip (to-bson [{:$match {"$or" [{"price" 1}
{"price" 2}]}}]))
(strip
(substitute {:price (field-filter "price" :type/Integer :number/= [1 2])}
["[{$match: " (param :price) "}]"])))))
(testing :number/!=
(is (= (strip (to-bson [{:$match {"$and" [{"$expr" {"$ne" ["$price" 1]}}
{"$expr" {"$ne" ["$price" 2]}}]}}]))
(is (= (strip (to-bson [{:$match {"$and" [{"price" {"$ne" 1}}
{"price" {"$ne" 2}}]}}]))
(strip
(substitute {:price (field-filter "price" :type/Integer :number/!= [1 2])}
["[{$match: " (param :price) "}]"]))))))))
......
......@@ -112,7 +112,7 @@
(mt/dataset geographical-tips
(mt/with-everything-store
(is (= {:projections ["count"]
:query [{"$match" {"source.username" {"$eq" "tupac"}}}
:query [{"$match" {"source.username" "tupac"}}
{"$group" {"_id" nil, "count" {"$sum" 1}}}
{"$sort" {"_id" 1}}
{"$project" {"_id" false, "count" true}}],
......
(ns metabase.driver.mongo-test
"Tests for Mongo driver."
(:require [clojure.test :refer :all]
(:require [cheshire.core :as json]
[clojure.test :refer :all]
[medley.core :as m]
[metabase.automagic-dashboards.core :as magic]
[metabase.db.metadata-queries :as metadata-queries]
[metabase.driver :as driver]
[metabase.driver.mongo :as mongo]
[metabase.driver.mongo.util :as mongo.u]
[metabase.driver.util :as driver.u]
[metabase.mbql.util :as mbql.u]
[metabase.models.database :refer [Database]]
[metabase.models.field :refer [Field]]
[metabase.models.table :as table :refer [Table]]
[metabase.query-processor :as qp]
......@@ -15,6 +18,7 @@
[metabase.sync :as sync]
[metabase.test :as mt]
[metabase.test.data.interface :as tx]
[monger.collection :as mc]
[taoensso.nippy :as nippy]
[toucan.db :as db])
(:import org.bson.types.ObjectId))
......@@ -326,3 +330,49 @@
{"$sort" {"_id" 1}}
{"$project" {"_id" false, "count" true}}]
:collection "venues"}})))))))
(defn- create-database-from-row-maps! [database-name collection-name row-maps]
(or (db/select-one Database :engine "mongo", :name database-name)
(let [dbdef {:database-name database-name}]
;; destroy Mongo database if it already exists.
(tx/destroy-db! :mongo dbdef)
(let [details (tx/dbdef->connection-details :mongo :db dbdef)]
;; load rows
(mongo.u/with-mongo-connection [conn details]
(doseq [[i row] (map-indexed vector row-maps)
:let [row (assoc row :_id (inc i))]]
(try
(mc/insert conn collection-name row)
(catch Throwable e
(throw (ex-info (format "Error inserting row: %s" (ex-message e))
{:database database-name, :collection collection-name, :details details, :row row}
e)))))
(println (format "Inserted %d rows into %s collection %s."
(count row-maps) (pr-str database-name) (pr-str collection-name))))
;; now sync the Database.
(let [db (db/insert! Database {:name database-name, :engine "mongo", :details details})]
(sync/sync-database! db)
db)))))
(defn- json-from-file [^String filename]
(with-open [rdr (java.io.FileReader. (java.io.File. filename))]
(json/parse-stream rdr true)))
(defn- array-fields-db []
(create-database-from-row-maps!
"test-16299"
"coll"
(json-from-file "modules/drivers/mongo/test/metabase/driver/array-fields.json")))
(deftest filter-on-nested-column-no-results-test
(testing "Should return results when filtering on nested columns (#16299)"
(mt/test-driver :mongo
(mt/with-db (array-fields-db)
(is (= [["value_gf_a" 1] ["value_gf_b" 1]]
(mt/rows
(mt/run-mbql-query coll
{:filter [:and
[:= $coll.json_field.key_1 "value_jf_a" "value_jf_b"]
[:= $list_field "value_lf_a"]]
:aggregation [[:count]]
:breakout [$coll.metas.group_field]}))))))))
......@@ -31,7 +31,7 @@
(let [field-names (for [field-definition field-definitions]
(keyword (:field-name field-definition)))]
;; Use map-indexed so we can get an ID for each row (index + 1)
(doseq [[i row] (map-indexed (partial vector) rows)]
(doseq [[i row] (map-indexed vector rows)]
(try
;; Insert each row
(mc/insert mongo-db (name table-name) (into {:_id (inc i)}
......
......@@ -957,16 +957,18 @@
(testing "Syntactic sugar (`:time-interval` clause)"
(mt/dataset checkins:1-per-day
(is (= 1
(-> (mt/run-mbql-query checkins
{:aggregation [[:count]]
:filter [:time-interval $timestamp :current :day]})
mt/first-row first int)))
(ffirst
(mt/formatted-rows [int]
(mt/run-mbql-query checkins
{:aggregation [[:count]]
:filter [:time-interval $timestamp :current :day]})))))
(is (= 7
(-> (mt/run-mbql-query checkins
{:aggregation [[:count]]
:filter [:time-interval $timestamp :last :week]})
mt/first-row first int)))))))
(ffirst
(mt/formatted-rows [int]
(mt/run-mbql-query checkins
{:aggregation [[:count]]
:filter [:time-interval $timestamp :last :week]})))))))))
;; Make sure that when referencing the same field multiple times with different units we return the one that actually
;; reflects the units the results are in. eg when we breakout by one unit and filter by another, make sure the results
......
......@@ -6,7 +6,6 @@
[metabase.config :as config]
[metabase.driver :as driver]
[metabase.models :refer [Database Field FieldValues Table]]
[metabase.models.field :as field]
[metabase.plugins.classloader :as classloader]
[metabase.sync :as sync]
[metabase.test.data.dataset-definitions :as defs]
......@@ -195,16 +194,32 @@
(pr-str table-name) driver db-id (pr-str db-name)
(u/pprint-to-str (db/select-id->field :name Table, :db_id db-id, :active true)))))))))
(defn- qualified-field-name [{parent-id :parent_id, field-name :name}]
(if parent-id
(str (qualified-field-name (db/select-one Field :id parent-id))
\.
field-name)
field-name))
(defn- all-field-names [table-id]
(into {} (for [field (db/select Field :active true, :table_id table-id)]
[(u/the-id field) (qualified-field-name field)])))
(defn- the-field-id* [table-id field-name & {:keys [parent-id]}]
(or (db/select-one-id Field, :active true, :table_id table-id, :name field-name, :parent_id parent-id)
(let [{db-id :db_id, table-name :name} (db/select-one [Table :name :db_id] :id table-id)
{driver :engine, db-name :name} (db/select-one [Database :engine :name] :id db-id)
field-name (str \' field-name \' (when parent-id
(format " (parent: %d)" parent-id)))]
field-name (qualified-field-name {:parent_id parent-id, :name field-name})
all-field-names (all-field-names table-id)]
(throw
(Exception. (format "Couldn't find Field %s for Table %d '%s' (%s Database %d '%s') .\nFound: %s"
field-name table-id table-name driver db-id db-name
(u/pprint-to-str (db/select-id->field :name Field, :active true, :table_id table-id))))))))
(ex-info (format "Couldn't find Field %s for Table %s.\nFound:\n%s"
(pr-str field-name) (pr-str table-name) (u/pprint-to-str all-field-names))
{:field-name field-name
:table table-name
:table-id table-id
:database db-name
:database-id db-id
:all-fields all-field-names})))))
(defn the-field-id
"Internal impl of `(data/id table field)`."
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment