From eb25bc71135aa984046782957846a1e1f6f0a578 Mon Sep 17 00:00:00 2001 From: Howon Lee <hlee.howon@gmail.com> Date: Mon, 23 Aug 2021 14:33:24 -0700 Subject: [PATCH] Mongo custom expressions (#17117) Mongo custom expressions now are turned on for mongo 5.0 and after only. One idiosyncrasy is that we don't really support the BSON ID format at all in our typesystem despite having it in there, but both of my attempts to get them to work with the native mongo pipeline commands bounced because they really aren't strings, they're BSON ID's. We do rely on the pipeline commands heavily which is the reason for the version requirement. --- .circleci/config.yml | 10 +-- .../test/metabase-db/mongo/query.cy.spec.js | 2 +- .../mongo/src/metabase/driver/mongo.clj | 13 ++- .../metabase/driver/mongo/query_processor.clj | 80 ++++++++++++++++++- .../driver/mongo/query_processor_test.clj | 64 +++++++++++++++ .../mongo/test/metabase/driver/mongo_test.clj | 41 +++++++--- src/metabase/sync/interface.clj | 3 +- src/metabase/sync/sync_metadata.clj | 2 +- src/metabase/sync/sync_metadata/tables.clj | 35 +++++--- .../sync/sync_metadata/comments_test.clj | 4 +- 10 files changed, 223 insertions(+), 31 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 36ba30e462e..76122817b54 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -126,7 +126,7 @@ executors: working_directory: /home/circleci/metabase/metabase/ docker: - image: metabase/ci:circleci-java-11-clj-1.10.3.929-07-27-2021-node-browsers - - image: circleci/mongo:4.0 + - image: circleci/mongo:5.0 presto-186: working_directory: /home/circleci/metabase/metabase/ @@ -196,11 +196,11 @@ executors: resource_class: large - fe-mongo-4: + fe-mongo-5: working_directory: /home/circleci/metabase/metabase/ docker: - image: metabase/ci:circleci-java-11-clj-1.10.3.929-07-27-2021-node-browsers - - image: metabase/qa-databases:mongo-sample-4.0 + - image: metabase/qa-databases:mongo-sample-5.0 fe-postgres-12: working_directory: /home/circleci/metabase/metabase/ @@ -1266,10 +1266,10 @@ workflows: source-folder: << matrix.folder >> - fe-tests-cypress: - name: e2e-tests-mongo-4-<< matrix.edition >> + name: e2e-tests-mongo-5-<< matrix.edition >> requires: - build-uberjar-<< matrix.edition >> - e: fe-mongo-4 + e: fe-mongo-5 cypress-group: "mongo" source-folder: mongo qa-db: true diff --git a/frontend/test/metabase-db/mongo/query.cy.spec.js b/frontend/test/metabase-db/mongo/query.cy.spec.js index d0fe9073e84..10082c04edf 100644 --- a/frontend/test/metabase-db/mongo/query.cy.spec.js +++ b/frontend/test/metabase-db/mongo/query.cy.spec.js @@ -104,5 +104,5 @@ function writeNativeMongoQuery() { parseSpecialCharSequences: false, }); cy.get(".NativeQueryEditor .Icon-play").click(); - cy.findByText("18,760"); + cy.findByText("42"); } diff --git a/modules/drivers/mongo/src/metabase/driver/mongo.clj b/modules/drivers/mongo/src/metabase/driver/mongo.clj index 78e4833164a..a8bdd5f2b94 100644 --- a/modules/drivers/mongo/src/metabase/driver/mongo.clj +++ b/modules/drivers/mongo/src/metabase/driver/mongo.clj @@ -18,6 +18,7 @@ [monger.collection :as mc] [monger.command :as cmd] [monger.conversion :as m.conversion] + [monger.core :as mg] [monger.db :as mdb] monger.json [schema.core :as s] @@ -174,8 +175,9 @@ (defmethod driver/describe-database :mongo [_ database] (with-mongo-connection [^com.mongodb.DB conn database] - {:tables (set (for [collection (disj (mdb/get-collection-names conn) "system.indexes")] - {:schema nil, :name collection}))})) + {:tables (set (for [collection (disj (mdb/get-collection-names conn) "system.indexes")] + {:schema nil, :name collection})) + :version (get (mg/command conn {:buildInfo 1}) "version")})) (defn- table-sample-column-info "Sample the rows (i.e., documents) in `table` and return a map of information about the column keys we found in that @@ -215,6 +217,13 @@ :native-parameters]] (defmethod driver/supports? [:mongo feature] [_ _] true)) +(defmethod driver/database-supports? [:mongo :expressions] [_ _ db] + (let [version (some-> (get-in db [:details :version]) + (str/split #"\.") + first + Integer/parseInt)] + (and (some? version) (<= 5 version)))) + (defmethod driver/mbql->native :mongo [_ query] (qp/mbql->native query)) diff --git a/modules/drivers/mongo/src/metabase/driver/mongo/query_processor.clj b/modules/drivers/mongo/src/metabase/driver/mongo/query_processor.clj index 3b99478c65f..40716c2c15e 100644 --- a/modules/drivers/mongo/src/metabase/driver/mongo/query_processor.clj +++ b/modules/drivers/mongo/src/metabase/driver/mongo/query_processor.clj @@ -105,10 +105,18 @@ [field] (field->name field \.)) +(defmethod ->lvalue :expression + [[_ expression-name]] + expression-name) + (defmethod ->rvalue :default [x] x) +(defmethod ->rvalue :expression + [[_ expression-name]] + (->rvalue (mbql.u/expression-with-name (:query *query*) expression-name))) + (defmethod ->rvalue (class Field) [{coercion :coercion_strategy, :as field}] (let [field-name (str \$ (field->name field "."))] @@ -284,6 +292,77 @@ (u.date/add unit amount) (u.date/bucket unit))))))) +;;; ---------------------------------------------------- functions --------------------------------------------------- + +;; It doesn't make 100% sense to have lvalues for all these but it's a formal requirement + +(defmethod ->lvalue :avg [[_ inp]] (->lvalue inp)) +(defmethod ->lvalue :stddev [[_ inp]] (->lvalue inp)) +(defmethod ->lvalue :sum [[_ inp]] (->lvalue inp)) +(defmethod ->lvalue :min [[_ inp]] (->lvalue inp)) +(defmethod ->lvalue :max [[_ inp]] (->lvalue inp)) + +(defmethod ->lvalue :floor [[_ inp]] (->lvalue inp)) +(defmethod ->lvalue :ceil [[_ inp]] (->lvalue inp)) +(defmethod ->lvalue :round [[_ inp]] (->lvalue inp)) +(defmethod ->lvalue :abs [[_ inp]] (->lvalue inp)) + +(defmethod ->lvalue :log [[_ inp]] (->lvalue inp)) +(defmethod ->lvalue :exp [[_ inp]] (->lvalue inp)) +(defmethod ->lvalue :sqrt [[_ inp]] (->lvalue inp)) + +(defmethod ->lvalue :trim [[_ inp]] (->lvalue inp)) +(defmethod ->lvalue :ltrim [[_ inp]] (->lvalue inp)) +(defmethod ->lvalue :rtrim [[_ inp]] (->lvalue inp)) +(defmethod ->lvalue :upper [[_ inp]] (->lvalue inp)) +(defmethod ->lvalue :lower [[_ inp]] (->lvalue inp)) +(defmethod ->lvalue :length [[_ inp]] (->lvalue inp)) + +(defmethod ->lvalue :power [[_ & args]] (->lvalue (first args))) +(defmethod ->lvalue :replace [[_ & args]] (->lvalue (first args))) +(defmethod ->lvalue :concat [[_ & args]] (->lvalue (first args))) +(defmethod ->lvalue :substring [[_ & args]] (->lvalue (first args))) + +(defmethod ->lvalue :+ [[_ & args]] (->lvalue (first args))) +(defmethod ->lvalue :- [[_ & args]] (->lvalue (first args))) +(defmethod ->lvalue :* [[_ & args]] (->lvalue (first args))) +(defmethod ->lvalue :/ [[_ & args]] (->lvalue (first args))) + +(defmethod ->rvalue :coalesce [[_ & args]] (->lvalue (first args))) + +(defmethod ->rvalue :avg [[_ inp]] {"$avg" (->rvalue inp)}) +(defmethod ->rvalue :stddev [[_ inp]] {"$stdDevPop" (->rvalue inp)}) +(defmethod ->rvalue :sum [[_ inp]] {"$sum" (->rvalue inp)}) +(defmethod ->rvalue :min [[_ inp]] {"$min" (->rvalue inp)}) +(defmethod ->rvalue :max [[_ inp]] {"$max" (->rvalue inp)}) + +(defmethod ->rvalue :floor [[_ inp]] {"$floor" (->rvalue inp)}) +(defmethod ->rvalue :ceil [[_ inp]] {"$ceil" (->rvalue inp)}) +(defmethod ->rvalue :round [[_ inp]] {"$round" (->rvalue inp)}) +(defmethod ->rvalue :abs [[_ inp]] {"$abs" (->rvalue inp)}) + +(defmethod ->rvalue :log [[_ inp]] {"$log10" (->rvalue inp)}) +(defmethod ->rvalue :exp [[_ inp]] {"$exp" (->rvalue inp)}) +(defmethod ->rvalue :sqrt [[_ inp]] {"$sqrt" (->rvalue inp)}) + +(defmethod ->rvalue :trim [[_ inp]] {"$trim" (->rvalue inp)}) +(defmethod ->rvalue :ltrim [[_ inp]] {"$ltrim" (->rvalue inp)}) +(defmethod ->rvalue :rtrim [[_ inp]] {"$rtrim" (->rvalue inp)}) +(defmethod ->rvalue :upper [[_ inp]] {"$toUpper" (->rvalue inp)}) +(defmethod ->rvalue :lower [[_ inp]] {"$toLower" (->rvalue inp)}) +(defmethod ->rvalue :length [[_ inp]] {"$strLenCP" (->rvalue inp)}) + +(defmethod ->rvalue :power [[_ & args]] {"$pow" (mapv ->rvalue args)}) +(defmethod ->rvalue :replace [[_ & args]] {"$replaceAll" (mapv ->rvalue args)}) +(defmethod ->rvalue :concat [[_ & args]] {"$concat" (mapv ->rvalue args)}) +(defmethod ->rvalue :substring [[_ & args]] {"$substrCP" (mapv ->rvalue args)}) + +(defmethod ->rvalue :+ [[_ & args]] {"$add" (mapv ->rvalue args)}) +(defmethod ->rvalue :- [[_ & args]] {"$subtract" (mapv ->rvalue args)}) +(defmethod ->rvalue :* [[_ & args]] {"$multiply" (mapv ->rvalue args)}) +(defmethod ->rvalue :/ [[_ & args]] {"$divide" (mapv ->rvalue args)}) + +(defmethod ->rvalue :coalesce [[_ & args]] {"$ifNull" (mapv ->rvalue args)}) ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | CLAUSE APPLICATION | @@ -645,7 +724,6 @@ {$skip offset})) {$limit items-per-page}])))) - ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | Process & Run | ;;; +----------------------------------------------------------------------------------------------------------------+ diff --git a/modules/drivers/mongo/test/metabase/driver/mongo/query_processor_test.clj b/modules/drivers/mongo/test/metabase/driver/mongo/query_processor_test.clj index 1e1d91c094f..f88d3bafd21 100644 --- a/modules/drivers/mongo/test/metabase/driver/mongo/query_processor_test.clj +++ b/modules/drivers/mongo/test/metabase/driver/mongo/query_processor_test.clj @@ -153,6 +153,70 @@ [:distinct $price]] :limit 5}))))))) +(defn- extract-projections [projections q] + (select-keys (get-in q [:query 0 "$project"]) projections)) + +(deftest expressions-test + (mt/test-driver :mongo + (testing "Should be able to deal with expressions (#9382 is for BQ but we're doing it for mongo too)" + (is (= {"bob" "$latitude", "cobb" "$name"} + (extract-projections + ["bob" "cobb"] + (qp/query->native + (mt/mbql-query venues + {:fields [[:expression "bob"] [:expression "cobb"]] + :expressions {:bob [:field $latitude nil] + :cobb [:field $name nil]} + :limit 5})))))) + (testing "Should be able to deal with 1-arity functions" + (is (= {"cobb" {"$toUpper" "$name"}, + "bob" {"$abs" "$latitude"} } + (extract-projections + ["bob" "cobb"] + (qp/query->native + (mt/mbql-query venues + {:filters [[:expression "bob"] [:expression "cobb"]] + :expressions {:bob [:abs $latitude] + :cobb [:upper $name]} + :limit 5})))))) + (testing "Should be able to deal with 2-arity functions" + (is (= {"bob" {"$add" ["$price" 300]}} + (extract-projections + ["bob"] + (qp/query->native + (mt/mbql-query venues + {:filters [[:expression "bob"] [:expression "cobb"]] + :expressions {:bob [:+ $price 300]} + :limit 5})))))) + (testing "Should be able to deal with a little indirection" + (is (= {"bob" {"$abs" {"$subtract" ["$price" 300]}}} + (extract-projections + ["bob"] + (qp/query->native + (mt/mbql-query venues + {:filters [[:expression "bob"] [:expression "cobb"]] + :expressions {:bob [:abs [:- $price 300]]} + :limit 5})))))) + (testing "Should be able to deal with a little indirection, with an expression in" + (is (= {"bob" {"$abs" "$latitude"}, + "cobb" {"$ceil" {"$abs" "$latitude"}}} + (extract-projections + ["bob" "cobb"] + (qp/query->native + (mt/mbql-query venues + {:filters [[:expression "bob"] [:expression "cobb"]] + :expressions {:bob [:abs $latitude] + :cobb [:ceil [:expression "bob"]]} + :limit 5})))))) + (testing "Should be able to deal with coalescing" + (is (= {"bob" {"$ifNull" ["$latitude" "$price"]}} + (extract-projections + ["bob"] + (qp/query->native + (mt/mbql-query venues + {:expressions {:bob [:coalesce [:field $latitude nil] [:field $price nil]]} + :limit 5})))))))) + (deftest compile-time-interval-test (mt/test-driver :mongo (testing "Make sure time-intervals work the way they're supposed to." diff --git a/modules/drivers/mongo/test/metabase/driver/mongo_test.clj b/modules/drivers/mongo/test/metabase/driver/mongo_test.clj index 7300f05cd2a..f44ae69ae5f 100644 --- a/modules/drivers/mongo/test/metabase/driver/mongo_test.clj +++ b/modules/drivers/mongo/test/metabase/driver/mongo_test.clj @@ -61,10 +61,33 @@ :expected true} {:details {:conn-uri "mongodb://localhost:3000/bad-db-name?connectTimeoutMS=50"} :expected false}]] - (testing (str "connect with " details) - (is (= expected - (driver.u/can-connect-with-details? :mongo details)) - (str message)))))) + (testing (str "connect with " details) + (is (= expected + (driver.u/can-connect-with-details? :mongo details)) + (str message)))))) + +(deftest database-supports?-test +(mt/test-driver + :mongo + (doseq [{:keys [details expected]} [{:details {:host "localhost" + :port 3000 + :dbname "bad-db-name" + :version "5.0.0"} + :expected true} + {:details {} + :expected false} + {:details {:version nil} + :expected false} + {:details {:host "localhost" + :port 27017 + :dbname "metabase-test" + :version "2.2134234.lol"} + :expected false}]] + (testing (str "connect with " details) + (is (= expected + (let [db (db/insert! Database {:name "dummy", :engine "mongo", :details details})] + (driver/database-supports? :mongo :expressions db)))))))) + (def ^:private native-query "[{\"$project\": {\"_id\": \"$_id\"}}, @@ -96,11 +119,11 @@ (deftest describe-database-test (mt/test-driver :mongo - (is (= {:tables #{{:schema nil, :name "checkins"} - {:schema nil, :name "categories"} - {:schema nil, :name "users"} - {:schema nil, :name "venues"}}} - (driver/describe-database :mongo (mt/db)))))) + (is (= #{{:schema nil, :name "checkins"} + {:schema nil, :name "categories"} + {:schema nil, :name "users"} + {:schema nil, :name "venues"}} + (:tables (driver/describe-database :mongo (mt/db))))))) (deftest describe-table-test (mt/test-driver :mongo diff --git a/src/metabase/sync/interface.clj b/src/metabase/sync/interface.clj index 33747a9247f..bfab0baeb2f 100644 --- a/src/metabase/sync/interface.clj +++ b/src/metabase/sync/interface.clj @@ -17,7 +17,8 @@ (def DatabaseMetadata "Schema for the expected output of `describe-database`." - {:tables #{DatabaseMetadataTable}}) + {:tables #{DatabaseMetadataTable} + (s/optional-key :version) (s/maybe su/NonBlankString)}) (def TableMetadataField "Schema for a given Field as provided in `describe-table`." diff --git a/src/metabase/sync/sync_metadata.clj b/src/metabase/sync/sync_metadata.clj index fe7f29bc58f..f612cd49cc5 100644 --- a/src/metabase/sync/sync_metadata.clj +++ b/src/metabase/sync/sync_metadata.clj @@ -34,7 +34,7 @@ (def ^:private sync-steps [(sync-util/create-sync-step "sync-timezone" sync-tz/sync-timezone! sync-timezone-summary) ;; Make sure the relevant table models are up-to-date - (sync-util/create-sync-step "sync-tables" sync-tables/sync-tables! sync-tables-summary) + (sync-util/create-sync-step "sync-tables" sync-tables/sync-tables-and-database! sync-tables-summary) ;; Now for each table, sync the fields (sync-util/create-sync-step "sync-fields" sync-fields/sync-fields! sync-fields-summary) ;; Now for each table, sync the FKS. This has to be done after syncing all the fields to make sure target fields exist diff --git a/src/metabase/sync/sync_metadata/tables.clj b/src/metabase/sync/sync_metadata/tables.clj index 92f2392e0e2..5f3d4045bfd 100644 --- a/src/metabase/sync/sync_metadata/tables.clj +++ b/src/metabase/sync/sync_metadata/tables.clj @@ -3,6 +3,7 @@ (:require [clojure.data :as data] [clojure.string :as str] [clojure.tools.logging :as log] + [metabase.models.database :as db-model :refer [Database]] [metabase.models.humanization :as humanization] [metabase.models.table :as table :refer [Table]] [metabase.sync.fetch-metadata :as fetch-metadata] @@ -80,6 +81,14 @@ ;;; ---------------------------------------------------- Syncing ----------------------------------------------------- +(s/defn ^:private update-database-metadata! + "If there is a version in the db-metadata update the DB to have that in the DB model" + [database :- i/DatabaseInstance db-metadata :- i/DatabaseMetadata] + (log/info (trs "Found new version for DB: {0}" (:version db-metadata))) + (db/update! Database (u/the-id database) + :details + (assoc (:details database) :version (:version db-metadata)))) + ;; TODO - should we make this logic case-insensitive like it is for fields? (s/defn ^:private create-or-reactivate-tables! @@ -137,10 +146,11 @@ :description description)))) -(s/defn ^:private db-metadata :- #{i/DatabaseMetadataTable} - "Return information about `database` by calling its driver's implementation of `describe-database`." - [database :- i/DatabaseInstance] - (set (for [table (:tables (fetch-metadata/db-metadata database)) +(s/defn ^:private table-set :- #{i/DatabaseMetadataTable} + "So there exist tables for the user and metabase metadata tables for internal usage by metabase. + Get set of user tables only, excluding metabase metadata tables." + [db-metadata :- i/DatabaseMetadata] + (set (for [table (:tables db-metadata) :when (not (metabase-metadata/is-metabase-metadata-table? table))] table))) @@ -152,19 +162,26 @@ :db_id (u/the-id database) :active true)))) -(s/defn sync-tables! +(s/defn sync-tables-and-database! "Sync the Tables recorded in the Metabase application database with the ones obtained by calling `database`'s driver's - implementation of `describe-database`." + implementation of `describe-database`. + Also syncs the database metadata taken from describe-database if there is any" [database :- i/DatabaseInstance] ;; determine what's changed between what info we have and what's in the DB - (let [db-metadata (db-metadata database) + (let [db-metadata (fetch-metadata/db-metadata database) + db-tables (table-set db-metadata) our-metadata (our-metadata database) strip-desc (fn [metadata] (set (map #(dissoc % :description) metadata))) [new-tables old-tables] (data/diff - (strip-desc db-metadata) + (strip-desc db-tables) (strip-desc our-metadata)) - [changed-tables] (data/diff db-metadata our-metadata)] + [changed-tables] (data/diff db-tables our-metadata)] + ;; update database metadata from database + (when (some? (:version db-metadata)) + (sync-util/with-error-handling (format "Error creating/reactivating tables for %s" + (sync-util/name-for-logging database)) + (update-database-metadata! database db-metadata))) ;; create new tables as needed or mark them as active again (when (seq new-tables) (sync-util/with-error-handling (format "Error creating/reactivating tables for %s" diff --git a/test/metabase/sync/sync_metadata/comments_test.clj b/test/metabase/sync/sync_metadata/comments_test.clj index 28dda64b529..0a3b3d75a48 100644 --- a/test/metabase/sync/sync_metadata/comments_test.clj +++ b/test/metabase/sync/sync_metadata/comments_test.clj @@ -103,7 +103,7 @@ ;; change the description in metabase while the source table comment remains the same (db/update-where! Table {:id (mt/id "table_with_updated_desc")}, :description "updated table description") ;; now sync the DB again, this should NOT overwrite the manually updated description - (sync-tables/sync-tables! (mt/db)) + (sync-tables/sync-tables-and-database! (mt/db)) (is (= #{{:name (mt/format-name "table_with_updated_desc"), :description "updated table description"}} (db->tables (mt/db))))))))) @@ -114,6 +114,6 @@ ;; modify the source DB to add the comment and resync (driver/notify-database-updated driver/*driver* (mt/db)) (tx/create-db! driver/*driver* (basic-table "table_with_comment_after_sync" "added comment")) - (sync-tables/sync-tables! (mt/db)) + (sync-tables/sync-tables-and-database! (mt/db)) (is (= #{{:name (mt/format-name "table_with_comment_after_sync"), :description "added comment"}} (db->tables (mt/db)))))))) -- GitLab