From 473758e3d20b0db14af712a4168c6ffec601b604 Mon Sep 17 00:00:00 2001 From: Cam Saul <1455846+camsaul@users.noreply.github.com> Date: Thu, 9 Mar 2023 12:15:26 -0800 Subject: [PATCH] MLv2 clause and type determination overhaul (#29070) * MLv2 `define-mbql-clause` and `type-of` calculation * Continued [ci skip] * MLv2 overhaul :flex: --- shared/src/metabase/types.cljc | 25 ++ shared/test/metabase/types_test.cljc | 128 ++++---- src/metabase/lib/field.cljc | 6 +- src/metabase/lib/filter.cljc | 6 +- src/metabase/lib/schema.cljc | 7 + src/metabase/lib/schema/common.cljc | 8 + src/metabase/lib/schema/expression.cljc | 290 +++++++----------- .../lib/schema/expression/arithmetic.cljc | 14 + src/metabase/lib/schema/filter.cljc | 220 +++++++------ src/metabase/lib/schema/literal.cljc | 147 ++++----- src/metabase/lib/schema/literal/jvm.clj | 58 ++++ src/metabase/lib/schema/mbql_clause.cljc | 149 +++++++++ src/metabase/lib/schema/ref.cljc | 57 ++-- src/metabase/util.cljc | 2 + src/metabase/util/malli/registry.cljc | 5 + .../schema/expression/arithmetic_test.cljc | 33 ++ test/metabase/lib/schema/expression_test.cljc | 55 ---- test/metabase/lib/schema/filter_test.cljc | 101 ++++-- test/metabase/lib/schema/literal_test.cljc | 59 ++++ .../metabase/lib/schema/mbql_clause_test.cljc | 25 ++ test/metabase/lib/schema/ref_test.cljc | 17 + 21 files changed, 900 insertions(+), 512 deletions(-) create mode 100644 src/metabase/lib/schema/expression/arithmetic.cljc create mode 100644 src/metabase/lib/schema/literal/jvm.clj create mode 100644 src/metabase/lib/schema/mbql_clause.cljc create mode 100644 test/metabase/lib/schema/expression/arithmetic_test.cljc delete mode 100644 test/metabase/lib/schema/expression_test.cljc create mode 100644 test/metabase/lib/schema/literal_test.cljc create mode 100644 test/metabase/lib/schema/mbql_clause_test.cljc create mode 100644 test/metabase/lib/schema/ref_test.cljc diff --git a/shared/src/metabase/types.cljc b/shared/src/metabase/types.cljc index d7ec2245e30..9f747b9a0e2 100644 --- a/shared/src/metabase/types.cljc +++ b/shared/src/metabase/types.cljc @@ -335,6 +335,31 @@ [field] (field-is-type? :type/Temporal field)) +(defn- most-specific-common-ancestor* + "Impl for [[most-specific-common-ancestor]]." + [x y] + (cond + (= x :type/*) nil + (= y :type/*) nil + (isa? x y) y + (isa? y x) x + ;; if we haven't had a match yet, recursively try using parent types. + :else + (some (fn [x'] + (some (fn [y'] + (when-not (= [x' y'] [x y]) + (most-specific-common-ancestor* x' y'))) + (cons y (parents y)))) + (cons x (parents x))))) + +(defn most-specific-common-ancestor + "Return the most-specific type that is an ancestor of both `x` and `y`. + + (most-specific-common-ancestor :type/BigInteger :type/Decimal) => :type/Number" + [x y] + (or (most-specific-common-ancestor* x y) + :type/*)) + #?(:cljs (defn ^:export isa "Is `x` the same as, or a descendant type of, `y`?" diff --git a/shared/test/metabase/types_test.cljc b/shared/test/metabase/types_test.cljc index 104b32fad36..c56379c85ca 100644 --- a/shared/test/metabase/types_test.cljc +++ b/shared/test/metabase/types_test.cljc @@ -1,66 +1,78 @@ (ns metabase.types-test - #?@ - (:clj - [(:require - [clojure.test :as t] - [metabase.types :as types] - [metabase.types.coercion-hierarchies :as coercion-hierarchies])] - :cljs - [(:require - [cljs.test :as t :include-macros true] - [metabase.types :as types] - [metabase.types.coercion-hierarchies :as coercion-hierarchies])])) + (:require + [clojure.test :refer [deftest is testing are]] + [metabase.types :as types] + [metabase.types.coercion-hierarchies :as coercion-hierarchies])) -(comment types/keep-me) +(deftest ^:parallel most-specific-common-ancestor-test + (are [x y expected] (= expected + (types/most-specific-common-ancestor x y) + (types/most-specific-common-ancestor y x) + (types/most-specific-common-ancestor x expected) + (types/most-specific-common-ancestor y expected) + (types/most-specific-common-ancestor expected x) + (types/most-specific-common-ancestor expected y)) + :type/Integer :type/Integer :type/Integer + :type/Integer :type/BigInteger :type/Integer + :type/Integer :type/Float :type/Number + :type/BigInteger :type/Float :type/Number + :type/Integer :type/Decimal :type/Number + :type/BigInteger :type/Decimal :type/Number + :type/Integer :type/Text :type/* + :type/DateTimeWithZoneID :type/DateTimeWithZoneOffset :type/DateTimeWithTZ + :type/DateTimeWithZoneID :type/TimeWithZoneOffset :type/Temporal + nil :type/Integer :type/* + nil nil :type/* + :type/* :type/* :type/*)) (derive ::Coerce-Int-To-Str :Coercion/*) (coercion-hierarchies/define-types! ::Coerce-Int-To-Str :type/Integer :type/Text) -(t/deftest is-coercible?-test - (t/is (= true - (types/is-coercible? ::Coerce-Int-To-Str :type/Integer :type/Text))) - (t/testing "should be able to coerce from a subtype of base type" - (t/is (types/is-coercible? ::Coerce-Int-To-Str :type/BigInteger :type/Text))) - (t/testing "should NOT be able to coerce from a parent type of base type" - (t/is (not (types/is-coercible? ::Coerce-Int-To-Str :type/Number :type/Text)))) - (t/testing "should be able to coerce to a parent type of effective type" - (t/is (types/is-coercible? ::Coerce-Int-To-Str :type/Integer :type/*))) - (t/testing "should NOT be able to coerce to a subtype type of effective type" - (t/is (not (types/is-coercible? ::Coerce-Int-To-Str :type/Integer :type/UUID)))) - (t/testing "should be able to mix & match a bit." - (t/is (types/is-coercible? ::Coerce-Int-To-Str :type/BigInteger :type/*)))) +(deftest ^:parallel is-coercible?-test + (is (= true + (types/is-coercible? ::Coerce-Int-To-Str :type/Integer :type/Text))) + (testing "should be able to coerce from a subtype of base type" + (is (types/is-coercible? ::Coerce-Int-To-Str :type/BigInteger :type/Text))) + (testing "should NOT be able to coerce from a parent type of base type" + (is (not (types/is-coercible? ::Coerce-Int-To-Str :type/Number :type/Text)))) + (testing "should be able to coerce to a parent type of effective type" + (is (types/is-coercible? ::Coerce-Int-To-Str :type/Integer :type/*))) + (testing "should NOT be able to coerce to a subtype type of effective type" + (is (not (types/is-coercible? ::Coerce-Int-To-Str :type/Integer :type/UUID)))) + (testing "should be able to mix & match a bit." + (is (types/is-coercible? ::Coerce-Int-To-Str :type/BigInteger :type/*)))) (derive ::Coerce-BigInteger-To-Instant :Coercion/*) (coercion-hierarchies/define-types! ::Coerce-BigInteger-To-Instant :type/BigInteger :type/Instant) -(t/deftest coercion-possibilities-test - (t/is (= {:type/Text #{::Coerce-Int-To-Str} +(deftest ^:parallel coercion-possibilities-test + (is (= {:type/Text #{::Coerce-Int-To-Str} + :type/Instant #{:Coercion/UNIXMicroSeconds->DateTime + :Coercion/UNIXMilliSeconds->DateTime + :Coercion/UNIXSeconds->DateTime}} + (types/coercion-possibilities :type/Integer))) + (is (= {:type/Instant #{:Coercion/UNIXMicroSeconds->DateTime + :Coercion/UNIXMilliSeconds->DateTime + :Coercion/UNIXSeconds->DateTime}} + (types/coercion-possibilities :type/Decimal))) + + (testing "Should work for for subtypes of a the coercion base type(s)" + (is (= {:type/Text #{::Coerce-Int-To-Str} :type/Instant #{:Coercion/UNIXMicroSeconds->DateTime :Coercion/UNIXMilliSeconds->DateTime - :Coercion/UNIXSeconds->DateTime}} - (types/coercion-possibilities :type/Integer))) - (t/is (= {:type/Instant #{:Coercion/UNIXMicroSeconds->DateTime - :Coercion/UNIXMilliSeconds->DateTime - :Coercion/UNIXSeconds->DateTime}} - (types/coercion-possibilities :type/Decimal))) - - (t/testing "Should work for for subtypes of a the coercion base type(s)" - (t/is (= {:type/Text #{::Coerce-Int-To-Str} - :type/Instant #{:Coercion/UNIXMicroSeconds->DateTime - :Coercion/UNIXMilliSeconds->DateTime - :Coercion/UNIXSeconds->DateTime - ::Coerce-BigInteger-To-Instant}} - (types/coercion-possibilities :type/BigInteger)))) + :Coercion/UNIXSeconds->DateTime + ::Coerce-BigInteger-To-Instant}} + (types/coercion-possibilities :type/BigInteger)))) - (t/testing "Should *not* work for ancestor types of the coercion base type(s)" - (t/is (= nil - (types/coercion-possibilities :type/Number)))) - (t/testing "Non-inheritable coercions" + (testing "Should *not* work for ancestor types of the coercion base type(s)" + (is (= nil + (types/coercion-possibilities :type/Number)))) + (testing "Non-inheritable coercions" ;; type/* has a coercion :Coercion/YYYYMMDDHHMMSSBytes->Temporal - (t/is (= {:type/* #{:Coercion/YYYYMMDDHHMMSSBytes->Temporal}} - (types/coercion-possibilities :type/*))) + (is (= {:type/* #{:Coercion/YYYYMMDDHHMMSSBytes->Temporal}} + (types/coercion-possibilities :type/*))) ;; a random type descendant of type/* should not have this coercion - (t/is (= nil (types/coercion-possibilities :type/DruidHyperUnique))))) + (is (= nil (types/coercion-possibilities :type/DruidHyperUnique))))) (defn- keywords-in-namespace [keyword-namespace] (->> #?(:clj (var-get #'clojure.core/global-hierarchy) @@ -71,44 +83,44 @@ (filter #(= (namespace %) (name keyword-namespace))))) (defn- test-derived-from [a-type {:keys [required disallowed]}] - (t/testing a-type - (t/testing (str "should derive from one of" required) - (t/is (some + (testing a-type + (testing (str "should derive from one of" required) + (is (some (partial isa? a-type) required))) (doseq [t disallowed] - (t/testing (str "should NOT derive from " t) - (t/is (not (isa? a-type t))))))) + (testing (str "should NOT derive from " t) + (is (not (isa? a-type t))))))) (defn- test-keywords-in-namespace-derived-from [keyword-namespace options] (doseq [t (keywords-in-namespace keyword-namespace)] (test-derived-from t options))) -(t/deftest data-types-test +(deftest ^:parallel data-types-test (test-keywords-in-namespace-derived-from "type" {:required [:type/* :Semantic/* :Relation/*] :disallowed [#_:Semantic/* :Coercion/* #_:Relation/* :entity/*]})) -(t/deftest semantic-types-test +(deftest ^:parallel semantic-types-test (test-keywords-in-namespace-derived-from "Semantic" {:required [:Semantic/*] :disallowed [:Coercion/* :Relation/* :entity/*]})) -(t/deftest relation-types-test +(deftest ^:parallel relation-types-test (test-keywords-in-namespace-derived-from "Relation" {:required [:Relation/*] :disallowed [:type/* :Semantic/* :Coercion/* :entity/*]})) -(t/deftest coercion-strategies-test +(deftest ^:parallel coercion-strategies-test (test-keywords-in-namespace-derived-from "Coercion" {:required [:Coercion/*] :disallowed [:type/* :Semantic/* :Relation/* :entity/*]})) -(t/deftest entity-types-test +(deftest ^:parallel entity-types-test (test-keywords-in-namespace-derived-from "entity" {:required [:entity/*] diff --git a/src/metabase/lib/field.cljc b/src/metabase/lib/field.cljc index 9d022e7df05..f10cc3f5a53 100644 --- a/src/metabase/lib/field.cljc +++ b/src/metabase/lib/field.cljc @@ -4,10 +4,12 @@ [metabase.lib.dispatch :as lib.dispatch] [metabase.lib.metadata :as lib.metadata] [metabase.lib.options :as lib.options] - [metabase.lib.schema.ref :as lib.schema.ref] + [metabase.lib.schema.ref] [metabase.lib.temporal-bucket :as lib.temporal-bucket] [metabase.util.malli :as mu])) +(comment metabase.lib.schema.ref/keep-me) + (defmulti ^:private ->field {:arglists '([query stage-number field])} (fn [_query _stage-number field] @@ -39,7 +41,7 @@ [[_field options id-or-name] unit] [:field (assoc options :temporal-unit unit) id-or-name]) -(mu/defn field :- ::lib.schema.ref/field +(mu/defn field :- :mbql.clause/field "Create a `:field` clause." ([query x] (->field query -1 x)) diff --git a/src/metabase/lib/filter.cljc b/src/metabase/lib/filter.cljc index 0b033282f8e..32cfbf0503e 100644 --- a/src/metabase/lib/filter.cljc +++ b/src/metabase/lib/filter.cljc @@ -4,9 +4,11 @@ [metabase.lib.dispatch :as lib.dispatch] [metabase.lib.field :as lib.field] [metabase.lib.options :as lib.options] - [metabase.lib.schema.filter :as lib.schema.filter] + [metabase.lib.schema.filter] [metabase.util.malli :as mu])) +(comment metabase.lib.schema.filter/keep-me) + (defmulti ^:private ->filter-arg {:arglists '([query stage-number x])} (fn [_query _stage-number x] @@ -26,7 +28,7 @@ (mu/defn = :- [:or fn? - ::lib.schema.filter/=] + :mbql.clause/=] "Create an `=` filter clause." ([x y] (fn [query stage-number] diff --git a/src/metabase/lib/schema.cljc b/src/metabase/lib/schema.cljc index b9c1e9277c3..a9e6e88b747 100644 --- a/src/metabase/lib/schema.cljc +++ b/src/metabase/lib/schema.cljc @@ -10,12 +10,19 @@ [metabase.lib.schema.aggregation :as aggregation] [metabase.lib.schema.common :as common] [metabase.lib.schema.expression :as expression] + [metabase.lib.schema.expression.arithmetic] + [metabase.lib.schema.filter] [metabase.lib.schema.id :as id] [metabase.lib.schema.join :as join] + [metabase.lib.schema.literal] [metabase.lib.schema.order-by :as order-by] [metabase.lib.schema.ref :as ref] [metabase.util.malli.registry :as mr])) +(comment metabase.lib.schema.expression.arithmetic/keep-me + metabase.lib.schema.filter/keep-me + metabase.lib.schema.literal/keep-me) + (mr/def ::stage.native [:map [:lib/type [:= :mbql.stage/native]] diff --git a/src/metabase/lib/schema/common.cljc b/src/metabase/lib/schema/common.cljc index 60957e704b6..fc041acf381 100644 --- a/src/metabase/lib/schema/common.cljc +++ b/src/metabase/lib/schema/common.cljc @@ -1,8 +1,11 @@ (ns metabase.lib.schema.common (:require [clojure.string :as str] + [metabase.types] [metabase.util.malli.registry :as mr])) +(comment metabase.types/keep-me) + ;;; Schema for a string that cannot be blank. (mr/def ::non-blank-string [:and @@ -23,3 +26,8 @@ (mr/def ::options [:map [:lib/uuid ::uuid]]) + +(mr/def ::base-type + [:fn + {:error/message "valid base type"} + #(isa? % :type/*)]) diff --git a/src/metabase/lib/schema/expression.cljc b/src/metabase/lib/schema/expression.cljc index c782cbd0fa3..8be4f423ae7 100644 --- a/src/metabase/lib/schema/expression.cljc +++ b/src/metabase/lib/schema/expression.cljc @@ -1,215 +1,135 @@ (ns metabase.lib.schema.expression (:require - [malli.core :as mc] + [metabase.lib.dispatch :as lib.dispatch] [metabase.lib.schema.common :as common] - [metabase.lib.schema.filter :as filter] - [metabase.lib.schema.literal :as literal] - [metabase.lib.schema.ref :as ref] - [metabase.lib.schema.temporal-bucketing :as temporal-bucketing] + [metabase.shared.util.i18n :as i18n] [metabase.types] + [metabase.util.malli :as mu] [metabase.util.malli.registry :as mr])) (comment metabase.types/keep-me) -(defn- ref-of-base-type [base-type] +(defmulti type-of* + "Impl for [[type-of]]. Use [[type-of]], but implement [[type-of*]]. + + For MBQL clauses, try really hard not return an ambiguous set of possible types! Calculate things and determine what + the result type will be! + + If we don't have enough information to determine the type (e.g. a `:field` clause that needs a metadata provider to + determine the type), return `::expression/type.unknown`. This is a temporary workaround until we figure out how to + always have type info!" + {:arglists '([expr])} + (fn [x] + ;; For the fallback case: use the actual type/class name as the dispatch type rather than `:type/*`. This is so we + ;; can implement support for some platform-specific classes like `BigDecimal` or `java.time.OffsetDateTime`, for + ;; use inside QP code or whatever. In the future maybe we can add support for JS-specific stuff too. + (let [dispatch-value (lib.dispatch/dispatch-value x)] + (if (= dispatch-value :type/*) + (type x) + dispatch-value)))) + +(defmethod type-of* :default + [expr] + (throw (ex-info (i18n/tru "Don''t know how to determine the base type of {0}" (pr-str expr)) + {:expr expr}))) + +(defn- mbql-clause? [expr] + (and (vector? expr) + (keyword? (first expr)))) + +(mr/def ::base-type + [:or + [:= ::type.unknown] + ::common/base-type]) + +(mu/defn type-of :- [:or + ::base-type + [:set {:min 2} ::base-type]] + "Determine the type of an MBQL expression. Returns either a type keyword, or if the type is ambiguous, a set of + possible types." + [expr] + (or + ;; for MBQL clauses with `:base-type` in their options: ignore their dumb [[type-of*]] methods and return that type + ;; directly. Ignore everything else! Life hack! + (and (mbql-clause? expr) + (map? (second expr)) + (:base-type (second expr))) + (type-of* expr))) + +(defn- is-type? [x y] + (cond + (set? x) (some #(is-type? % y) x) + (set? y) (some #(is-type? x %) y) + (= x ::type.unknown) true + :else (isa? x y))) + +(defn type-of? + "Whether the [[type-of]] `expr` isa? [[metabase.types]] `base-type`." + [expr base-type] + (let [expr-type (type-of expr)] + (assert ((some-fn keyword? set?) expr-type) + (i18n/tru "type-of {0} returned an invalid type {1}" (pr-str expr) (pr-str expr-type))) + (is-type? expr-type base-type))) + +(defn- expression-schema + "Schema that matches the following rules: + + 1a. expression is *not* an MBQL clause, OR + + 1b. expression is an registered MBQL clause and matches the schema registered + with [[metabase.lib.schema.mbql-clause]], AND + + 2. expression's [[type-of]] isa? `base-type`" + [base-type description] [:and - [:ref ::ref/ref] + [:or + [:fn + {:error/message "This is shaped like an MBQL clause"} + (complement mbql-clause?)] + [:ref :metabase.lib.schema.mbql-clause/clause]] [:fn - {:error/message (str ":field, :expression, :or :aggregation with a " base-type " :base-type")} - (fn [[_ref opts]] - (isa? (:base-type opts) base-type))]]) + {:error/message description} + #(type-of? % base-type)]]) -(defn- ref-of-base-type-with-temporal-unit [base-type temporal-unit-schema] - [:and - (ref-of-base-type base-type) - [:fn - {:error/message (str ":field, :expression, :or :aggregation reference returning a " - base-type - " with a :temporal-unit that matches " - ;; TODO -- humanize this - (pr-str temporal-unit-schema))} - (let [validator (mc/validator temporal-unit-schema)] - (fn [[_ref {:keys [temporal-unit], :as _opts}]] - (validator temporal-unit)))]]) - -;;; An expression that we can filter on, or do case statements on, etc. (mr/def ::boolean - [:or - {:error/message "expression returning a boolean"} - [:ref ::literal/boolean] - [:ref ::filter/filter] - (ref-of-base-type :type/Boolean)]) + (expression-schema :type/Boolean "expression returning a boolean")) -;;; An expression that returns a string. (mr/def ::string - [:or - {:error/message "expression returning a string"} - [:ref ::literal/string] - (ref-of-base-type :type/Text)]) - -;;; `:length` clause: returns an integer -;;; -;;; [:length options <string-expression> -(mr/def ::length - [:tuple - [:= :length] - ::common/options - ::string]) - -;;; a `:*` clause with all integer expression arguments returns an integer. -(mr/def ::*.integer - [:schema - [:cat - [:= :*] - ::common/options - [:* [:schema [:ref ::integer]]]]]) - -;;; An expression that returns an integer. + (expression-schema :type/Text "expression returning a string")) + (mr/def ::integer - [:or - {:error/message "expression returning an integer"} - ::literal/integer - ::length - [:ref ::*.integer] - (ref-of-base-type :type/Integer) - ;; `:temporal-extract` units like `:day-of-month` actually return `:type/Integer` rather than a temporal type - (ref-of-base-type-with-temporal-unit :type/Date ::temporal-bucketing/unit.date.extract) - (ref-of-base-type-with-temporal-unit :type/Time ::temporal-bucketing/unit.time.extract) - (ref-of-base-type-with-temporal-unit :type/DateTime ::temporal-bucketing/unit.date-time.extract)]) - -;;; a `:*` clause that isn't an `::*.integer` (i.e., one or more clauses sub-expressions is a non-integer-real expression) -;;; returns a non-integer-real number -(mr/def ::*.non-integer-real - [:and - [:schema - [:cat - [:= :*] - ::common/options - [:* [:schema [:ref ::number]]]]] - [:not ::*.integer]]) - -;;; An expression that returns a number that includes a non-integer-real place, including but not limited floats, doubles, -;;; BigDecimals, SmallDecimals, MediumDecimals, etc! Basically any normal number that isn't an integer! + (expression-schema :type/Integer "expression returning an integer")) + (mr/def ::non-integer-real - [:or - {:error/message "expression returning a number with a non-integer-real place"} - [:ref ::literal/non-integer-real] - [:ref ::*.non-integer-real] - ;; for some crazy reason `:type/Float` is the common base type of all numbers with non-integer-real places, not something - ;; smart like `:type/Decimal` - (ref-of-base-type :type/Float)]) - -;;; Any expression that returns any kind of number. + (expression-schema :type/Float "expression returning a non-integer real number")) + (mr/def ::number - [:or - {:error/message "expression returning a number"} - ::integer - ::non-integer-real]) - -;;; `:datetime-add` with a Date expression: -;;; -;;; [:datetime-add <options> <date-expression> <amount> <date-interval-unit>] -(mr/def ::datetime-add.date - [:schema - [:tuple - [:= :datetime-add] - ::common/options - #_expression [:ref ::date] - #_amount [:ref ::integer] - #_unit ::temporal-bucketing/unit.date.interval]]) - -;;; Any expression that returns a `:type/Date` + (expression-schema :type/Number "expression returning a number")) + (mr/def ::date - [:or - {:error/message "expression returning a date"} - [:ref ::literal/date] - [:ref ::datetime-add.date] - (ref-of-base-type-with-temporal-unit :type/Date [:not ::temporal-bucketing/unit.date.extract]) - ;; TODO -- does a truncation of a :type/DateTime to `::temporal-bucketing/unit.date.truncate` like `:day` return a - ;; `:type/Date`? Should we act like it does? - #_(ref-of-base-type-with-temporal-unit :type/DateTime ::temporal-bucketing/unit.date.truncate)]) - -;;; `:datetime-add` with a Time expression: -;;; -;;; [:datetime-add <options> <time-expression> <amount> <time-interval-unit>] -(mr/def ::datetime-add.time - [:schema - [:tuple - [:= :datetime-add] - ::common/options - #_expression [:ref ::time] - #_amount [:ref ::integer] - #_unit ::temporal-bucketing/unit.time.interval]]) - -;;; Any expression that returns a `:type/Time` + (expression-schema :type/Date "expression returning a date")) + (mr/def ::time - [:or - {:error/message "expression returning a time"} - [:ref ::literal/time] - [:ref ::datetime-add.time] - (ref-of-base-type-with-temporal-unit :type/Time [:not ::temporal-bucketing/unit.time.extract])]) - -;;; `:datetime-add` with a DateTime expression: -;;; -;;; [:datetime-add <options> <date-time-expression> <amount> <date-time-interval-unit>] -(mr/def ::datetime-add.date-time - [:tuple - {:error/message ":datetime-add clause with a date time expression"} - [:= :datetime-add] - ::common/options - #_expression [:ref ::date-time] - #_amount [:ref ::integer] - #_unit ::temporal-bucketing/unit.date-time.interval]) - -;;; Any expression that returns a `:type/DateTime` -(mr/def ::date-time - [:or - {:error/message "expression returning a date time"} - [:ref ::literal/date-time] - [:ref ::datetime-add.date-time] - (ref-of-base-type-with-temporal-unit :type/DateTime [:not ::temporal-bucketing/unit.date-time.extract])]) + (expression-schema :type/Time "expression returning a time")) + +(mr/def ::datetime + (expression-schema :type/DateTime "expression returning a date time")) -;;; Any expression that returns some sort of temporal value `java.time.OffsetDateTime` (mr/def ::temporal - [:or - {:error/message "expression returning a date, time, or date time"} - ::date - ::time - ::date-time]) + (expression-schema :type/Temporal "expression returning a date, time, or date time")) -;;; Any type of expression that you can appear in an `:order-by` clause, or in a filter like `:>` or `:<=`. This is -;;; basically everything except for boolean expressions. (mr/def ::orderable - [:or - ::string - ::number - ::temporal - ;; we'll also assume Fields all orderable. This isn't true of all fields but we're not smart enough yet to attach - ;; expression types to Fields. Maybe if we were smarter we could do that. Should every `:field` include - ;; `:base-type` info? - [:ref ::ref/ref]]) - -;;; Any type of expression that can appear in an `:=` or `!=`. I guess this is currently everything? + (expression-schema #{:type/Text :type/Number :type/Temporal} + "an expression that can be compared with :> or :<")) + (mr/def ::equality-comparable [:maybe - [:or - ::boolean - ::string - ::number - ::temporal - [:ref ::ref/ref]]]) + (expression-schema #{:type/Boolean :type/Text :type/Number :type/Temporal} + "an expression that can appear in := or :!=")]) -;;; Any sort of expression at all. +;;; any type of expression. (mr/def ::expression - [:maybe - [:or - ::boolean - ::string - ::number - ::temporal - [:ref ::ref/ref] - ;; this is here for now until we fill this schema out with all of the possibilities. - any?]]) + [:maybe (expression-schema :type/* "any type of expression")]) ;;; the `:expressions` definition map as found as a top-level key in an MBQL stage (mr/def ::expressions diff --git a/src/metabase/lib/schema/expression/arithmetic.cljc b/src/metabase/lib/schema/expression/arithmetic.cljc new file mode 100644 index 00000000000..c481f3338ad --- /dev/null +++ b/src/metabase/lib/schema/expression/arithmetic.cljc @@ -0,0 +1,14 @@ +(ns metabase.lib.schema.expression.arithmetic + "Arithmetic expressions like `:+`." + (:require + [metabase.lib.schema.expression :as expression] + [metabase.lib.schema.mbql-clause :as mbql-clause] + [metabase.types :as types])) + +(mbql-clause/define-catn-mbql-clause :* + [:args [:repeat {:min 2} [:schema [:ref ::expression/number]]]]) + +(defmethod expression/type-of* :* + [[_tag _opts & args]] + #_{:clj-kondo/ignore [:reduce-without-init]} + (reduce types/most-specific-common-ancestor (map expression/type-of args))) diff --git a/src/metabase/lib/schema/filter.cljc b/src/metabase/lib/schema/filter.cljc index cd0bc3fb86d..742620202a4 100644 --- a/src/metabase/lib/schema/filter.cljc +++ b/src/metabase/lib/schema/filter.cljc @@ -2,72 +2,76 @@ "Schemas for the various types of filter clauses that you'd pass to `:filter` or use inside something else that takes a boolean expression." (:require + [clojure.set :as set] [metabase.lib.schema.common :as common] - [metabase.lib.schema.ref :as ref] + [metabase.lib.schema.expression :as expression] + [metabase.lib.schema.mbql-clause :as mbql-clause] + [metabase.types :as types] [metabase.util.malli.registry :as mr])) -(def ^:private eq-comparable - ;; avoid circular refs between these namespaces. - [:schema [:ref :metabase.lib.schema.expression/equality-comparable]]) +(doseq [op [:and :or]] + (mbql-clause/define-catn-mbql-clause op :- :type/Boolean + [:args [:repeat {:min 2} [:schema [:ref ::expression/boolean]]]])) -(def ^:private orderable - ;; avoid circular refs between these namespaces. - [:schema [:ref :metabase.lib.schema.expression/orderable]]) +(mbql-clause/define-tuple-mbql-clause :not :- :type/Boolean + [:ref ::expression/boolean]) -(defn- defclause - [sch & args] - {:pre [(qualified-keyword? sch)]} - (mr/def sch - [:catn - [:clause [:= (keyword (name sch))]] - [:options ::common/options] - (into [:args] args)])) +(doseq [op [:= :!=]] + (mbql-clause/define-catn-mbql-clause op :- :type/Boolean + [:args [:repeat {:min 2} [:schema [:ref ::expression/equality-comparable]]]])) -(doseq [op [::and ::or]] - (defclause op [:repeat {:min 2} [:schema [:ref ::filter]]])) +(doseq [op [:< :<= :> :>=]] + (mbql-clause/define-tuple-mbql-clause op :- :type/Boolean + #_x [:ref ::expression/orderable] + #_y [:ref ::expression/orderable])) -(defclause ::not - [:schema [:ref ::filter]]) - -(doseq [op [::= ::!=]] - (defclause op - [:repeat {:min 2} eq-comparable])) - -(doseq [op [::< ::<= ::> ::>=]] - (defclause op - [:cat orderable orderable])) - -(defclause ::between - [:catn [:field orderable] [:min orderable] [:max orderable]]) +(mbql-clause/define-tuple-mbql-clause :between :- :type/Boolean + ;; TODO -- we should probably enforce additional constraints that the various arg types have to agree, e.g. it makes + ;; no sense to say something like `[:between {} <date> <[:ref ::expression/string]> <integer>]` + #_expr [:ref ::expression/orderable] + #_min [:ref ::expression/orderable] + #_max [:ref ::expression/orderable]) ;; sugar: a pair of `:between` clauses -(defclause ::inside - [:catn - [:lat-field orderable] - [:lon-field orderable] - [:lat-max orderable] - [:lon-min orderable] - [:lat-min orderable] - [:lon-max orderable]]) - -;; [:= ... nil], [:!= ... nil], [:or [:= ... nil] [:= ... ""]], [:and [:!= ... nil] [:!= ... ""]] -(doseq [op [::is-null ::not-null ::is-empty ::not-empty]] - (defclause op - ::ref/field)) +(mbql-clause/define-tuple-mbql-clause :inside :- :type/Boolean + #_lat-field [:ref ::expression/orderable] + #_lon-field [:ref ::expression/orderable] + #_lat-max [:ref ::expression/orderable] + #_lon-min [:ref ::expression/orderable] + #_lat-min [:ref ::expression/orderable] + #_lon-max [:ref ::expression/orderable]) + +;;; null checking expressions +;;; +;;; these are sugar for [:= ... nil] and [:!= ... nil] respectively +(doseq [op [:is-null :not-null]] + (mbql-clause/define-tuple-mbql-clause op :- :type/Boolean + [:ref ::expression/expression])) + +;;; one-arg [:ref ::expression/string] filter clauses +;;; +;;; :is-empty is sugar for [:or [:= ... nil] [:= ... ""]] +;;; +;;; :not-empty is sugar for [:and [:!= ... nil] [:!= ... ""]] +(doseq [op [:is-empty :not-empty]] + (mbql-clause/define-tuple-mbql-clause op :- :type/Boolean + [:ref ::expression/string])) (def ^:private string-filter-options [:map [:case-sensitive {:optional true} :boolean]]) ; default true -(def ^:private string - [:schema [:ref :metabase.lib.schema.expression/string]]) - +;; binary [:ref ::expression/string] filter clauses. These also accept a `:case-sensitive` option +;; +;; `:does-not-contain` is sugar for `[:not [:contains ...]]`: +;; ;; [:does-not-contain ...] = [:not [:contains ...]] -(doseq [op [::starts-with ::ends-with ::contains ::does-not-contain]] - (mr/def op - [:catn - [:clause [:= (keyword (name op))]] - [:options [:merge ::common/options string-filter-options]] - [:args [:catn [:whole string] [:part string]]]])) +(doseq [op [:starts-with :ends-with :contains :does-not-contain]] + (mbql-clause/define-mbql-clause op :- :type/Boolean + [:tuple + [:= op] + [:merge ::common/options string-filter-options] + #_whole [:ref ::expression/string] + #_part [:ref ::expression/string]])) (def ^:private time-interval-options [:map [:include-current {:optional true} :boolean]]) ; default false @@ -76,43 +80,75 @@ [:enum :default :minute :hour :day :week :month :quarter :year]) ;; SUGAR: rewritten as a filter clause with a relative-datetime value -(mr/def ::time-interval - [:catn - [:clause [:= :time-interval]] - [:options [:merge ::common/options time-interval-options]] - [:args [:catn - [:field ::ref/field] - [:n [:or :int [:enum :current :last :next]]] - [:unit relative-datetime-unit]]]]) - -(defclause ::segment - [:catn [:segment-id [:or ::common/int-greater-than-zero ::common/non-blank-string]]]) - -(defclause ::case - [:+ [:catn - [:pred [:schema [:ref ::filter]]] - [:expr [:schema [:ref :metabase.lib.schema.expression/boolean]]]]]) - -;; Boolean literals are not included here because they are not very portable -;; across different databases. In places where they should also be allowed -;; the :metabase.lib.schema.expression/boolean schema can be used. -(mr/def ::filter - [:or - [:ref ::ref/field] - ;; primitive clauses - ::and - ::or - ::not - ::= ::!= - ::< ::<= - ::> ::>= - ::between - ::starts-with ::ends-with ::contains - ::case - ;; sugar clauses - ::inside - ::is-null ::not-null - ::is-empty ::not-empty - ::does-not-contain - ::time-interval - ::segment]) +(mbql-clause/define-mbql-clause :time-interval :- :type/Boolean + ;; TODO -- we should probably further constraint this so you can't do weird stuff like + ;; + ;; [:time-interval {} <time> :current :year] + ;; + ;; using units that don't agree with the expr type + [:tuple + [:= :time-interval] + [:merge ::common/options time-interval-options] + #_expr [:ref ::expression/temporal] + #_n [:or + [:enum :current :last :next] + ;; I guess there's no reason you shouldn't be able to do something like 1 + 2 in here + [:ref ::expression/integer]] + #_unit relative-datetime-unit]) + +;; segments are guaranteed to return valid filter clauses and thus booleans, right? +(mbql-clause/define-mbql-clause :segment :- :type/Boolean + [:tuple + [:= :segment] + ::common/options + [:or ::common/int-greater-than-zero ::common/non-blank-string]]) + +;;; believe it or not, a `:case` clause really has the syntax [:case {} [[pred1 expr1] [pred2 expr2] ...]] +(mr/def ::case-subclause + [:tuple + {:error/message "Valid :case [pred expr] pair"} + #_pred [:ref ::expression/boolean] + #_expr [:ref ::expression/expression]]) + +;;; TODO -- this is not really a filter clause and doesn't belong in here. But where does it belong? +(mbql-clause/define-tuple-mbql-clause :case + ;; TODO -- we should further constrain this so all of the exprs are of the same type + [:sequential {:min 1} [:ref ::case-subclause]]) + +;;; the logic for calculating the return type of a `:case` statement is not optimal nor perfect. But it should be ok +;;; for now and errors on the side of being permissive. See this Slack thread for more info: +;;; https://metaboat.slack.com/archives/C04DN5VRQM6/p1678325996901389 +(defmethod expression/type-of* :case + [[_tag _opts pred-expr-pairs]] + (reduce + (fn [best-guess [_pred expr]] + (let [return-type (expression/type-of expr)] + (cond + (nil? best-guess) + return-type + + ;; if both types are keywords return their most-specific ancestor. + (and (keyword? best-guess) + (keyword? return-type)) + (types/most-specific-common-ancestor best-guess return-type) + + ;; if one type is a specific type but the other is an ambiguous union of possible types, return the specific + ;; type. A case can't possibly have multiple different return types, so if one expression has an unambiguous + ;; type then the whole thing has to have a compatible type. + (keyword? best-guess) + best-guess + + (keyword? return-type) + return-type + + ;; if both types are ambiguous unions of possible types then return the intersection of the two. But if the + ;; intersection is empty, return the union of everything instead. I don't really want to go down a rabbit + ;; hole of trying to find the intersection between the most-specific common ancestors + :else + (or (when-let [intersection (not-empty (set/intersection best-guess return-type))] + (if (= (count intersection) 1) + (first intersection) + intersection)) + (set/union best-guess return-type))))) + nil + pred-expr-pairs)) diff --git a/src/metabase/lib/schema/literal.cljc b/src/metabase/lib/schema/literal.cljc index 425c52ba199..ec74d018f39 100644 --- a/src/metabase/lib/schema/literal.cljc +++ b/src/metabase/lib/schema/literal.cljc @@ -1,23 +1,48 @@ (ns metabase.lib.schema.literal "Malli schemas for string, temporal, number, and boolean literals." (:require - [metabase.util.malli.registry :as mr])) + [malli.core :as mc] + [metabase.lib.schema.expression :as expression] + [metabase.util.malli.registry :as mr] + #?@(:clj ([metabase.lib.schema.literal.jvm])))) + +(defmethod expression/type-of* :dispatch-type/nil + [_nil] + :type/*) + +(mr/def ::boolean + :boolean) + +(defmethod expression/type-of* :dispatch-type/boolean + [_bool] + :type/Boolean) (mr/def ::boolean :boolean) -;;; TODO -- for Clojure, we should also add BigInteger (mr/def ::integer - :int) + #?(:clj [:or + :int + :metabase.lib.schema.literal.jvm/big-integer] + :cljs :int)) + +(defmethod expression/type-of* :dispatch-type/integer + [_int] + :type/Integer) -;;; TODO -- for Clojure, we should also add BigDecimal, and `:float`: -;;; -;;; (malli.core/validate :double (float 1.0)) => false -;;; ;;; we should probably also restrict this to disallow NaN and positive/negative infinity, I don't know in what ;;; universe we'd want to allow those if they're not disallowed already. (mr/def ::non-integer-real - :double) + #?(:clj [:or + :double + :metabase.lib.schema.literal.jvm/float + :metabase.lib.schema.literal.jvm/big-decimal] + :cljs :double)) + +(defmethod expression/type-of* :dispatch-type/number + [_non-integer-real] + ;; `:type/Float` is the 'base type' of all non-integer real number types in [[metabase.types]] =( + :type/Float) (mr/def ::string :string) @@ -25,85 +50,69 @@ ;;; TODO -- these temporal literals could be a little stricter, right now they are pretty permissive, you shouldn't be ;;; allowed to have month `13` or `02-29` for example -(def ^:private date-part-regex #"\d{4}-\d{2}-\d{2}") - -(def ^:private time-part-regex #"\d{2}:\d{2}(?::\d{2}(?:\.\d{3})?)?") - -(def ^:private offset-part-regex - ;; I think technically a zone offset can have a seconds part too but let's not worry too much about supporting that - ;; for now. - (re-pattern (str "(?:Z|" time-part-regex ")"))) +;;; TODO -- these were split out into separate parts originally but apparently string <-> re-pattern conversion +;;; doesn't work in Cljs the way it works in Clj (def ^:private local-date-regex - (re-pattern (str "^" date-part-regex "$"))) + #"^\d{4}-\d{2}-\d{2}$") (def ^:private local-time-regex - (re-pattern (str "^" time-part-regex "$"))) + #"^\d{2}:\d{2}(?::\d{2}(?:\.\d{3})?)?$") (def ^:private offset-time-regex - (re-pattern (str "^" time-part-regex offset-part-regex "$"))) + #"^\d{2}:\d{2}(?::\d{2}(?:\.\d{3})?)?(?:Z|\d{2}:\d{2}(?::\d{2}(?:\.\d{3})?)?)$") -(def ^:private local-date-time-regex - (re-pattern (str "^" date-part-regex "T" time-part-regex "$"))) +(def ^:private local-datetime-regex + #"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}(?::\d{2}(?:\.\d{3})?)?$") -(def ^:private offset-date-time-regex - (re-pattern (str "^" date-part-regex "T" time-part-regex offset-part-regex "$"))) +(def ^:private offset-datetime-regex + #"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}(?::\d{2}(?:\.\d{3})?)?(?:Z|\d{2}:\d{2}(?::\d{2}(?:\.\d{3})?)?)$") -;; e.g. 2022-02-23 -(mr/def ::date.local - #?(:clj [:or - :time/local-date - [:re local-date-regex]] - :cljs [:re local-date-regex])) + +(mr/def ::string.date + [:re local-date-regex]) + +(mr/def ::string.time + [:or + [:re local-time-regex] + [:re offset-time-regex]]) + +(mr/def ::string.datetime + [:or + [:re local-datetime-regex] + [:re offset-datetime-regex]]) + +(defmethod expression/type-of* :dispatch-type/string + [s] + (condp mc/validate s + ::string.datetime #{:type/Text :type/DateTime} + ::string.date #{:type/Text :type/Date} + ::string.time #{:type/Text :type/Time} + :type/Text)) (mr/def ::date - ;; we don't currently support offset dates :shrug: - ::date.local) + #?(:clj [:or + :time/local-date + ::string.date] + :cljs ::string.date)) -;; e.g. 13:12 or 13:12:00 or 13:12:00.000 -(mr/def ::time.local +(mr/def ::time #?(:clj [:or + ::string.time :time/local-time - [:re local-time-regex]] - :cljs [:re local-time-regex])) + :time/offset-time] + :cljs ::string.time)) -(mr/def ::time.offset +(mr/def ::datetime #?(:clj [:or - :time/offset-time - [:re offset-time-regex]] - :cljs [:re offset-time-regex])) - -(mr/def ::time - [:or - ::time.local - ::time.offset]) - -(mr/def ::date-time.local - (let [re [:re {:error/message "local date time string literal"} local-date-time-regex]] - #?(:clj [:or - {:error/message "local date time literal"} - [:schema - {:error/message "instance of java.time.LocalDateTime"} - :time/local-date-time] - re] - :cljs re))) - -(mr/def ::date-time.offset - (let [re [:re {:error/message "offset date time string literal"} offset-date-time-regex]] - #?(:clj [:or - :time/offset-date-time - :time/zoned-date-time - re] - :cljs re))) - -(mr/def ::date-time - [:or - {:error/message "date time literal"} - ::date-time.local - ::date-time.offset]) + ::string.datetime + :time/local-date-time + :time/offset-date-time + :time/zoned-date-time] + :cljs ::string.datetime)) (mr/def ::temporal [:or ::date ::time - ::date-time]) + ::datetime]) diff --git a/src/metabase/lib/schema/literal/jvm.clj b/src/metabase/lib/schema/literal/jvm.clj new file mode 100644 index 00000000000..5497d0e2531 --- /dev/null +++ b/src/metabase/lib/schema/literal/jvm.clj @@ -0,0 +1,58 @@ +(ns metabase.lib.schema.literal.jvm + "JVM-specific literal definitions." + (:require + [metabase.lib.schema.expression :as expression] + [metabase.util.malli.registry :as mr])) + +(set! *warn-on-reflection* true) + +(defn- instance-of [^Class klass] + [:fn {:error/message (str "instance of " (.getName klass))} + #(instance? klass %)]) + +(mr/def ::big-integer + [:or + (instance-of java.math.BigInteger) + (instance-of clojure.lang.BigInt)]) + +(defmethod expression/type-of* java.math.BigInteger + [_n] + :type/BigInteger) + +(defmethod expression/type-of* clojure.lang.BigInt + [_n] + :type/BigInteger) + +(mr/def ::big-decimal + (instance-of java.math.BigDecimal)) + +(defmethod expression/type-of* java.math.BigDecimal + [_n] + :type/Decimal) + +(mr/def ::float + (instance-of Float)) + +(defmethod expression/type-of* java.time.LocalDate + [_t] + :type/DateTime) + +(defmethod expression/type-of* java.time.LocalTime + [_t] + :type/Time) + +(defmethod expression/type-of* java.time.OffsetTime + [_t] + :type/TimeWithTZ) + +(defmethod expression/type-of* java.time.LocalDateTime + [_t] + :type/DateTime) + +(defmethod expression/type-of* java.time.OffsetDateTime + [_t] + :type/DateTimeWithZoneOffset) + +(defmethod expression/type-of* java.time.ZonedDateTime + [_t] + :type/DateTimeWithZoneID) diff --git a/src/metabase/lib/schema/mbql_clause.cljc b/src/metabase/lib/schema/mbql_clause.cljc new file mode 100644 index 00000000000..a9c49e5341f --- /dev/null +++ b/src/metabase/lib/schema/mbql_clause.cljc @@ -0,0 +1,149 @@ +(ns metabase.lib.schema.mbql-clause + (:require + [metabase.lib.schema.common :as common] + [metabase.lib.schema.expression :as expression] + [metabase.types] + [metabase.util.malli :as mu] + [metabase.util.malli.registry :as mr])) + +(comment metabase.types/keep-me) + +(defonce ^:private ^{:doc "Set of all registered MBQL clause tags e.g. #{:starts-with}"} tag-registry + (atom #{})) + +(defn- tag-schema + "Build the schema for `::tag`, for a valid MBQL clause tag." + [] + (into [:enum] (sort @tag-registry))) + +(defn- update-tag-schema! [] + (mr/def ::tag + (tag-schema))) + +(defn- tag->registered-schema-name + "Given an MBQL clause tag like `:starts-with`, return the name of the schema we'll register for it, e.g. + `:mbql.clause/starts-with`." + [tag] + (keyword "mbql.clause" (name tag))) + +(defn- clause*-schema + "Build the schema for `::clause*`, a `:multi` schema that maps MBQL clause tag -> the schema + in [[clause-schema-registry]]." + [] + (into [:multi {:dispatch first + :error/fn (fn [{:keys [value]} _] + (if (vector? value) + (str "Invalid " (pr-str (first value)) " clause: " (pr-str value)) + "not an MBQL clause"))}] + (map (fn [tag] + [tag [:ref (tag->registered-schema-name tag)]])) + @tag-registry)) + +(defn- update-clause-schema! [] + (mr/def ::clause* + (clause*-schema))) + +;;; whenever [[tag-registry]] is updated, update the `::tag` and `::clause*` schemas. +(add-watch tag-registry + ::update-schemas + (fn [_key _ref _old-state _new-state] + (update-tag-schema!) + (update-clause-schema!))) + +;;; create initial, empty definitions of `::tag` and `::clause*` +(update-tag-schema!) +(update-clause-schema!) + +(mr/def ::clause + [:and + [:schema + [:cat + [:schema [:ref ::tag]] + [:* any?]]] + [:ref ::clause*]]) + +(mu/defn define-mbql-clause + "Register the `schema` for an MBQL clause with `tag` keyword, and update the `:metabase.lib.schema.mbql-clause/clause` + so it knows about this clause. Optionally specify the [[expression/type-of]] that this clause returns, inline, if + the clause always returns a certain type; otherwise you can implement [[expression/type-of]] separately. + + (define-mbql-clause :is-null :- :type/Boolean + [:tuple + [:= :is-null] + ::common/options + [:ref :metabase.lib.schema.expression/expression]])" + ([tag :- simple-keyword? + schema] + (let [schema-name (tag->registered-schema-name tag)] + (mr/def schema-name schema) + ;; only need to update the registry and calculated schemas if this is the very first time we're defining this + ;; clause. Otherwise since they're wrapped in `:ref` we don't need to recalculate them. This way we can avoid tons + ;; of pointless recalculations every time we reload a namespace. + (when-not (contains? @tag-registry tag) + (swap! tag-registry conj tag))) + nil) + + ([tag :- simple-keyword? + _arrow :- [:= :-] + return-type :- [:fn {:error/message "valid base type"} #(isa? % :type/*)] + schema] + (define-mbql-clause tag schema) + (defmethod expression/type-of* tag + [_clause] + return-type) + nil)) + +;;; TODO -- add more stuff. + +(defn catn-clause-schema + "Helper intended for use with [[define-mbql-clause]]. Create an MBQL clause schema with `:catn`. Use this for clauses + with variable length. For clauses with fixed argument length, use [[tuple-clause-schema]] instead, since that gives + slight better error messages and doesn't love to complain about 'potentially recursive seqexes' when you forget to + wrap args in `:schema`." + [tag & args] + {:pre [(simple-keyword? tag) + (every? vector? args) + (every? keyword? (map first args))]} + [:schema + (into [:catn + {:error/message (str "Valid " tag " clause")} + [:tag [:= tag]] + [:options ::common/options]] + args)]) + +(defn tuple-clause-schema + "Helper intended for use with [[define-mbql-clause]]. Create a clause schema with `:tuple`. Use this for fixed-length + MBQL clause schemas. Use [[catn-clause-schema]] for variable-length schemas." + [tag & args] + {:pre [(simple-keyword? tag)]} + (into [:tuple + {:error/message (str "Valid " tag " clause")} + [:= tag] + ::common/options] + args)) + +;;;; Even more convenient functions! + +(defn- define-mbql-clause-with-schema-fn [schema-fn tag & args] + (let [[return-type & args] (if (= (first args) :-) + (cons (second args) (drop 2 args)) + (cons nil args)) + schema (apply schema-fn tag args)] + (if return-type + (define-mbql-clause tag :- return-type schema) + (define-mbql-clause tag schema)))) + +(defn define-tuple-mbql-clause + "Helper. Combines [[define-mbql-clause]] and [[tuple-clause-schema]]." + [tag & args] + (apply define-mbql-clause-with-schema-fn tuple-clause-schema tag args)) + +(defn define-catn-mbql-clause + "Helper. Combines [[define-mbql-clause]] and [[catn-clause-schema]]." + [tag & args] + (apply define-mbql-clause-with-schema-fn catn-clause-schema tag args)) + +(defn resolve-schema + "For REPL/test usage: get the definition of the schema associated with an MBQL clause tag." + [tag] + (mr/resolve-schema (tag->registered-schema-name tag))) diff --git a/src/metabase/lib/schema/ref.cljc b/src/metabase/lib/schema/ref.cljc index 727181124ff..8c5ed513c07 100644 --- a/src/metabase/lib/schema/ref.cljc +++ b/src/metabase/lib/schema/ref.cljc @@ -3,7 +3,9 @@ (:require [metabase.lib.dispatch :as lib.dispatch] [metabase.lib.schema.common :as common] + [metabase.lib.schema.expression :as expression] [metabase.lib.schema.id :as id] + [metabase.lib.schema.mbql-clause :as mbql-clause] [metabase.lib.schema.temporal-bucketing :as temporal-bucketing] [metabase.types] [metabase.util.malli.registry :as mr])) @@ -16,14 +18,11 @@ [:map [:temporal-unit {:optional true} ::temporal-bucketing/unit]]]) -(mr/def ::base-type - [:fn #(isa? % :type/*)]) - (mr/def ::field.literal.options - [:and + [:merge ::field.options [:map - [:base-type ::base-type]]]) + [:base-type ::common/base-type]]]) ;;; `:field` clause (mr/def ::field.literal @@ -38,7 +37,7 @@ ::field.options ; TODO -- we should make `:base-type` required here too ::id/field]) -(mr/def ::field +(mbql-clause/define-mbql-clause :field [:and [:tuple [:= :field] @@ -49,26 +48,40 @@ [:dispatch-type/integer ::field.id] [:dispatch-type/string ::field.literal]]]) -(mr/def ::expression - [:tuple - [:= :expression] - ::field.options - ::common/non-blank-string]) +(defmethod expression/type-of* :field + [[_tag opts _id-or-name]] + (or (:base-type opts) + ::expression/type.unknown)) + +(mbql-clause/define-tuple-mbql-clause :expression + ::common/non-blank-string) + +(defmethod expression/type-of* :expression + [[_tag opts _expression-name]] + (or (:base-type opts) + ::expression/type.unknown)) -(mr/def ::aggregation +(mr/def ::aggregation-options + [:merge + ::common/options + [:name {:optional true} ::common/non-blank-string] + [:display-name {:optional true}] ::common/non-blank-string]) + +(mbql-clause/define-mbql-clause :aggregation [:tuple [:= :aggregation] - ::field.options + ::aggregation-options ::common/int-greater-than-or-equal-to-zero]) +(defmethod expression/type-of* :aggregation + [[_tag opts _index]] + (or (:base-type opts) + ::expression/type.unknown)) + (mr/def ::ref [:and - [:tuple - [:enum :field :expression :aggregation] - ::field.options - any?] - [:multi {:dispatch #(keyword (first %)) - :error/message ":field, :expression, :or :aggregation reference"} - [:field ::field] - [:aggregation ::aggregation] - [:expression ::expression]]]) + ::mbql-clause/clause + [:fn + {:error/message ":field, :expression, :or :aggregation reference"} + (fn [[tag :as _clause]] + (#{:field :expression :aggregation} tag))]]) diff --git a/src/metabase/util.cljc b/src/metabase/util.cljc index 345afaf2ec8..57e5f3207cf 100644 --- a/src/metabase/util.cljc +++ b/src/metabase/util.cljc @@ -695,6 +695,7 @@ (instance? java.util.regex.Pattern x))) (derive :dispatch-type/nil :dispatch-type/*) +(derive :dispatch-type/boolean :dispatch-type/*) (derive :dispatch-type/string :dispatch-type/*) (derive :dispatch-type/keyword :dispatch-type/*) (derive :dispatch-type/number :dispatch-type/*) @@ -722,6 +723,7 @@ [x] (cond (nil? x) :dispatch-type/nil + (boolean? x) :dispatch-type/boolean (string? x) :dispatch-type/string (keyword? x) :dispatch-type/keyword (integer? x) :dispatch-type/integer diff --git a/src/metabase/util/malli/registry.cljc b/src/metabase/util/malli/registry.cljc index 8c885d14b50..fad6b4465ad 100644 --- a/src/metabase/util/malli/registry.cljc +++ b/src/metabase/util/malli/registry.cljc @@ -27,3 +27,8 @@ "Like [[clojure.spec.alpha/def]]; add a Malli schema to our registry." [type schema] `(register! ~type ~schema))) + +(defn resolve-schema + "For REPL/test usage: get the definition of a registered schema from the registry." + [k] + (mr/schema registry k)) diff --git a/test/metabase/lib/schema/expression/arithmetic_test.cljc b/test/metabase/lib/schema/expression/arithmetic_test.cljc new file mode 100644 index 00000000000..4b842ffbbd7 --- /dev/null +++ b/test/metabase/lib/schema/expression/arithmetic_test.cljc @@ -0,0 +1,33 @@ +(ns metabase.lib.schema.expression.arithmetic-test + (:require + [clojure.test :refer [deftest is testing]] + [malli.core :as mc] + [metabase.lib.schema.expression :as expression] + [metabase.lib.schema.expression.arithmetic] + [metabase.lib.test-metadata :as meta])) + +(comment metabase.lib.schema.expression.arithmetic/keep-me) + +(deftest ^:parallel times-test + (let [venues-price [:field {:lib/uuid (str (random-uuid)), :base-type :type/Integer} (meta/id :venues :price)]] + (testing "A `:field` clause with an integer base type in its options should be considered to be an integer expression" + (is (mc/validate + ::expression/integer + venues-price))) + (testing "integer literals are integer expressions" + (is (mc/validate + ::expression/integer + 2))) + (testing "Multiplication with all integer args should be considered to be an integer expression" + (let [expr [:* {:lib/uuid (str (random-uuid))} venues-price 2]] + (is (= :type/Integer + (expression/type-of expr))) + (is (mc/validate :mbql.clause/* expr)) + (is (mc/validate ::expression/integer expr)))) + (testing "Multiplication with one or more non-integer args should NOT be considered to be an integer expression." + (let [expr [:* {:lib/uuid (str (random-uuid))} venues-price 2.1]] + (is (= :type/Number + (expression/type-of expr))) + (is (mc/validate :mbql.clause/* expr)) + (is (not (mc/validate ::expression/integer expr))) + (is (mc/validate ::expression/number expr)))))) diff --git a/test/metabase/lib/schema/expression_test.cljc b/test/metabase/lib/schema/expression_test.cljc deleted file mode 100644 index 8246baaf3a7..00000000000 --- a/test/metabase/lib/schema/expression_test.cljc +++ /dev/null @@ -1,55 +0,0 @@ -(ns metabase.lib.schema.expression-test - (:require - [clojure.test :refer [are deftest is testing]] - [malli.core :as mc] - [metabase.lib.schema.expression :as expression] - [metabase.lib.schema.filter :as filter] - [metabase.lib.test-metadata :as meta])) - -(deftest ^:parallel integer-literal-test - (testing "valid schemas" - (are [n] (are [schema] (mc/validate schema n) - ::expression/integer - ::expression/number - ::expression/orderable - ::expression/equality-comparable - ::expression/expression) - (int 1) - (long 1) - ;; TODO - #_(bigint 1))) - (testing "invalid schemas" - (are [n] (are [schema] (not (mc/validate schema n)) - ;; sanity check - ::filter/filter - ::expression/boolean - ::expression/string - ::expression/date - ::expression/time - ::expression/date-time - ::expression/temporal) - (int 1) - (long 1) - ;; TODO - #_(bigint 1)))) - -(deftest ^:parallel integer-expression-test - (let [venues-price [:field {:lib/uuid (str (random-uuid)), :base-type :type/Integer} (meta/id :venues :price)]] - (testing "A `:field` clause with an integer base type in its options should be considered to be an integer expression" - (is (mc/validate - ::expression/integer - venues-price))) - (testing "integer literals are integer expressions" - (is (mc/validate - ::expression/integer - 2))) - (testing "Multiplication with all integer args should be considered to be an integer expression" - (are [schema] (mc/validate - schema - [:* {:lib/uuid (str (random-uuid))} venues-price 2]) - ::expression/*.integer - ::expression/integer)) - (testing "Multiplication with one or more non-integer args should NOT be considered to be an integer expression." - (is (not (mc/validate - ::expression/integer - [:* {} venues-price 2.0])))))) diff --git a/test/metabase/lib/schema/filter_test.cljc b/test/metabase/lib/schema/filter_test.cljc index aee1339d8b9..fea1e2e6e55 100644 --- a/test/metabase/lib/schema/filter_test.cljc +++ b/test/metabase/lib/schema/filter_test.cljc @@ -1,18 +1,66 @@ (ns metabase.lib.schema.filter-test (:require - [clojure.test :refer [deftest is testing]] + [clojure.test :refer [are deftest is testing]] [clojure.walk :as walk] [malli.core :as mc] - malli.registry - ;; expression and filter recursively depend on each other - metabase.lib.schema.expression - [metabase.lib.schema.filter :as filter] - [metabase.util.malli.registry :as mr])) + [metabase.lib.schema.expression :as expression] + [metabase.lib.schema.filter] + [metabase.lib.schema.literal] + [metabase.lib.schema.ref])) + +(comment metabase.lib.schema.filter/keep-me + metabase.lib.schema.literal/keep-me + metabase.lib.schema.ref/keep-me) + +(defn- case-expr [& args] + (let [clause [:case + {:lib/uuid (str (random-uuid))} + (mapv (fn [arg] + [[:= {:lib/uuid (str (random-uuid))} 1 1] + arg]) + args)]] + (is (mc/validate :mbql.clause/case clause)) + clause)) + +(deftest ^:parallel case-type-of-test + (are [expr expected] (= expected + (expression/type-of expr)) + ;; easy, no ambiguity + (case-expr 1 1) + :type/Integer + + ;; Ambiguous literal types + (case-expr "2023-03-08") + #{:type/Text :type/Date} + + (case-expr "2023-03-08" "2023-03-08") + #{:type/Text :type/Date} + + ;; Ambiguous literal types mixed with unambiguous types + (case-expr "2023-03-08" "abc") + :type/Text + + ;; Literal types that are ambiguous in different ways! `:type/Text` is the only common type between them! + (case-expr "2023-03-08" "05:13") + :type/Text + + ;; Confusion! The "2023-03-08T06:15" is #{:type/String :type/DateTime}, which is less specific than + ;; `:type/DateTimeWithLocalTZ`. Technically this should return `:type/DateTime`, since it's the most-specific + ;; common ancestor type compatible with all args! But calculating that stuff is way too hard! So this will have to + ;; do for now! -- Cam + (case-expr "2023-03-08T06:15" [:field {:lib/uuid (str (random-uuid)), :base-type :type/DateTimeWithLocalTZ} 1]) + :type/DateTimeWithLocalTZ + + ;; Differing types with a common base type that is more specific than `:type/*` + (case-expr 1 1.1) + :type/Number)) (defn- ensure-uuids [filter-expr] (walk/postwalk (fn [f] - (if (and (vector? f) (not (map-entry? f))) + (if (and (vector? f) + (keyword? (first f)) + (not (map-entry? f))) (let [[op & args] f] (cond (and (map? (first args)) (not (:lib/uuid (first args)))) @@ -25,16 +73,6 @@ f)) filter-expr)) -(defn- known-filter-ops - "Return all registered filter operators." - [] - (into #{} - (comp (filter #(and (qualified-keyword? %) - (not= % ::filter/filter) - (= (namespace %) (namespace ::filter/filter)))) - (map (comp keyword name))) - (keys (malli.registry/schemas @#'mr/registry)))) - (defn- filter-ops "Return the set of filter operators in `filter-expr`." [filter-expr] @@ -81,17 +119,26 @@ [:time-interval {:include-current true} field :next :default] [:segment 1] [:segment "segment-id"] - [:case [:= 1 1] true [:not-null field] [:< 0 1]]]] - (is (= (known-filter-ops) (filter-ops filter-expr))) - ;; testing all the subclauses of `filter-expr` above individually. If something gets broken this is easier to debug + [:case [[[:= 1 1] true] [[:not-null field] [:< 0 1]]]]]] + (doseq [op (filter-ops filter-expr)] + (testing (str op " is a registered MBQL clause (a type-of* method is registered for it)") + (is (not (identical? (get-method expression/type-of* op) + (get-method expression/type-of* :default)))))) + ;; test all the subclauses of `filter-expr` above individually. If something gets broken this is easier to debug (doseq [filter-clause (rest filter-expr) :let [filter-clause (ensure-uuids filter-clause)]] - (testing (list `mc/validate ::filter/filter filter-clause) - (is (mc/validate ::filter/filter filter-clause)))) + (testing (pr-str filter-clause) + (is (= (expression/type-of filter-clause) :type/Boolean)) + (is (mc/validate ::expression/boolean filter-clause)))) ;; now test the entire thing - (is (mc/validate ::filter/filter (ensure-uuids filter-expr))))) + (is (mc/validate ::expression/boolean (ensure-uuids filter-expr)))))) - (testing "invalid filter" - (is (false? (mc/validate - ::filter/filter - (ensure-uuids [:xor 13 [:field {:lib/uuid (str (random-uuid))} 1]])))))) +(deftest ^:parallel invalid-filter-test + (testing "invalid filters" + (are [clause] (mc/explain + ::expression/boolean + (ensure-uuids clause)) + ;; xor doesn't exist + [:xor 13 [:field 1 {:lib/uuid (str (random-uuid))}]] + ;; 1 is not a valid <string> arg + [:contains "abc" 1]))) diff --git a/test/metabase/lib/schema/literal_test.cljc b/test/metabase/lib/schema/literal_test.cljc new file mode 100644 index 00000000000..a517bef1769 --- /dev/null +++ b/test/metabase/lib/schema/literal_test.cljc @@ -0,0 +1,59 @@ +(ns metabase.lib.schema.literal-test + (:require + [clojure.test :refer [are deftest is testing]] + [malli.core :as mc] + [metabase.lib.schema.expression :as expression] + [metabase.lib.schema.literal :as literal])) + +(deftest ^:parallel integer-literal-test + (testing "valid schemas" + (are [n] (are [schema] (mc/validate schema n) + ::expression/integer + ::expression/number + ::expression/orderable + ::expression/equality-comparable + ::expression/expression) + (int 1) + (long 1) + #?@(:clj ((bigint 1) + (biginteger 1))))) + (testing "invalid schemas" + (are [n] (are [schema] (mc/explain schema n) + ::expression/boolean + ::expression/string + ::expression/date + ::expression/time + ::expression/datetime + ::expression/temporal) + (int 1) + (long 1) + #?@(:clj ((bigint 1) + (biginteger 1)))))) + +(deftest ^:parallel string-literal-type-of-test + (is (mc/validate ::literal/string.datetime "2023-03-08T03:18")) + (are [s expected] (= expected + (expression/type-of s)) + "" :type/Text + "abc" :type/Text + "2023" :type/Text + "2023-03-08" #{:type/Text :type/Date} + "03:18" #{:type/Text :type/Time} + "2023-03-08T03:18" #{:type/Text :type/DateTime})) + +(deftest ^:parallel string-literal-test + (testing "valid schemas" + (are [schema] (mc/validate schema "s") + ::expression/string + ::expression/orderable + ::expression/equality-comparable + ::expression/expression)) + (testing "invalid schemas" + (are [schema] (mc/explain schema "s") + ::expression/boolean + ::expression/integer + ::expression/number + ::expression/date + ::expression/time + ::expression/datetime + ::expression/temporal))) diff --git a/test/metabase/lib/schema/mbql_clause_test.cljc b/test/metabase/lib/schema/mbql_clause_test.cljc new file mode 100644 index 00000000000..7a4c33382d7 --- /dev/null +++ b/test/metabase/lib/schema/mbql_clause_test.cljc @@ -0,0 +1,25 @@ +(ns metabase.lib.schema.mbql-clause-test + (:require + [clojure.test :refer [deftest is testing]] + [malli.core :as mc] + [metabase.lib.schema.filter] + [metabase.lib.schema.literal] + [metabase.lib.schema.mbql-clause :as mbql-clause])) + +(comment metabase.lib.schema.filter/keep-me + metabase.lib.schema.literal/keep-me) + +(deftest ^:parallel schema-test + (is (mc/validate + ::mbql-clause/clause + [:contains {:lib/uuid "1527d17e-a2f0-4e5f-a92b-65d1db90c094"} "x" "y"])) + (testing "1 should not be allowed as a <string> arg" + (is (not (mc/validate + ::mbql-clause/clause + [:contains {:lib/uuid "1527d17e-a2f0-4e5f-a92b-65d1db90c094"} "x" 1]))))) + +(deftest ^:parallel resolve-schema-test + (testing "Schema should be registered" + (is (mbql-clause/resolve-schema :contains))) + (testing "Schema should be valid" + (is (mc/schema (mbql-clause/resolve-schema :contains))))) diff --git a/test/metabase/lib/schema/ref_test.cljc b/test/metabase/lib/schema/ref_test.cljc new file mode 100644 index 00000000000..6f505800446 --- /dev/null +++ b/test/metabase/lib/schema/ref_test.cljc @@ -0,0 +1,17 @@ +(ns metabase.lib.schema.ref-test + (:require + [clojure.test :refer [are deftest is]] + [malli.core :as mc] + [metabase.lib.schema.expression :as expression] + [metabase.lib.schema.ref])) + +(comment metabase.lib.schema.ref/keep-me) + +(deftest ^:parallel unknown-type-test + (let [expr [:field {:lib/uuid "214211bc-9bc0-4025-afc5-2256a523bafe"} 1]] + (is (= ::expression/type.unknown + (expression/type-of expr))) + (is (expression/type-of? expr :type/Boolean)) + (are [schema] (mc/validate schema expr) + ::expression/boolean + ::expression/expression))) -- GitLab