diff --git a/shared/src/metabase/mbql/schema.cljc b/shared/src/metabase/mbql/schema.cljc index 46eb035e204a69a2b386b604cfab381c0157c857..f23c690982ec0784dcc9c570091fc9da6cf187ff 100644 --- a/shared/src/metabase/mbql/schema.cljc +++ b/shared/src/metabase/mbql/schema.cljc @@ -101,15 +101,18 @@ ;; TODO -- currently these are all the same between date/time/datetime -(def ^:private ^{:arglists '([s])} can-parse-date? +(def ^{:arglists '([s])} can-parse-date? + "Returns whether a string can be parsed to an ISO 8601 date or not." #?(:clj (partial can-parse-iso-8601? DateTimeFormatter/ISO_DATE) :cljs can-parse-iso-8601?)) -(def ^:private ^{:arglists '([s])} can-parse-datetime? +(def ^{:arglists '([s])} can-parse-datetime? + "Returns whether a string can be parsed to an ISO 8601 datetime or not." #?(:clj (partial can-parse-iso-8601? DateTimeFormatter/ISO_DATE_TIME) :cljs can-parse-iso-8601?)) -(def ^:private ^{:arglists '([s])} can-parse-time? +(def ^{:arglists '([s])} can-parse-time? + "Returns whether a string can be parsed to an ISO 8601 time or not." #?(:clj (partial can-parse-iso-8601? DateTimeFormatter/ISO_TIME) :cljs can-parse-iso-8601?)) diff --git a/src/metabase/driver/postgres.clj b/src/metabase/driver/postgres.clj index bb8dc9caf5f707e9eee6c343ca78c48f209444f8..29538e5cb600380e625010f6937ef5bd62a8e492 100644 --- a/src/metabase/driver/postgres.clj +++ b/src/metabase/driver/postgres.clj @@ -283,24 +283,16 @@ (defn- json-query [identifier nfc-field] (letfn [(handle-name [x] (if (number? x) (str x) (name x)))] - (let [field-type (:effective_type nfc-field) + (let [field-type (:database_type nfc-field) nfc-path (:nfc_path nfc-field) unwrapped-identifier (:form identifier) parent-identifier (field/nfc-field->parent-identifier unwrapped-identifier nfc-field) - ;; Array and sub-JSON coerced to text - cast-type (cond - (isa? field-type :type/Integer) - :type/Integer - (isa? field-type :type/Float) - :type/Float - (isa? field-type :type/Boolean) - :type/Boolean - :else - :type/Text)] - (hx/cast cast-type - (apply hsql/call [:json_extract_path_text - (hx/cast :json parent-identifier) - (mapv #(hx/cast :text (handle-name %)) (rest nfc-path))]))))) + names (format "{%s}" (str/join "," (map handle-name (rest nfc-path))))] + (reify + hformat/ToSql + (to-sql [_] + (hformat/to-params-default names "nfc_path") + (format "(%s#>> ?::text[])::%s " (hformat/to-sql parent-identifier) field-type)))))) (defmethod sql.qp/->honeysql [:postgres :field] [driver [_ id-or-name _opts :as clause]] diff --git a/src/metabase/driver/sql_jdbc/sync/describe_table.clj b/src/metabase/driver/sql_jdbc/sync/describe_table.clj index 4ac9827d342adae02fa27db555d7b68e8bd54682..7cbbfb88da67f2cf576c1d6b86ed47cd4f001257 100644 --- a/src/metabase/driver/sql_jdbc/sync/describe_table.clj +++ b/src/metabase/driver/sql_jdbc/sync/describe_table.clj @@ -12,6 +12,7 @@ [metabase.driver.sql-jdbc.sync.common :as common] [metabase.driver.sql-jdbc.sync.interface :as i] [metabase.driver.sql.query-processor :as sql.qp] + [metabase.mbql.schema :as mbql.s] [metabase.util :as u] [metabase.util.honeysql-extensions :as hx]) (:import [java.sql Connection DatabaseMetaData ResultSet])) @@ -209,10 +210,19 @@ (flatten-row xs path))))))] (into {} (flatten-row row [field-name])))) +(defn- type-by-parsing-string + "Mostly just (type member) but with a bit to suss out strings which are ISO8601 and say that they are datetimes" + [member] + (let [member-type (type member)] + (if (and (instance? String member) + (mbql.s/can-parse-datetime? member)) + java.time.LocalDateTime + member-type))) + (defn- row->types [row] (into {} (for [[field-name field-val] row] (let [flat-row (flattened-row field-name field-val)] - (into {} (map (fn [[k v]] [k (type v)]) flat-row)))))) + (into {} (map (fn [[k v]] [k (type-by-parsing-string v)]) flat-row)))))) (defn- describe-json-xform [member] ((comp (map #(for [[k v] %] [k (json/parse-string v)])) @@ -220,33 +230,46 @@ (map row->types)) member)) (defn- describe-json-rf + "Reducing function that takes a bunch of maps from row->types, + and gets them to conform to the type hierarchy, + going through and taking the lowest common denominator type at each pass, + ignoring the nils." ([] nil) - ([fst] fst) - ([fst snd] + ([acc-field-type-map] acc-field-type-map) + ([acc-field-type-map second-field-type-map] (into {} - (for [json-column (set/union (keys snd) (keys fst))] + (for [json-column (set/union (keys second-field-type-map) + (keys acc-field-type-map))] (cond - (or (nil? fst) - (nil? (fst json-column)) - (= (hash (fst json-column)) (hash (snd json-column)))) - [json-column (snd json-column)] + (or (nil? acc-field-type-map) + (nil? (acc-field-type-map json-column)) + (= (hash (acc-field-type-map json-column)) + (hash (second-field-type-map json-column)))) + [json-column (second-field-type-map json-column)] - (or (nil? snd) - (nil? (snd json-column))) - [json-column (fst json-column)] + (or (nil? second-field-type-map) + (nil? (second-field-type-map json-column))) + [json-column (acc-field-type-map json-column)] - (every? #(isa? % Number) [(fst json-column) (snd json-column)]) + (every? #(isa? % Number) [(acc-field-type-map json-column) + (second-field-type-map json-column)]) [json-column java.lang.Number] - (every? #{java.lang.String java.lang.Long java.lang.Integer java.lang.Double java.lang.Boolean} - [(fst json-column) (snd json-column)]) + (every? + (fn [column-type] + (some (fn [allowed-type] + (isa? column-type allowed-type)) + [String Number Boolean java.time.LocalDateTime])) + [(acc-field-type-map json-column) (second-field-type-map json-column)]) [json-column java.lang.String] :else [json-column nil]))))) -(def ^:const field-type-map - "We deserialize the JSON in order to determine types, +(def field-type-map + "Map from Java types for deserialized JSON (so small subset of Java types) to MBQL types. + + We actually do deserialize the JSON in order to determine types, so the java / clojure types we get have to be matched to MBQL types" {java.lang.String :type/Text ;; JSON itself has the single number type, but Java serde of JSON is stricter @@ -255,16 +278,31 @@ java.lang.Double :type/Float java.lang.Number :type/Number java.lang.Boolean :type/Boolean + java.time.LocalDateTime :type/DateTime clojure.lang.PersistentVector :type/Array clojure.lang.PersistentArrayMap :type/Structured}) +(def db-type-map + "Map from MBQL types to database types. + + This is the lowest common denominator of types, hopefully, + although as of writing this is just geared towards Postgres types" + {:type/Text "text" + :type/Integer "integer" + :type/Float "double precision" + :type/Number "double precision" + :type/Boolean "boolean" + :type/DateTime "timestamp" + :type/Array "text" + :type/Structured "text"}) + (defn- field-types->fields [field-types] (let [valid-fields (for [[field-path field-type] (seq field-types)] (if (nil? field-type) nil (let [curr-type (get field-type-map field-type :type/*)] {:name (str/join " \u2192 " (map name field-path)) ;; right arrow - :database-type curr-type + :database-type (db-type-map curr-type) :base-type curr-type ;; Postgres JSONB field, which gets most usage, doesn't maintain JSON object ordering... :database-position 0 diff --git a/test/metabase/driver/postgres_test.clj b/test/metabase/driver/postgres_test.clj index 77ae9b5cec3c93a0930d793cf8107fdf41c9acad..67625cca824a6103cdb3c3fb37118caedcf27f8a 100644 --- a/test/metabase/driver/postgres_test.clj +++ b/test/metabase/driver/postgres_test.clj @@ -279,22 +279,16 @@ (deftest ^:parallel json-query-test (let [boop-identifier (hx/with-type-info (hx/identifier :field "boop" "bleh -> meh") {})] (testing "Transforming MBQL query with JSON in it to postgres query works" - (let [boop-field {:nfc_path [:bleh :meh]}] - (is (= ["CAST(json_extract_path_text(CAST(boop.bleh AS json), (CAST(? AS text))) AS Text)" "meh"] + (let [boop-field {:nfc_path [:bleh :meh] :database_type "integer"}] + (is (= ["(boop.bleh#>> ?::text[])::integer " "{meh}"] (hsql/format (#'postgres/json-query boop-identifier boop-field)))))) (testing "What if types are weird and we have lists" - (let [weird-field {:nfc_path [:bleh "meh" :foobar 1234]}] - (is (= ["CAST(json_extract_path_text(CAST(boop.bleh AS json), (CAST(? AS text), CAST(? AS text), CAST(? AS text))) AS Text)" - "meh" - "foobar" - "1234"] + (let [weird-field {:nfc_path [:bleh "meh" :foobar 1234] :database_type "integer"}] + (is (= ["(boop.bleh#>> ?::text[])::integer " "{meh,foobar,1234}"] (hsql/format (#'postgres/json-query boop-identifier weird-field)))))) (testing "Give us a boolean cast when the field is boolean" - (let [boolean-boop-field {:effective_type :type/Boolean :nfc_path [:bleh "boop" :foobar 1234]}] - (is (= ["CAST(json_extract_path_text(CAST(boop.bleh AS json), (CAST(? AS text), CAST(? AS text), CAST(? AS text))) AS Boolean)" - "boop" - "foobar" - "1234"] + (let [boolean-boop-field {:database_type "boolean" :nfc_path [:bleh "boop" :foobar 1234]}] + (is (= ["(boop.bleh#>> ?::text[])::boolean " "{boop,foobar,1234}"] (hsql/format (#'postgres/json-query boop-identifier boolean-boop-field)))))))) (deftest describe-nested-field-columns-test @@ -305,8 +299,8 @@ spec (sql-jdbc.conn/connection-details->spec :postgres details)] (jdbc/with-db-connection [conn (sql-jdbc.conn/connection-details->spec :postgres details)] (jdbc/execute! spec [(str "CREATE TABLE describe_json_table (coherent_json_val JSON NOT NULL, incoherent_json_val JSON NOT NULL);" - "INSERT INTO describe_json_table (coherent_json_val, incoherent_json_val) VALUES ('{\"a\": 1, \"b\": 2}', '{\"a\": 1, \"b\": 2, \"c\": 3, \"d\": 44}');" - "INSERT INTO describe_json_table (coherent_json_val, incoherent_json_val) VALUES ('{\"a\": 2, \"b\": 3}', '{\"a\": [1, 2], \"b\": \"blurgle\", \"c\": 3.22}');")])) + "INSERT INTO describe_json_table (coherent_json_val, incoherent_json_val) VALUES ('{\"a\": 1, \"b\": 2, \"c\": \"2017-01-13T17:09:22.222\"}', '{\"a\": 1, \"b\": 2, \"c\": 3, \"d\": 44}');" + "INSERT INTO describe_json_table (coherent_json_val, incoherent_json_val) VALUES ('{\"a\": 2, \"b\": 3, \"c\": \"2017-01-13T17:09:42.411\"}', '{\"a\": [1, 2], \"b\": \"blurgle\", \"c\": 3.22}');")])) (mt/with-temp Database [database {:engine :postgres, :details details}] (is (= :type/SerializedJSON (->> (sql-jdbc.sync/describe-table :postgres database {:name "describe_json_table"}) @@ -315,31 +309,37 @@ (first) (:semantic-type)))) (is (= '#{{:name "incoherent_json_val → b", - :database-type :type/Text, + :database-type "text", :base-type :type/Text, :database-position 0, :nfc-path [:incoherent_json_val "b"] :visibility-type :normal} {:name "coherent_json_val → a", - :database-type :type/Integer, + :database-type "integer", :base-type :type/Integer, :database-position 0, :nfc-path [:coherent_json_val "a"] :visibility-type :normal} {:name "coherent_json_val → b", - :database-type :type/Integer, + :database-type "integer", :base-type :type/Integer, :database-position 0, :nfc-path [:coherent_json_val "b"] :visibility-type :normal} + {:name "coherent_json_val → c", + :database-type "timestamp", + :base-type :type/DateTime, + :database-position 0, + :visibility-type :normal, + :nfc-path [:coherent_json_val "c"]} {:name "incoherent_json_val → c", - :database-type :type/Number, + :database-type "double precision", :base-type :type/Number, :database-position 0, :visibility-type :normal, :nfc-path [:incoherent_json_val "c"]} {:name "incoherent_json_val → d", - :database-type :type/Integer, + :database-type "integer", :base-type :type/Integer, :database-position 0, :visibility-type :normal, diff --git a/test/metabase/driver/sql_jdbc/sync/describe_table_test.clj b/test/metabase/driver/sql_jdbc/sync/describe_table_test.clj index 19cf1ecd28d862c5ac6cdc49febfa4dc1fae0d61..dc263bea5b4106d06fd1d064db541b933715047c 100644 --- a/test/metabase/driver/sql_jdbc/sync/describe_table_test.clj +++ b/test/metabase/driver/sql_jdbc/sync/describe_table_test.clj @@ -69,6 +69,12 @@ (filter :semantic-type) (map (juxt (comp str/lower-case :name) :semantic-type)))))))) +(deftest type-by-parsing-string + (testing "type-by-parsing-string" + (is (= java.lang.String (#'describe-table/type-by-parsing-string "bleh"))) + (is (= java.time.LocalDateTime (#'describe-table/type-by-parsing-string "2017-01-13T17:09:42.411"))) + (is (= java.lang.Long (#'describe-table/type-by-parsing-string 11111))))) + (deftest describe-nested-field-columns-test (testing "flattened-row" (let [row {:bob {:dobbs 123 :cobbs "boop"}}