Skip to content
Snippets Groups Projects
Unverified Commit 3dd761e4 authored by Howon Lee's avatar Howon Lee Committed by GitHub
Browse files

Make field-values work with JSON fields (really, make the type hierarchy work...

Make field-values work with JSON fields (really, make the type hierarchy work and get field values to the 90-yard line) (#21609)

Many bugs but not the underlying group-by bug squashed for JSON field values. Type hierarchy fixed properly.
parent a04ce441
No related branches found
No related tags found
No related merge requests found
......@@ -101,15 +101,18 @@
;; TODO -- currently these are all the same between date/time/datetime
(def ^:private ^{:arglists '([s])} can-parse-date?
(def ^{:arglists '([s])} can-parse-date?
"Returns whether a string can be parsed to an ISO 8601 date or not."
#?(:clj (partial can-parse-iso-8601? DateTimeFormatter/ISO_DATE)
:cljs can-parse-iso-8601?))
(def ^:private ^{:arglists '([s])} can-parse-datetime?
(def ^{:arglists '([s])} can-parse-datetime?
"Returns whether a string can be parsed to an ISO 8601 datetime or not."
#?(:clj (partial can-parse-iso-8601? DateTimeFormatter/ISO_DATE_TIME)
:cljs can-parse-iso-8601?))
(def ^:private ^{:arglists '([s])} can-parse-time?
(def ^{:arglists '([s])} can-parse-time?
"Returns whether a string can be parsed to an ISO 8601 time or not."
#?(:clj (partial can-parse-iso-8601? DateTimeFormatter/ISO_TIME)
:cljs can-parse-iso-8601?))
......
......@@ -283,24 +283,16 @@
(defn- json-query [identifier nfc-field]
(letfn [(handle-name [x] (if (number? x) (str x) (name x)))]
(let [field-type (:effective_type nfc-field)
(let [field-type (:database_type nfc-field)
nfc-path (:nfc_path nfc-field)
unwrapped-identifier (:form identifier)
parent-identifier (field/nfc-field->parent-identifier unwrapped-identifier nfc-field)
;; Array and sub-JSON coerced to text
cast-type (cond
(isa? field-type :type/Integer)
:type/Integer
(isa? field-type :type/Float)
:type/Float
(isa? field-type :type/Boolean)
:type/Boolean
:else
:type/Text)]
(hx/cast cast-type
(apply hsql/call [:json_extract_path_text
(hx/cast :json parent-identifier)
(mapv #(hx/cast :text (handle-name %)) (rest nfc-path))])))))
names (format "{%s}" (str/join "," (map handle-name (rest nfc-path))))]
(reify
hformat/ToSql
(to-sql [_]
(hformat/to-params-default names "nfc_path")
(format "(%s#>> ?::text[])::%s " (hformat/to-sql parent-identifier) field-type))))))
(defmethod sql.qp/->honeysql [:postgres :field]
[driver [_ id-or-name _opts :as clause]]
......
......@@ -12,6 +12,7 @@
[metabase.driver.sql-jdbc.sync.common :as common]
[metabase.driver.sql-jdbc.sync.interface :as i]
[metabase.driver.sql.query-processor :as sql.qp]
[metabase.mbql.schema :as mbql.s]
[metabase.util :as u]
[metabase.util.honeysql-extensions :as hx])
(:import [java.sql Connection DatabaseMetaData ResultSet]))
......@@ -209,10 +210,19 @@
(flatten-row xs path))))))]
(into {} (flatten-row row [field-name]))))
(defn- type-by-parsing-string
"Mostly just (type member) but with a bit to suss out strings which are ISO8601 and say that they are datetimes"
[member]
(let [member-type (type member)]
(if (and (instance? String member)
(mbql.s/can-parse-datetime? member))
java.time.LocalDateTime
member-type)))
(defn- row->types [row]
(into {} (for [[field-name field-val] row]
(let [flat-row (flattened-row field-name field-val)]
(into {} (map (fn [[k v]] [k (type v)]) flat-row))))))
(into {} (map (fn [[k v]] [k (type-by-parsing-string v)]) flat-row))))))
(defn- describe-json-xform [member]
((comp (map #(for [[k v] %] [k (json/parse-string v)]))
......@@ -220,33 +230,46 @@
(map row->types)) member))
(defn- describe-json-rf
"Reducing function that takes a bunch of maps from row->types,
and gets them to conform to the type hierarchy,
going through and taking the lowest common denominator type at each pass,
ignoring the nils."
([] nil)
([fst] fst)
([fst snd]
([acc-field-type-map] acc-field-type-map)
([acc-field-type-map second-field-type-map]
(into {}
(for [json-column (set/union (keys snd) (keys fst))]
(for [json-column (set/union (keys second-field-type-map)
(keys acc-field-type-map))]
(cond
(or (nil? fst)
(nil? (fst json-column))
(= (hash (fst json-column)) (hash (snd json-column))))
[json-column (snd json-column)]
(or (nil? acc-field-type-map)
(nil? (acc-field-type-map json-column))
(= (hash (acc-field-type-map json-column))
(hash (second-field-type-map json-column))))
[json-column (second-field-type-map json-column)]
(or (nil? snd)
(nil? (snd json-column)))
[json-column (fst json-column)]
(or (nil? second-field-type-map)
(nil? (second-field-type-map json-column)))
[json-column (acc-field-type-map json-column)]
(every? #(isa? % Number) [(fst json-column) (snd json-column)])
(every? #(isa? % Number) [(acc-field-type-map json-column)
(second-field-type-map json-column)])
[json-column java.lang.Number]
(every? #{java.lang.String java.lang.Long java.lang.Integer java.lang.Double java.lang.Boolean}
[(fst json-column) (snd json-column)])
(every?
(fn [column-type]
(some (fn [allowed-type]
(isa? column-type allowed-type))
[String Number Boolean java.time.LocalDateTime]))
[(acc-field-type-map json-column) (second-field-type-map json-column)])
[json-column java.lang.String]
:else
[json-column nil])))))
(def ^:const field-type-map
"We deserialize the JSON in order to determine types,
(def field-type-map
"Map from Java types for deserialized JSON (so small subset of Java types) to MBQL types.
We actually do deserialize the JSON in order to determine types,
so the java / clojure types we get have to be matched to MBQL types"
{java.lang.String :type/Text
;; JSON itself has the single number type, but Java serde of JSON is stricter
......@@ -255,16 +278,31 @@
java.lang.Double :type/Float
java.lang.Number :type/Number
java.lang.Boolean :type/Boolean
java.time.LocalDateTime :type/DateTime
clojure.lang.PersistentVector :type/Array
clojure.lang.PersistentArrayMap :type/Structured})
(def db-type-map
"Map from MBQL types to database types.
This is the lowest common denominator of types, hopefully,
although as of writing this is just geared towards Postgres types"
{:type/Text "text"
:type/Integer "integer"
:type/Float "double precision"
:type/Number "double precision"
:type/Boolean "boolean"
:type/DateTime "timestamp"
:type/Array "text"
:type/Structured "text"})
(defn- field-types->fields [field-types]
(let [valid-fields (for [[field-path field-type] (seq field-types)]
(if (nil? field-type)
nil
(let [curr-type (get field-type-map field-type :type/*)]
{:name (str/join " \u2192 " (map name field-path)) ;; right arrow
:database-type curr-type
:database-type (db-type-map curr-type)
:base-type curr-type
;; Postgres JSONB field, which gets most usage, doesn't maintain JSON object ordering...
:database-position 0
......
......@@ -279,22 +279,16 @@
(deftest ^:parallel json-query-test
(let [boop-identifier (hx/with-type-info (hx/identifier :field "boop" "bleh -> meh") {})]
(testing "Transforming MBQL query with JSON in it to postgres query works"
(let [boop-field {:nfc_path [:bleh :meh]}]
(is (= ["CAST(json_extract_path_text(CAST(boop.bleh AS json), (CAST(? AS text))) AS Text)" "meh"]
(let [boop-field {:nfc_path [:bleh :meh] :database_type "integer"}]
(is (= ["(boop.bleh#>> ?::text[])::integer " "{meh}"]
(hsql/format (#'postgres/json-query boop-identifier boop-field))))))
(testing "What if types are weird and we have lists"
(let [weird-field {:nfc_path [:bleh "meh" :foobar 1234]}]
(is (= ["CAST(json_extract_path_text(CAST(boop.bleh AS json), (CAST(? AS text), CAST(? AS text), CAST(? AS text))) AS Text)"
"meh"
"foobar"
"1234"]
(let [weird-field {:nfc_path [:bleh "meh" :foobar 1234] :database_type "integer"}]
(is (= ["(boop.bleh#>> ?::text[])::integer " "{meh,foobar,1234}"]
(hsql/format (#'postgres/json-query boop-identifier weird-field))))))
(testing "Give us a boolean cast when the field is boolean"
(let [boolean-boop-field {:effective_type :type/Boolean :nfc_path [:bleh "boop" :foobar 1234]}]
(is (= ["CAST(json_extract_path_text(CAST(boop.bleh AS json), (CAST(? AS text), CAST(? AS text), CAST(? AS text))) AS Boolean)"
"boop"
"foobar"
"1234"]
(let [boolean-boop-field {:database_type "boolean" :nfc_path [:bleh "boop" :foobar 1234]}]
(is (= ["(boop.bleh#>> ?::text[])::boolean " "{boop,foobar,1234}"]
(hsql/format (#'postgres/json-query boop-identifier boolean-boop-field))))))))
(deftest describe-nested-field-columns-test
......@@ -305,8 +299,8 @@
spec (sql-jdbc.conn/connection-details->spec :postgres details)]
(jdbc/with-db-connection [conn (sql-jdbc.conn/connection-details->spec :postgres details)]
(jdbc/execute! spec [(str "CREATE TABLE describe_json_table (coherent_json_val JSON NOT NULL, incoherent_json_val JSON NOT NULL);"
"INSERT INTO describe_json_table (coherent_json_val, incoherent_json_val) VALUES ('{\"a\": 1, \"b\": 2}', '{\"a\": 1, \"b\": 2, \"c\": 3, \"d\": 44}');"
"INSERT INTO describe_json_table (coherent_json_val, incoherent_json_val) VALUES ('{\"a\": 2, \"b\": 3}', '{\"a\": [1, 2], \"b\": \"blurgle\", \"c\": 3.22}');")]))
"INSERT INTO describe_json_table (coherent_json_val, incoherent_json_val) VALUES ('{\"a\": 1, \"b\": 2, \"c\": \"2017-01-13T17:09:22.222\"}', '{\"a\": 1, \"b\": 2, \"c\": 3, \"d\": 44}');"
"INSERT INTO describe_json_table (coherent_json_val, incoherent_json_val) VALUES ('{\"a\": 2, \"b\": 3, \"c\": \"2017-01-13T17:09:42.411\"}', '{\"a\": [1, 2], \"b\": \"blurgle\", \"c\": 3.22}');")]))
(mt/with-temp Database [database {:engine :postgres, :details details}]
(is (= :type/SerializedJSON
(->> (sql-jdbc.sync/describe-table :postgres database {:name "describe_json_table"})
......@@ -315,31 +309,37 @@
(first)
(:semantic-type))))
(is (= '#{{:name "incoherent_json_val → b",
:database-type :type/Text,
:database-type "text",
:base-type :type/Text,
:database-position 0,
:nfc-path [:incoherent_json_val "b"]
:visibility-type :normal}
{:name "coherent_json_val → a",
:database-type :type/Integer,
:database-type "integer",
:base-type :type/Integer,
:database-position 0,
:nfc-path [:coherent_json_val "a"]
:visibility-type :normal}
{:name "coherent_json_val → b",
:database-type :type/Integer,
:database-type "integer",
:base-type :type/Integer,
:database-position 0,
:nfc-path [:coherent_json_val "b"]
:visibility-type :normal}
{:name "coherent_json_val → c",
:database-type "timestamp",
:base-type :type/DateTime,
:database-position 0,
:visibility-type :normal,
:nfc-path [:coherent_json_val "c"]}
{:name "incoherent_json_val → c",
:database-type :type/Number,
:database-type "double precision",
:base-type :type/Number,
:database-position 0,
:visibility-type :normal,
:nfc-path [:incoherent_json_val "c"]}
{:name "incoherent_json_val → d",
:database-type :type/Integer,
:database-type "integer",
:base-type :type/Integer,
:database-position 0,
:visibility-type :normal,
......
......@@ -69,6 +69,12 @@
(filter :semantic-type)
(map (juxt (comp str/lower-case :name) :semantic-type))))))))
(deftest type-by-parsing-string
(testing "type-by-parsing-string"
(is (= java.lang.String (#'describe-table/type-by-parsing-string "bleh")))
(is (= java.time.LocalDateTime (#'describe-table/type-by-parsing-string "2017-01-13T17:09:42.411")))
(is (= java.lang.Long (#'describe-table/type-by-parsing-string 11111)))))
(deftest describe-nested-field-columns-test
(testing "flattened-row"
(let [row {:bob {:dobbs 123 :cobbs "boop"}}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment