Skip to content
Snippets Groups Projects
Unverified Commit 378b58fa authored by Howon Lee's avatar Howon Lee Committed by GitHub
Browse files

Nested column enrichment for JSON nested columns (#20778)

Add the capability for nested column enrichment of Postgres JSON columns, but do not wire it into sync yet
parent 808cbf6c
Branches
Tags
No related merge requests found
......@@ -170,7 +170,7 @@
"Number of rows to sample for describe-nested-field-columns"
10000)
(defn- flatten-row [row]
(defn- flattened-row [field-name row]
(letfn [(flatten-row [row path]
(lazy-seq
(when-let [[[k v] & xs] (seq row)]
......@@ -180,12 +180,12 @@
:else
(cons [(conj path k) v]
(flatten-row xs path))))))]
(into {} (flatten-row row []))))
(into {} (flatten-row row [field-name]))))
(defn- row->types [row]
(into {} (for [[field-name field-val] row]
[field-name (let [flattened-row (flatten-row field-val)]
(into {} (map (fn [[k v]] [k (type v)]) flattened-row)))])))
(let [flat-row (flattened-row field-name field-val)]
(into {} (map (fn [[k v]] [k (type v)]) flat-row))))))
(defn- describe-json-xform [member]
((comp (map #(for [[k v] %] [k (json/parse-string v)]))
......@@ -202,6 +202,33 @@
[json-column (snd json-column)]
[json-column nil])))))
;; Lookup table from the JVM class of a deserialized JSON value to the MBQL base type
;; reported for the corresponding nested field column.
;; Classes that are not listed here get the `:type/*` default supplied at the lookup
;; site in `field-types->fields`.
(def ^:const field-type-map
"We deserialize the JSON in order to determine types,
so the java / clojure types we get have to be matched to MBQL types"
{java.lang.String :type/Text
;; JSON itself has the single number type, but Java serde of JSON is stricter
;; Both integral classes below collapse to the same MBQL integer type.
java.lang.Long :type/Integer
java.lang.Integer :type/Integer
java.lang.Double :type/Float
java.lang.Boolean :type/Boolean
;; Collection values: vectors are reported as arrays, array maps as structured values.
;; NOTE(review): Clojure promotes maps with more than 8 entries to PersistentHashMap,
;; which is not listed here and would therefore get the `:type/*` default -- confirm
;; whether such (non-flattened) maps can ever reach this lookup.
clojure.lang.PersistentVector :type/Array
clojure.lang.PersistentArrayMap :type/Structured})
(defn- field-types->fields
  "Turn a map of nested-field path -> deserialized value class into a set of field metadata maps.

  Entries whose type is nil are dropped. Every remaining entry becomes a map with:
    :name              the path segments joined with a right arrow
    :database-type     always nil
    :base-type         the `field-type-map` entry for the class, defaulting to :type/*
    :database-position always 0
    :nfc-path          the path itself"
  [field-types]
  (into #{}
        ;; `keep` discards the nil produced for entries that have no type
        (keep (fn [[nfc-path value-class]]
                (when (some? value-class)
                  {:name              (str/join " \u2192 " (map name nfc-path)) ;; right arrow
                   :database-type     nil
                   :base-type         (get field-type-map value-class :type/*)
                   ;; Postgres JSONB field, which gets most usage, doesn't maintain JSON object ordering...
                   :database-position 0
                   :nfc-path          nfc-path})))
        field-types))
;; The name's nested field columns but what the people wanted (issue #708)
;; was JSON so what they're getting is JSON.
(defn- describe-nested-field-columns*
[driver spec table]
(with-open [conn (jdbc/get-connection spec)]
......@@ -214,8 +241,10 @@
sql-args (hsql/format {:select json-field-names
:from [(keyword (:name table))]
:limit nested-field-sample-limit} {:quoting :ansi})
query (jdbc/reducible-query spec sql-args)]
{:types (transduce describe-json-xform describe-json-rf query)})))
query (jdbc/reducible-query spec sql-args)
field-types (transduce describe-json-xform describe-json-rf query)
fields (field-types->fields field-types)]
fields)))
;; Describe the nested fields present in a table (currently and maybe forever just JSON),
;; including if they have proper keyword and type stability.
......
......@@ -48,6 +48,17 @@
(assoc metabase-field :nested-fields (set (for [nested-field nested-fields]
(add-nested-fields nested-field parent-id->fields)))))))
(s/defn ^:private add-nested-field-columns :- common/TableMetadataFieldWithID
  "Attach to `mb-field` the nested field columns that belong to it.

  Unlike ordinary nested fields, nested field columns arrive as one flat set. A column belongs to
  `mb-field` when the first element of its `:nfc-path` equals the keywordized `:name` of `mb-field`.
  The matching columns are stored as a set under `:nested-fields`; when nothing matches, `mb-field`
  is returned unchanged."
  [mb-field   :- common/TableMetadataFieldWithID,
   nfc-fields :- #{common/TableMetadataFieldWithID}]
  (let [child-of-field? (fn [nfc-field]
                          (= (keyword (mb-field :name))
                             (get-in nfc-field [:nfc-path 0])))
        children        (into #{} (filter child-of-field?) nfc-fields)]
    (if (empty? children)
      mb-field
      (assoc mb-field :nested-fields children))))
(s/defn fields->our-metadata :- #{common/TableMetadataFieldWithID}
"Given a sequence of Metabase Fields, format them and return them in a hierarchy so the format matches the one
`db-metadata` comes back in."
......@@ -75,7 +86,6 @@
[table :- i/TableInstance]
(-> table table->fields fields->our-metadata))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | FETCHING METADATA FROM CONNECTED DB |
;;; +----------------------------------------------------------------------------------------------------------------+
......
......@@ -286,16 +286,16 @@
(deftest describe-nested-field-columns-test
(mt/test-driver :postgres
(testing "flatten-row"
(testing "flattened-row"
(let [row {:bob {:dobbs 123 :cobbs "boop"}}
flattened {[:bob :dobbs] 123
[:bob :cobbs] "boop"}]
(is (= (#'postgres/flatten-row row) flattened))))
flattened {[:mob :bob :dobbs] 123
[:mob :bob :cobbs] "boop"}]
(is (= flattened (#'postgres/flattened-row :mob row)))))
(testing "row->types"
(let [row {:bob {:dobbs {:robbs 123} :cobbs [1 2 3]}}
types {:bob {[:cobbs] clojure.lang.PersistentVector
[:dobbs :robbs] java.lang.Long}}]
(is (= (#'postgres/row->types row) types))))
types {[:bob :cobbs] clojure.lang.PersistentVector
[:bob :dobbs :robbs] java.lang.Long}]
(is (= types (#'postgres/row->types row)))))
(testing "describes json columns and gives types for ones with coherent schemas only"
(drop-if-exists-and-create-db! "describe-json-test")
(let [details (mt/dbdef->connection-details :postgres :db {:database-name "describe-json-test"})
......@@ -304,8 +304,25 @@
"INSERT INTO describe_json_table (coherent_json_val, incoherent_json_val) VALUES ('{\"a\": 1, \"b\": 2}', '{\"a\": 1, \"b\": 2}');"
"INSERT INTO describe_json_table (coherent_json_val, incoherent_json_val) VALUES ('{\"a\": 2, \"b\": 3}', '{\"a\": [1, 2], \"b\": 2}');")])
(mt/with-temp Database [database {:engine :postgres, :details details}]
(is (= (into (sorted-map) (sql-jdbc.sync/describe-nested-field-columns :postgres database {:name "describe_json_table"}))
(into (sorted-map) {:types {:coherent_json_val {["a"] java.lang.Integer, ["b"] java.lang.Integer} :incoherent_json_val nil}}))))))))
(is (= '#{{:name "incoherent_json_val → b",
:database-type nil,
:base-type :type/Integer,
:database-position 0,
:nfc-path [:incoherent_json_val "b"]}
{:name "coherent_json_val → a",
:database-type nil,
:base-type :type/Integer,
:database-position 0,
:nfc-path [:coherent_json_val "a"]}
{:name "coherent_json_val → b",
:database-type nil,
:base-type :type/Integer,
:database-position 0,
:nfc-path [:coherent_json_val "b"]}}
(sql-jdbc.sync/describe-nested-field-columns
:postgres
database
{:name "describe_json_table"}))))))))
(mt/defdataset with-uuid
[["users"
......
......@@ -11,6 +11,51 @@
[metabase.util :as u]
[toucan.db :as db]))
;; Checks that `add-nested-field-columns` attaches only the nested field columns whose
;; `:nfc-path` starts with the parent field's name, and ignores columns of other parents.
(deftest add-nested-field-columns-test
  (testing "adds nested field columns for one field"
    (let [our-field  {:name              "coherent_json_val"
                      :base-type         :type/*
                      :database-type     "blah",
                      :database-position 0,
                      :id                1}
          ;; one column belongs to a different parent and must not be attached
          nfc-fields '#{{:name              "incoherent_json_val → b",
                         :database-type     "blah",
                         :base-type         :type/*,
                         :database-position 0,
                         :id                2,
                         :nfc-path          [:incoherent_json_val "b"]}
                        {:name              "coherent_json_val → a",
                         :database-type     "blah",
                         :base-type         :type/*,
                         :database-position 0,
                         :id                3,
                         :nfc-path          [:coherent_json_val "a"]}
                        {:name              "coherent_json_val → b",
                         :database-type     "blah",
                         :base-type         :type/*,
                         :database-position 0,
                         :id                4,
                         :nfc-path          [:coherent_json_val "b"]}}
          expected   {:name              "coherent_json_val",
                      :base-type         :type/*,
                      :database-type     "blah",
                      :database-position 0,
                      :id                1,
                      :nested-fields
                      #{{:name              "coherent_json_val → b",
                         :database-type     "blah",
                         :base-type         :type/*,
                         :database-position 0,
                         :id                4,
                         :nfc-path          [:coherent_json_val "b"]}
                        {:name              "coherent_json_val → a",
                         :database-type     "blah",
                         :base-type         :type/*,
                         :database-position 0,
                         :id                3,
                         :nfc-path          [:coherent_json_val "a"]}}}]
      ;; Expected value first, so a failure report labels expected/actual the right way round.
      (is (= expected
             (#'sync-fields.fetch-metadata/add-nested-field-columns our-field nfc-fields))))))
;; `our-metadata` should match up with what we have in the DB
(deftest does-metadata-match-test
(mt/with-temp Database [db {:engine ::toucanery/toucanery}]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment