Skip to content
Snippets Groups Projects
Unverified Commit e71d49da authored by Howon Lee's avatar Howon Lee Committed by GitHub
Browse files

Whack more divergences from JSON column spec (#21543)

Type hierarchy except for dates and arrays fixed. Visibility setting mutation on original columns and field values on deck. Still whacking away at #21534.
parent c0eee45c
No related branches found
No related tags found
No related merge requests found
(ns metabase.driver.postgres
"Database driver for PostgreSQL databases. Builds on top of the SQL JDBC driver, which implements most functionality
for JDBC-based drivers."
(:require [cheshire.core :as json]
[clojure.java.jdbc :as jdbc]
(:require [clojure.java.jdbc :as jdbc]
[clojure.set :as set]
[clojure.string :as str]
[clojure.tools.logging :as log]
......@@ -16,6 +15,7 @@
[metabase.driver.sql-jdbc.connection :as sql-jdbc.conn]
[metabase.driver.sql-jdbc.execute :as sql-jdbc.execute]
[metabase.driver.sql-jdbc.sync :as sql-jdbc.sync]
[metabase.driver.sql-jdbc.sync.describe-table :as sql-jdbc.sync.describe-table]
[metabase.driver.sql.query-processor :as sql.qp]
[metabase.driver.sql.util.unprepare :as unprepare]
[metabase.models.field :as field]
......@@ -176,119 +176,22 @@
(binding [*enum-types* (enum-types driver database)]
(sql-jdbc.sync/describe-table driver database table)))
;; Upper bound on how many rows are read from a table when inferring nested (JSON) field columns;
;; it is used as the `:limit` of the sampling query built in `describe-nested-field-columns*`.
(def ^:const nested-field-sample-limit
"Number of rows to sample for describe-nested-field-columns"
10000)
;; When a table yields more nested field columns than this, the `:postgres` implementation of
;; `describe-nested-field-columns` returns an empty set instead of a truncated one.
(def ^:const max-nested-field-columns
"Maximum number of nested field columns."
100)
(defn- flattened-row
  "Flatten the parsed JSON value `row` into a map of key path -> leaf value. Every path is a vector
  that starts with `field-name`. Non-empty nested maps are descended into; any other value (scalar,
  vector, empty map, nil) is kept as a leaf.

  Returns `{}` when `row` itself is not a map (nil, or a top-level JSON array or scalar): such a
  value has no keys to describe, and destructuring it as key/value pairs would throw."
  [field-name row]
  (letfn [(flatten-row [m path]
            (reduce (fn [acc [k v]]
                      (let [path' (conj path k)]
                        (if (and (map? v) (not-empty v))
                          (merge acc (flatten-row v path'))
                          (assoc acc path' v))))
                    {}
                    m))]
    (if (map? row)
      (flatten-row row [field-name])
      {})))
(defn- row->types
  "Given one row (a map of column name -> parsed JSON value), return a single map of
  flattened key path -> `type` of the leaf value found at that path."
  [row]
  (->> row
       (mapcat (fn [[column value]]
                 (for [[path leaf] (flattened-row column value)]
                   [path (type leaf)])))
       (into {})))
(defn- describe-json-xform
  "Wrap the reducing function `member` with the per-row pipeline: parse every column value of the
  row as JSON, then turn the parsed row into a map of key path -> type via `row->types`."
  [member]
  (let [parse-row (fn [row]
                    (into {} (for [[k v] row]
                               [k (json/parse-string v)])))
        pipeline  (comp (map parse-row)
                        (map row->types))]
    (pipeline member)))
(defn- describe-json-rf
  "Reducing function that folds the per-row maps produced by `row->types` into one map of
  key path -> Java class.

  - 0-arity: initial accumulator (nil).
  - 1-arity: completion step, returns the accumulator unchanged.
  - 2-arity: merges accumulator `fst` with the next row's types `snd`. Every path present in
    either map is kept. A path whose type is missing (nil) on one side takes the other side's
    type; Long/Integer combine to Long; any mix of String/Long/Integer/Double/Boolean combines
    to String; anything else becomes nil.

  NOTE: nil stands both for 'not seen yet' and for 'incoherent', so a later row can give a type
  back to a path that was previously marked incoherent."
  ([] nil)
  ([fst] fst)
  ([fst snd]
   (into {}
         ;; Walk the union of both key sets so paths missing from the current row are not lost.
         (for [json-column (into (set (keys fst)) (keys snd))
               :let [fst-type (get fst json-column)
                     snd-type (get snd json-column)]]
           [json-column
            (cond
              (or (nil? fst-type) (= fst-type snd-type))
              snd-type

              (nil? snd-type)
              fst-type

              ;; Not too much complexity in type hierarchy because
              ;; there's not too much complexity in JSON's types
              (every? #{java.lang.Long java.lang.Integer} [fst-type snd-type])
              java.lang.Long

              (every? #{java.lang.String java.lang.Long java.lang.Integer java.lang.Double java.lang.Boolean}
                      [fst-type snd-type])
              java.lang.String

              :else
              nil)]))))
(def ^:const field-type-map
  "Lookup from the Java / Clojure class of a deserialized JSON value to the type keyword
  reported for the corresponding nested field."
  {;; containers
   clojure.lang.PersistentArrayMap :type/Structured
   clojure.lang.PersistentVector   :type/Array
   ;; scalars
   Boolean                         :type/Boolean
   String                          :type/Text
   ;; JSON has a single number type, but the deserializer hands back distinct Java classes
   Double                          :type/Float
   Integer                         :type/Integer
   Long                            :type/Integer})
(defn- field-types->fields
  "Convert a map of key path -> Java class into a set of field description maps. Paths whose
  class is nil are left out; classes missing from `field-type-map` fall back to `:type/*`."
  [field-types]
  (into #{}
        (keep (fn [[field-path field-type]]
                (when (some? field-type)
                  {:nfc-path          field-path
                   ;; Postgres JSONB field, which gets most usage, doesn't maintain JSON object ordering...
                   :database-position 0
                   :base-type         (get field-type-map field-type :type/*)
                   :database-type     nil
                   :name              (str/join " \u2192 " (map name field-path))}))) ;; right arrow
        field-types))
;; The name's nested field columns but what the people wanted (issue #708)
;; was JSON so what they're getting is JSON.
(defn- describe-nested-field-columns*
  "Describe the nested (JSON) field columns of `table`.

  Looks up the table's fields, keeps those whose `:semantic-type` is `:type/SerializedJSON`,
  samples up to `nested-field-sample-limit` rows of those columns, and folds the sampled rows
  into a set of field maps via `describe-json-xform`, `describe-json-rf` and
  `field-types->fields`. Returns `#{}` when the table has no JSON fields."
  [driver spec table]
  ;; `conn` is only used to look up the table's fields; the sampling query below runs against `spec`.
  (with-open [conn (jdbc/get-connection spec)]
    (let [table-fields (sql-jdbc.sync/describe-table-fields driver conn table)
          json-fields  (filter #(= (:semantic-type %) :type/SerializedJSON) table-fields)]
      (if (empty? json-fields)
        #{}
        (let [json-field-names (mapv (comp keyword :name) json-fields)
              ;; NOTE(review): the table is referenced by `:name` only, with no schema qualifier.
              ;; NOTE(review): options are passed to `hsql/format` as a map; confirm against the
              ;; HoneySQL version in use that this form actually enables `:ansi` quoting.
              sql-args         (hsql/format {:select json-field-names
                                             :from   [(keyword (:name table))]
                                             :limit  nested-field-sample-limit} {:quoting :ansi})
              query            (jdbc/reducible-query spec sql-args)
              field-types      (transduce describe-json-xform describe-json-rf query)]
          (field-types->fields field-types))))))
;; Describe the nested fields present in a table (currently and maybe forever just JSON),
;; including if they have proper keyword and type stability.
;; Not to be confused with existing nested field functionality for mongo,
;; since this one only applies to JSON fields, whereas mongo only has BSON (JSON basically) fields.
;; Every single database major is fiddly and weird and different about JSON, so each driver opts in
;; explicitly; this one delegates the sampling to `sql-jdbc.sync.describe-table`.
(defmethod sql-jdbc.sync/describe-nested-field-columns :postgres
  [driver database table]
  (let [spec   (sql-jdbc.conn/db->pooled-connection-spec database)
        fields (sql-jdbc.sync.describe-table/describe-nested-field-columns driver spec table)]
    ;; Blank out entirely rather than silently truncating when there are too many columns.
    (if (> (count fields) max-nested-field-columns)
      #{}
      fields)))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | metabase.driver.sql impls |
;;; +----------------------------------------------------------------------------------------------------------------+
......
(ns metabase.driver.sql-jdbc.sync.describe-table
"SQL JDBC impl for `describe-table` and `describe-table-fks`."
(:require [clojure.java.jdbc :as jdbc]
"SQL JDBC impl for `describe-table`, `describe-table-fks`, and `describe-nested-field-columns`."
(:require [cheshire.core :as json]
[clojure.java.jdbc :as jdbc]
[clojure.set :as set]
[clojure.string :as str]
[clojure.tools.logging :as log]
[honeysql.core :as hsql]
[medley.core :as m]
[metabase.driver :as driver]
[metabase.driver.sql-jdbc.connection :as sql-jdbc.conn]
......@@ -189,3 +192,106 @@
(let [spec (sql-jdbc.conn/db->pooled-connection-spec db-or-id-or-spec-or-conn)]
(with-open [conn (jdbc/get-connection spec)]
(describe-table-fks* driver conn table db-name-or-nil)))))
;; Upper bound on how many rows are read from a table when inferring nested (JSON) field columns;
;; it is used as the `:limit` of the sampling query built in `describe-nested-field-columns`.
(def ^:const nested-field-sample-limit
"Number of rows to sample for describe-nested-field-columns"
10000)
(defn- flattened-row
  "Flatten the parsed JSON value `row` into a map of key path -> leaf value. Every path is a vector
  that starts with `field-name`. Non-empty nested maps are descended into; any other value (scalar,
  vector, empty map, nil) is kept as a leaf.

  Returns `{}` when `row` itself is not a map (nil, or a top-level JSON array or scalar): such a
  value has no keys to describe, and destructuring it as key/value pairs would throw."
  [field-name row]
  (letfn [(flatten-row [m path]
            (reduce (fn [acc [k v]]
                      (let [path' (conj path k)]
                        (if (and (map? v) (not-empty v))
                          (merge acc (flatten-row v path'))
                          (assoc acc path' v))))
                    {}
                    m))]
    (if (map? row)
      (flatten-row row [field-name])
      {})))
(defn- row->types
  "Given one row (a map of column name -> parsed JSON value), return a single map of
  flattened key path -> `type` of the leaf value found at that path."
  [row]
  (->> row
       (mapcat (fn [[column value]]
                 (for [[path leaf] (flattened-row column value)]
                   [path (type leaf)])))
       (into {})))
(defn- describe-json-xform
  "Wrap the reducing function `member` with the per-row pipeline: parse every column value of the
  row as JSON, then turn the parsed row into a map of key path -> type via `row->types`."
  [member]
  (let [parse-row (fn [row]
                    (into {} (for [[k v] row]
                               [k (json/parse-string v)])))
        pipeline  (comp (map parse-row)
                        (map row->types))]
    (pipeline member)))
(defn- describe-json-rf
  "Reducing function that folds the per-row maps produced by `row->types` into one map of
  key path -> Java class, widening to the lowest common type on each step.

  - 0-arity: initial accumulator (nil).
  - 1-arity: completion step, returns the accumulator unchanged.
  - 2-arity: merges accumulator `fst` with the next row's types `snd`. Every path present in
    either map is kept. A path whose type is missing (nil) on one side takes the other side's
    type; two numeric classes widen to `java.lang.Number`; any mix of String / Number / Boolean
    classes widens to `java.lang.String`; anything else becomes nil.

  NOTE: nil stands both for 'not seen yet' and for 'incoherent', so a later row can give a type
  back to a path that was previously marked incoherent."
  ([] nil)
  ([fst] fst)
  ([fst snd]
   (let [scalar? (fn [t] (some #(isa? t %) [String Number Boolean]))]
     (into {}
           ;; Build a real set of keys: `clojure.set/union` is only specified for set arguments.
           (for [json-column (into (set (keys fst)) (keys snd))
                 ;; `get` is nil-safe, so a nil `fst` or `snd` is simply a missing type.
                 :let [fst-type (get fst json-column)
                       snd-type (get snd json-column)]]
             [json-column
              (cond
                (or (nil? fst-type) (= fst-type snd-type))
                snd-type

                (nil? snd-type)
                fst-type

                (every? #(isa? % Number) [fst-type snd-type])
                java.lang.Number

                ;; Subclass check (not an exact-class set) so an already-widened Number still
                ;; combines with String, keeping the result independent of row order.
                (every? scalar? [fst-type snd-type])
                java.lang.String

                :else
                nil)])))))
(def ^:const field-type-map
  "Lookup from the Java / Clojure class of a deserialized JSON value to the type keyword
  reported for the corresponding nested field."
  {;; containers
   clojure.lang.PersistentArrayMap :type/Structured
   clojure.lang.PersistentVector   :type/Array
   ;; scalars
   Boolean                         :type/Boolean
   String                          :type/Text
   ;; JSON has a single number type, but the deserializer hands back distinct Java classes
   Number                          :type/Number
   Double                          :type/Float
   Integer                         :type/Integer
   Long                            :type/Integer})
(defn- field-types->fields
  "Convert a map of key path -> Java class into a set of field description maps. Paths whose
  class is nil are left out; classes missing from `field-type-map` fall back to `:type/*`, and
  the resolved type is used for both `:database-type` and `:base-type`."
  [field-types]
  (into #{}
        (keep (fn [[field-path field-type]]
                (when (some? field-type)
                  (let [resolved-type (get field-type-map field-type :type/*)]
                    {:nfc-path          field-path
                     :visibility-type   :normal
                     ;; Postgres JSONB field, which gets most usage, doesn't maintain JSON object ordering...
                     :database-position 0
                     :base-type         resolved-type
                     :database-type     resolved-type
                     :name              (str/join " \u2192 " (map name field-path))})))) ;; right arrow
        field-types))
;; The name's nested field columns but what the people wanted (issue #708)
;; was JSON so what they're getting is JSON.
(defn describe-nested-field-columns
  "Default implementation of `describe-nested-field-columns` for SQL JDBC drivers.

  Looks up the table's fields, keeps those whose `:semantic-type` is `:type/SerializedJSON`,
  samples up to `nested-field-sample-limit` rows of those columns, and folds the sampled rows
  into a set of field maps via `describe-json-xform`, `describe-json-rf` and
  `field-types->fields`. Returns `#{}` when the table has no JSON fields."
  [driver spec table]
  ;; `conn` is only used to look up the table's fields; the sampling query below runs against `spec`.
  (with-open [conn (jdbc/get-connection spec)]
    (let [table-fields (describe-table-fields driver conn table)
          json-fields  (filter #(= (:semantic-type %) :type/SerializedJSON) table-fields)]
      (if (empty? json-fields)
        #{}
        (let [json-field-names (mapv (comp keyword :name) json-fields)
              ;; NOTE(review): the table is referenced by `:name` only, with no schema qualifier.
              ;; NOTE(review): options are passed to `hsql/format` as a map; confirm against the
              ;; HoneySQL version in use that this form actually enables `:ansi` quoting.
              sql-args         (hsql/format {:select json-field-names
                                             :from   [(keyword (:name table))]
                                             :limit  nested-field-sample-limit} {:quoting :ansi})
              query            (jdbc/reducible-query spec sql-args)
              field-types      (transduce describe-json-xform describe-json-rf query)]
          (field-types->fields field-types))))))
......@@ -111,6 +111,3 @@
{:added "0.43.0", :arglists '([driver database table])}
driver/dispatch-on-initialized-driver
:hierarchy #'driver/hierarchy)
;; Fallback for `:sql-jdbc`: ignores all three arguments and reports no nested field columns (nil).
(defmethod describe-nested-field-columns :sql-jdbc [_ _ _]
nil)
......@@ -44,6 +44,10 @@
:fields #{TableMetadataField}
(s/optional-key :description) (s/maybe su/NonBlankString)})
;; Either nil (allowed by `s/maybe`) or a set whose elements each match `TableMetadataField`.
(def NestedFCMetadata
"Schema for the expected output of `describe-nested-field-columns`."
(s/maybe #{TableMetadataField}))
(def FKMetadataEntry
"Schema for an individual entry in `FKMetadata`."
{:fk-column-name su/NonBlankString
......
......@@ -299,49 +299,58 @@
;; End-to-end check against a real Postgres database: creates a table with two JSON columns,
;; then verifies that the columns are detected as serialized JSON and that nested field columns
;; are reported only for key paths whose types are coherent across rows.
(deftest describe-nested-field-columns-test
  (mt/test-driver :postgres
    (testing "describes json columns and gives types for ones with coherent schemas only"
      (drop-if-exists-and-create-db! "describe-json-test")
      (let [details (mt/dbdef->connection-details :postgres :db {:database-name "describe-json-test"})
            spec    (sql-jdbc.conn/connection-details->spec :postgres details)]
        ;; Run the setup statements on the connection that is opened here.
        (jdbc/with-db-connection [conn spec]
          (jdbc/execute! conn [(str "CREATE TABLE describe_json_table (coherent_json_val JSON NOT NULL, incoherent_json_val JSON NOT NULL);"
                                    "INSERT INTO describe_json_table (coherent_json_val, incoherent_json_val) VALUES ('{\"a\": 1, \"b\": 2}', '{\"a\": 1, \"b\": 2, \"c\": 3, \"d\": 44}');"
                                    "INSERT INTO describe_json_table (coherent_json_val, incoherent_json_val) VALUES ('{\"a\": 2, \"b\": 3}', '{\"a\": [1, 2], \"b\": \"blurgle\", \"c\": 3.22}');")]))
        (mt/with-temp Database [database {:engine :postgres, :details details}]
          (is (= :type/SerializedJSON
                 (->> (sql-jdbc.sync/describe-table :postgres database {:name "describe_json_table"})
                      (:fields)
                      ;; `take`, not `:take`: the keyword form is a lookup that merely fell
                      ;; through to its default value.
                      (take 1)
                      (first)
                      (:semantic-type))))
          ;; "incoherent_json_val → a" is absent: it is a number in one row and an array in the other.
          (is (= '#{{:name              "incoherent_json_val → b",
                     :database-type     :type/Text,
                     :base-type         :type/Text,
                     :database-position 0,
                     :nfc-path          [:incoherent_json_val "b"]
                     :visibility-type   :normal}
                    {:name              "coherent_json_val → a",
                     :database-type     :type/Integer,
                     :base-type         :type/Integer,
                     :database-position 0,
                     :nfc-path          [:coherent_json_val "a"]
                     :visibility-type   :normal}
                    {:name              "coherent_json_val → b",
                     :database-type     :type/Integer,
                     :base-type         :type/Integer,
                     :database-position 0,
                     :nfc-path          [:coherent_json_val "b"]
                     :visibility-type   :normal}
                    {:name              "incoherent_json_val → c",
                     :database-type     :type/Number,
                     :base-type         :type/Number,
                     :database-position 0,
                     :visibility-type   :normal,
                     :nfc-path          [:incoherent_json_val "c"]}
                    {:name              "incoherent_json_val → d",
                     :database-type     :type/Integer,
                     :base-type         :type/Integer,
                     :database-position 0,
                     :visibility-type   :normal,
                     :nfc-path          [:incoherent_json_val "d"]}}
                 (sql-jdbc.sync/describe-nested-field-columns
                  :postgres
                  database
                  {:name "describe_json_table"}))))))))
(deftest describe-big-nested-field-columns-test
(mt/test-driver :postgres
(testing "blank out if huge. blank out instead of silently limiting"
(drop-if-exists-and-create-db! "big-json-test")
(let [details (mt/dbdef->connection-details :postgres :db {:database-name "big-json-test"})
......@@ -350,7 +359,8 @@
big-json (json/generate-string big-map)
sql (str "CREATE TABLE big_json_table (big_json JSON NOT NULL);"
(format "INSERT INTO big_json_table (big_json) VALUES ('%s');" big-json))]
(jdbc/execute! spec [sql])
(jdbc/with-db-connection [conn (sql-jdbc.conn/connection-details->spec :postgres details)]
(jdbc/execute! spec [sql]))
(mt/with-temp Database [database {:engine :postgres, :details details}]
(is (= #{}
(sql-jdbc.sync/describe-nested-field-columns
......
......@@ -68,3 +68,15 @@
:fields
(filter :semantic-type)
(map (juxt (comp str/lower-case :name) :semantic-type))))))))
;; Unit checks for the private JSON-flattening helpers; no database is involved.
(deftest describe-nested-field-columns-test
  (testing "flattened-row"
    ;; nested keys become paths prefixed with the field name
    (is (= {[:mob :bob :dobbs] 123
            [:mob :bob :cobbs] "boop"}
           (#'describe-table/flattened-row :mob {:bob {:dobbs 123 :cobbs "boop"}}))))
  (testing "row->types"
    ;; leaves are replaced by their classes; vectors are leaves, maps are descended into
    (is (= {[:bob :cobbs]        clojure.lang.PersistentVector
            [:bob :dobbs :robbs] java.lang.Long}
           (#'describe-table/row->types {:bob {:dobbs {:robbs 123} :cobbs [1 2 3]}})))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment