Skip to content
Snippets Groups Projects
Unverified Commit e71d49da authored by Howon Lee's avatar Howon Lee Committed by GitHub
Browse files

Whack more divergences from JSON column spec (#21543)

Type hierarchy except for dates and arrays fixed. Visibility setting mutation on original columns and field values on deck. Still whacking away at #21534.
parent c0eee45c
No related merge requests found
(ns metabase.driver.postgres
"Database driver for PostgreSQL databases. Builds on top of the SQL JDBC driver, which implements most functionality
for JDBC-based drivers."
(:require [cheshire.core :as json]
[clojure.java.jdbc :as jdbc]
(:require [clojure.java.jdbc :as jdbc]
[clojure.set :as set]
[clojure.string :as str]
[clojure.tools.logging :as log]
......@@ -16,6 +15,7 @@
[metabase.driver.sql-jdbc.connection :as sql-jdbc.conn]
[metabase.driver.sql-jdbc.execute :as sql-jdbc.execute]
[metabase.driver.sql-jdbc.sync :as sql-jdbc.sync]
[metabase.driver.sql-jdbc.sync.describe-table :as sql-jdbc.sync.describe-table]
[metabase.driver.sql.query-processor :as sql.qp]
[metabase.driver.sql.util.unprepare :as unprepare]
[metabase.models.field :as field]
......@@ -176,119 +176,22 @@
(binding [*enum-types* (enum-types driver database)]
(sql-jdbc.sync/describe-table driver database table)))
(def ^:const nested-field-sample-limit
  "Row limit applied to the sampling query that is used to infer nested (JSON) field columns."
  10000)
(def ^:const max-nested-field-columns
  "Largest number of nested field columns that will be reported for a table; when a table yields more than this,
  an empty set is returned instead."
  100)
(defn- flattened-row
  "Flatten the nested map `row` into a single-level map of key path -> leaf value. Every path is a vector that
  starts with `field-name`. Non-empty maps are descended into; any other value (including empty maps and
  vectors) is kept as a leaf."
  [field-name row]
  (letfn [(leaf-entries [node path]
            (mapcat (fn [[k v]]
                      (let [child-path (conj path k)]
                        (if (and (map? v) (seq v))
                          ;; non-empty map: keep walking down
                          (leaf-entries v child-path)
                          ;; anything else is a leaf
                          [[child-path v]])))
                    node))]
    (into {} (leaf-entries row [field-name]))))
(defn- row->types
  "Given a row of field name -> parsed value, flatten every value with `flattened-row` and return one map of
  key path -> `type` of the leaf value found at that path."
  [row]
  (reduce (fn [acc [field-name field-val]]
            (into acc
                  (map (fn [[path leaf]] [path (type leaf)]))
                  (flattened-row field-name field-val)))
          {}
          row))
(defn- describe-json-xform
  "Transducer applied to each sampled row: parse every column value with `json/parse-string`, collect the
  results into a map, and convert that map to key path -> type with `row->types`. `member` is the reducing
  function being wrapped."
  [member]
  (let [parse-row (fn [row]
                    (into {}
                          (for [[column raw-json] row]
                            [column (json/parse-string raw-json)])))
        xform     (map (comp row->types parse-row))]
    (xform member)))
(defn- describe-json-rf
  "Reducing function that merges the key path -> type maps produced by `row->types` into one map.

  * 0-arity: initial accumulator (nil).
  * 1-arity: completion; returns the accumulator unchanged.
  * 2-arity: merges accumulator `fst` with the next row's map `snd`. Every key path present in either map is
    kept. A path with no type on one side takes the type from the other side. Two different integer types
    widen to `Long`, any other mix of scalar types widens to `String`, and everything else becomes nil.

  NOTE(review): nil means both \"no type seen yet\" and \"incoherent\", so a later row can give a type back to
  a path that was previously marked incoherent -- confirm whether that is intended."
  ([] nil)
  ([fst] fst)
  ([fst snd]
   (into {}
         ;; visit the key paths of BOTH maps so that paths missing from the latest row are not dropped
         (for [json-column (into (set (keys fst)) (keys snd))]
           (let [fst-type (get fst json-column)
                 snd-type (get snd json-column)]
             (cond
               ;; nothing known yet, or both sides agree
               (or (nil? fst-type) (= fst-type snd-type))
               [json-column snd-type]

               ;; the new row has no information for this path
               (nil? snd-type)
               [json-column fst-type]

               ;; Not too much complexity in type hierarchy because
               ;; there's not too much complexity in JSON's types
               (every? #{java.lang.Long java.lang.Integer} [fst-type snd-type])
               [json-column java.lang.Long]

               (every? #{java.lang.String java.lang.Long java.lang.Integer java.lang.Double java.lang.Boolean}
                       [fst-type snd-type])
               [json-column java.lang.String]

               :else
               [json-column nil]))))))
(def ^:const field-type-map
  "Lookup from the Java / Clojure class of a deserialized JSON value to the MBQL base type used for it."
  {;; text
   String                          :type/Text
   ;; numbers: JSON has a single number type, but the deserialized Java values are more specific
   Long                            :type/Integer
   Integer                         :type/Integer
   Double                          :type/Float
   ;; booleans
   Boolean                         :type/Boolean
   ;; collections
   clojure.lang.PersistentVector   :type/Array
   clojure.lang.PersistentArrayMap :type/Structured})
(defn- field-types->fields
  "Convert a map of key path -> type into a set of field description maps. Paths whose type is nil are left
  out. The field name is the path segments joined with a right arrow; the base type comes from
  `field-type-map`, defaulting to `:type/*`."
  [field-types]
  (into #{}
        (keep (fn [[field-path field-type]]
                (when (some? field-type)
                  {:name              (str/join " \u2192 " (map name field-path)) ;; right arrow
                   :database-type     nil
                   :base-type         (get field-type-map field-type :type/*)
                   ;; Postgres JSONB field, which gets most usage, doesn't maintain JSON object ordering...
                   :database-position 0
                   :nfc-path          field-path})))
        field-types))
;; The name's nested field columns but what the people wanted (issue #708)
;; was JSON so what they're getting is JSON.
(defn- describe-nested-field-columns*
  "Return the set of nested field columns found inside the JSON columns of `table`.

  Opens a connection from `spec`, asks `sql-jdbc.sync/describe-table-fields` for the table's fields and keeps
  those whose `:semantic-type` is `:type/SerializedJSON`. When there are none, returns `#{}`. Otherwise selects
  those columns for up to `nested-field-sample-limit` rows, reduces the rows to a key path -> type map, and
  converts that map to field descriptions with `field-types->fields`."
  [driver spec table]
  (with-open [conn (jdbc/get-connection spec)]
    (let [table-fields (sql-jdbc.sync/describe-table-fields driver conn table)
          json-fields  (filter #(= (:semantic-type %) :type/SerializedJSON) table-fields)]
      (if (empty? json-fields)
        #{}
        ;; NOTE(review): only `(:name table)` is used in the FROM clause; confirm whether tables outside the
        ;; default schema need to be schema-qualified here.
        ;; NOTE(review): confirm that this HoneySQL version reads the trailing map as options rather than as
        ;; named parameters.
        (let [json-field-names (mapv (comp keyword :name) json-fields)
              sql-args         (hsql/format {:select json-field-names
                                             :from   [(keyword (:name table))]
                                             :limit  nested-field-sample-limit} {:quoting :ansi})
              query            (jdbc/reducible-query spec sql-args)
              field-types      (transduce describe-json-xform describe-json-rf query)]
          (field-types->fields field-types))))))
;; Describe the nested fields present in a table (currently and maybe forever just JSON),
;; including if they have proper keyword and type stability.
;; Not to be confused with existing nested field functionality for mongo,
;; since this one only applies to JSON fields, whereas mongo only has BSON (JSON basically) fields.
;; Every major database is fiddly, weird, and different about JSON, so there's only a trivial default impl in sql.jdbc
;; Postgres implementation: builds a connection spec for `database`, delegates the JSON sampling to the shared
;; implementation in `sql-jdbc.sync.describe-table`, and blanks the result out when it is too large.
(defmethod sql-jdbc.sync/describe-nested-field-columns :postgres
  [driver database table]
  (let [spec   (sql-jdbc.conn/db->pooled-connection-spec database)
        ;; bound exactly once, so the sampling query runs a single time
        fields (sql-jdbc.sync.describe-table/describe-nested-field-columns driver spec table)]
    ;; more than `max-nested-field-columns` results: return nothing rather than a truncated set
    (if (> (count fields) max-nested-field-columns)
      #{}
      fields)))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | metabase.driver.sql impls |
;;; +----------------------------------------------------------------------------------------------------------------+
......
(ns metabase.driver.sql-jdbc.sync.describe-table
"SQL JDBC impl for `describe-table` and `describe-table-fks`."
(:require [clojure.java.jdbc :as jdbc]
"SQL JDBC impl for `describe-table`, `describe-table-fks`, and `describe-nested-field-columns`."
(:require [cheshire.core :as json]
[clojure.java.jdbc :as jdbc]
[clojure.set :as set]
[clojure.string :as str]
[clojure.tools.logging :as log]
[honeysql.core :as hsql]
[medley.core :as m]
[metabase.driver :as driver]
[metabase.driver.sql-jdbc.connection :as sql-jdbc.conn]
......@@ -189,3 +192,106 @@
(let [spec (sql-jdbc.conn/db->pooled-connection-spec db-or-id-or-spec-or-conn)]
(with-open [conn (jdbc/get-connection spec)]
(describe-table-fks* driver conn table db-name-or-nil)))))
(def ^:const nested-field-sample-limit
  "Row limit applied to the sampling query that `describe-nested-field-columns` runs to infer nested (JSON)
  field columns."
  10000)
(defn- flattened-row
  "Flatten the nested map `row` into a single-level map of key path -> leaf value. Every path is a vector that
  starts with `field-name`. Non-empty maps are descended into; any other value (including empty maps and
  vectors) is kept as a leaf."
  [field-name row]
  (letfn [(leaf-entries [node path]
            (mapcat (fn [[k v]]
                      (let [child-path (conj path k)]
                        (if (and (map? v) (seq v))
                          ;; non-empty map: keep walking down
                          (leaf-entries v child-path)
                          ;; anything else is a leaf
                          [[child-path v]])))
                    node))]
    (into {} (leaf-entries row [field-name]))))
(defn- row->types
  "Given a row of field name -> parsed value, flatten every value with `flattened-row` and return one map of
  key path -> `type` of the leaf value found at that path."
  [row]
  (reduce (fn [acc [field-name field-val]]
            (into acc
                  (map (fn [[path leaf]] [path (type leaf)]))
                  (flattened-row field-name field-val)))
          {}
          row))
(defn- describe-json-xform
  "Transducer applied to each sampled row: parse every column value with `json/parse-string`, collect the
  results into a map, and convert that map to key path -> type with `row->types`. `member` is the reducing
  function being wrapped."
  [member]
  (let [parse-row (fn [row]
                    (into {}
                          (for [[column raw-json] row]
                            [column (json/parse-string raw-json)])))
        xform     (map (comp row->types parse-row))]
    (xform member)))
(defn- describe-json-rf
  "Reducing function that merges the key path -> type maps produced by `row->types` into one map.

  * 0-arity: initial accumulator (nil).
  * 1-arity: completion; returns the accumulator unchanged.
  * 2-arity: merges accumulator `fst` with the next row's map `snd`. Every key path present in either map is
    kept. A path with no type on one side takes the type from the other side. Two different `Number`
    subclasses widen to `Number`, any other mix of scalar types widens to `String`, and everything else
    becomes nil.

  NOTE(review): nil means both \"no type seen yet\" and \"incoherent\", so a later row can give a type back to
  a path that was previously marked incoherent -- confirm whether that is intended."
  ([] nil)
  ([fst] fst)
  ([fst snd]
   (into {}
         ;; build a real set of key paths: `clojure.set/union` is only defined for set arguments, and `keys`
         ;; returns a seq (or nil)
         (for [json-column (into (set (keys fst)) (keys snd))]
           (let [fst-type (get fst json-column)
                 snd-type (get snd json-column)]
             (cond
               ;; nothing known yet, or both sides agree (compared directly rather than by hash)
               (or (nil? fst-type) (= fst-type snd-type))
               [json-column snd-type]

               ;; the new row has no information for this path
               (nil? snd-type)
               [json-column fst-type]

               (every? #(isa? % Number) [fst-type snd-type])
               [json-column java.lang.Number]

               (every? #{java.lang.String java.lang.Long java.lang.Integer java.lang.Double java.lang.Boolean}
                       [fst-type snd-type])
               [json-column java.lang.String]

               :else
               [json-column nil]))))))
(def ^:const field-type-map
  "Lookup from the Java / Clojure class of a deserialized JSON value to the MBQL base type used for it."
  {;; text
   String                          :type/Text
   ;; numbers: JSON has a single number type, but the deserialized Java values are more specific
   Long                            :type/Integer
   Integer                         :type/Integer
   Double                          :type/Float
   Number                          :type/Number
   ;; booleans
   Boolean                         :type/Boolean
   ;; collections
   clojure.lang.PersistentVector   :type/Array
   clojure.lang.PersistentArrayMap :type/Structured})
(defn- field-types->fields
  "Convert a map of key path -> type into a set of field description maps. Paths whose type is nil are left
  out. The field name is the path segments joined with a right arrow; the type looked up in `field-type-map`
  (defaulting to `:type/*`) is used for both `:database-type` and `:base-type`."
  [field-types]
  (into #{}
        (keep (fn [[field-path field-type]]
                (when (some? field-type)
                  (let [curr-type (get field-type-map field-type :type/*)]
                    {:name              (str/join " \u2192 " (map name field-path)) ;; right arrow
                     :database-type     curr-type
                     :base-type         curr-type
                     ;; Postgres JSONB field, which gets most usage, doesn't maintain JSON object ordering...
                     :database-position 0
                     :visibility-type   :normal
                     :nfc-path          field-path}))))
        field-types))
;; The name's nested field columns but what the people wanted (issue #708)
;; was JSON so what they're getting is JSON.
(defn describe-nested-field-columns
  "Default implementation of `describe-nested-field-columns` for SQL JDBC drivers. Goes and queries the table if
  there are JSON columns for the nested contents.

  Opens a connection from `spec`, asks `describe-table-fields` for the table's fields and keeps those whose
  `:semantic-type` is `:type/SerializedJSON`. When there are none, returns `#{}`. Otherwise selects those
  columns for up to `nested-field-sample-limit` rows, reduces the rows to a key path -> type map, and converts
  that map to field descriptions with `field-types->fields`."
  [driver spec table]
  (with-open [conn (jdbc/get-connection spec)]
    (let [table-fields (describe-table-fields driver conn table)
          json-fields  (filter #(= (:semantic-type %) :type/SerializedJSON) table-fields)]
      (if (empty? json-fields)
        #{}
        ;; NOTE(review): only `(:name table)` is used in the FROM clause; confirm whether tables outside the
        ;; default schema need to be schema-qualified here.
        ;; NOTE(review): confirm that this HoneySQL version reads the trailing map as options rather than as
        ;; named parameters.
        (let [json-field-names (mapv (comp keyword :name) json-fields)
              sql-args         (hsql/format {:select json-field-names
                                             :from   [(keyword (:name table))]
                                             :limit  nested-field-sample-limit} {:quoting :ansi})
              query            (jdbc/reducible-query spec sql-args)
              field-types      (transduce describe-json-xform describe-json-rf query)]
          (field-types->fields field-types))))))
......@@ -111,6 +111,3 @@
{:added "0.43.0", :arglists '([driver database table])}
driver/dispatch-on-initialized-driver
:hierarchy #'driver/hierarchy)
;; Fallback for `:sql-jdbc`: ignores all three arguments and reports no nested field columns (nil).
(defmethod describe-nested-field-columns :sql-jdbc
  [_driver _database _table]
  nil)
......@@ -44,6 +44,10 @@
:fields #{TableMetadataField}
(s/optional-key :description) (s/maybe su/NonBlankString)})
;; Either nil or a set of `TableMetadataField` maps.
(def NestedFCMetadata
"Schema for the expected output of `describe-nested-field-columns`."
(s/maybe #{TableMetadataField}))
(def FKMetadataEntry
"Schema for an individual entry in `FKMetadata`."
{:fk-column-name su/NonBlankString
......
......@@ -299,49 +299,58 @@
;; End-to-end check against a real Postgres database: creates a table with two JSON columns, then verifies
;; that the columns are detected as serialized JSON and that nested fields are reported with merged types.
(deftest describe-nested-field-columns-test
  (mt/test-driver :postgres
    (testing "describes json columns and gives types for ones with coherent schemas only"
      (drop-if-exists-and-create-db! "describe-json-test")
      (let [details (mt/dbdef->connection-details :postgres :db {:database-name "describe-json-test"})
            spec    (sql-jdbc.conn/connection-details->spec :postgres details)]
        ;; run the setup statements on the connection that was just opened
        (jdbc/with-db-connection [conn spec]
          (jdbc/execute! conn [(str "CREATE TABLE describe_json_table (coherent_json_val JSON NOT NULL, incoherent_json_val JSON NOT NULL);"
                                    "INSERT INTO describe_json_table (coherent_json_val, incoherent_json_val) VALUES ('{\"a\": 1, \"b\": 2}', '{\"a\": 1, \"b\": 2, \"c\": 3, \"d\": 44}');"
                                    "INSERT INTO describe_json_table (coherent_json_val, incoherent_json_val) VALUES ('{\"a\": 2, \"b\": 3}', '{\"a\": [1, 2], \"b\": \"blurgle\", \"c\": 3.22}');")]))
        (mt/with-temp Database [database {:engine :postgres, :details details}]
          (is (= :type/SerializedJSON
                 (->> (sql-jdbc.sync/describe-table :postgres database {:name "describe_json_table"})
                      (:fields)
                      ;; `take`, not `:take`: the keyword form was a lookup that only worked by accident
                      (take 1)
                      (first)
                      (:semantic-type))))
          ;; "incoherent_json_val → a" is absent: it is a number in one row and an array in the other
          (is (= '#{{:name              "incoherent_json_val → b",
                     :database-type     :type/Text,
                     :base-type         :type/Text,
                     :database-position 0,
                     :visibility-type   :normal,
                     :nfc-path          [:incoherent_json_val "b"]}
                    {:name              "coherent_json_val → a",
                     :database-type     :type/Integer,
                     :base-type         :type/Integer,
                     :database-position 0,
                     :visibility-type   :normal,
                     :nfc-path          [:coherent_json_val "a"]}
                    {:name              "coherent_json_val → b",
                     :database-type     :type/Integer,
                     :base-type         :type/Integer,
                     :database-position 0,
                     :visibility-type   :normal,
                     :nfc-path          [:coherent_json_val "b"]}
                    {:name              "incoherent_json_val → c",
                     :database-type     :type/Number,
                     :base-type         :type/Number,
                     :database-position 0,
                     :visibility-type   :normal,
                     :nfc-path          [:incoherent_json_val "c"]}
                    {:name              "incoherent_json_val → d",
                     :database-type     :type/Integer,
                     :base-type         :type/Integer,
                     :database-position 0,
                     :visibility-type   :normal,
                     :nfc-path          [:incoherent_json_val "d"]}}
                 (sql-jdbc.sync/describe-nested-field-columns
                  :postgres
                  database
                  {:name "describe_json_table"}))))))))
(deftest describe-big-nested-field-columns-test
(mt/test-driver :postgres
(testing "blank out if huge. blank out instead of silently limiting"
(drop-if-exists-and-create-db! "big-json-test")
(let [details (mt/dbdef->connection-details :postgres :db {:database-name "big-json-test"})
......@@ -350,7 +359,8 @@
big-json (json/generate-string big-map)
sql (str "CREATE TABLE big_json_table (big_json JSON NOT NULL);"
(format "INSERT INTO big_json_table (big_json) VALUES ('%s');" big-json))]
(jdbc/execute! spec [sql])
(jdbc/with-db-connection [conn (sql-jdbc.conn/connection-details->spec :postgres details)]
(jdbc/execute! spec [sql]))
(mt/with-temp Database [database {:engine :postgres, :details details}]
(is (= #{}
(sql-jdbc.sync/describe-nested-field-columns
......
......@@ -68,3 +68,15 @@
:fields
(filter :semantic-type)
(map (juxt (comp str/lower-case :name) :semantic-type))))))))
;; Unit checks for the private JSON-flattening helpers in the describe-table namespace.
(deftest describe-nested-field-columns-test
  (testing "flattened-row"
    ;; nested keys become key paths prefixed with the field name
    (is (= {[:mob :bob :dobbs] 123
            [:mob :bob :cobbs] "boop"}
           (#'describe-table/flattened-row :mob {:bob {:dobbs 123 :cobbs "boop"}}))))
  (testing "row->types"
    ;; leaves are replaced by their classes; vectors count as leaves
    (is (= {[:bob :cobbs]        clojure.lang.PersistentVector
            [:bob :dobbs :robbs] java.lang.Long}
           (#'describe-table/row->types {:bob {:dobbs {:robbs 123} :cobbs [1 2 3]}})))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment