Skip to content
Snippets Groups Projects
Unverified Commit 407aba6c authored by dpsutton's avatar dpsutton Committed by GitHub
Browse files

Don't substring json fields in postgres (#13850)

parent 79134b6b
No related branches found
No related tags found
No related merge requests found
......@@ -96,11 +96,20 @@
(s/maybe {(s/optional-key :truncation-size) s/Int
(s/optional-key :rff) s/Any}))
(defn- text-field?
  "Returns true when a field is eligible for the substring optimization, false otherwise.

  A field qualifies only if its `:base_type` is exactly `:type/Text` AND its `:special_type` is not
  `:type/Structured` (or a descendant of it). The second condition is needed because JSON and XML
  columns are marked `:type/Structured` today but were historically synced as `:type/Text`, so it's
  not enough to look at the base type alone."
  [{base :base_type, special :special_type}]
  (cond
    ;; anything that is not plain text is never a candidate
    (not= base :type/Text)            false
    ;; legacy JSON/XML columns: text base type, structured special type
    (isa? special :type/Structured)   false
    :else                             true))
(defn- table-rows-sample-query
"Returns the mbql query to query a table for sample rows"
[table fields {:keys [truncation-size] :as _opts}]
(let [driver (-> table table/database driver.u/database->driver)
text-fields (filter (comp #{:type/Text} :base_type) fields)
text-fields (filter text-field? fields)
field->expressions (when (and truncation-size (driver/supports? driver :expressions))
(into {} (for [field text-fields]
[field [(str (gensym "substring"))
......
......@@ -72,5 +72,22 @@
(is (seq (get-in query [:query :expressions]))))))
(testing "doesnt' use substrings if driver doesn't support expressions"
(with-redefs [driver/supports? (constantly false)]
(let [query (#'metadata-queries/table-rows-sample-query table fields {:truncation-size 4})]
(is (empty? (get-in query [:query :expressions])))))))
(testing "pre-existing json fields are still marked as `:type/Text`"
(let [table (table/map->TableInstance {:id 1234})
fields [(field/map->FieldInstance {:id 4321, :base_type :type/Text, :special_type :type/SerializedJSON})]]
(with-redefs [driver/supports? (constantly true)]
(let [query (#'metadata-queries/table-rows-sample-query table fields {:truncation-size 4})]
(is (empty? (get-in query [:query :expressions]))))))))))
(deftest text-field?-test
  ;; Checks the private `text-field?` predicate against fields it should accept
  ;; and fields it should reject.
  (testing "recognizes fields suitable for fingerprinting"
    (let [accepted [{:base_type :type/Text}
                    {:base_type :type/Text :special_type :type/State}
                    {:base_type :type/Text :special_type :type/URL}]
          rejected [{:base_type :type/Structured} ; json fields in pg
                    {:base_type :type/Text :special_type :type/SerializedJSON} ; "legacy" json fields in pg
                    {:base_type :type/Text :special_type :type/XML}]]
      ;; every accepted field must satisfy the predicate
      (doseq [candidate accepted]
        (is (#'metadata-queries/text-field? candidate)))
      ;; every rejected field must fail the predicate
      (doseq [candidate rejected]
        (is (not (#'metadata-queries/text-field? candidate)))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment