From 407aba6c3869fc23afef586b798400f884f4b420 Mon Sep 17 00:00:00 2001 From: dpsutton <dan@dpsutton.com> Date: Thu, 19 Nov 2020 15:31:25 -0600 Subject: [PATCH] Don't substring json fields in postgres (#13850) --- src/metabase/db/metadata_queries.clj | 11 ++++++++++- test/metabase/db/metadata_queries_test.clj | 17 +++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/metabase/db/metadata_queries.clj b/src/metabase/db/metadata_queries.clj index d1aff665493..33f1161b78e 100644 --- a/src/metabase/db/metadata_queries.clj +++ b/src/metabase/db/metadata_queries.clj @@ -96,11 +96,20 @@ (s/maybe {(s/optional-key :truncation-size) s/Int (s/optional-key :rff) s/Any})) +(defn- text-field? + "Identify text fields which can accept our substring optimization. + + JSON and XML fields are now marked as `:type/Structured` but in the past were marked as `:type/Text` so its not + enough to just check the base type." + [{:keys [base_type special_type]}] + (and (= base_type :type/Text) + (not (isa? special_type :type/Structured)))) + (defn- table-rows-sample-query "Returns the mbql query to query a table for sample rows" [table fields {:keys [truncation-size] :as _opts}] (let [driver (-> table table/database driver.u/database->driver) - text-fields (filter (comp #{:type/Text} :base_type) fields) + text-fields (filter text-field? fields) field->expressions (when (and truncation-size (driver/supports? driver :expressions)) (into {} (for [field text-fields] [field [(str (gensym "substring")) diff --git a/test/metabase/db/metadata_queries_test.clj b/test/metabase/db/metadata_queries_test.clj index 432d7fb29c2..68a7a38a640 100644 --- a/test/metabase/db/metadata_queries_test.clj +++ b/test/metabase/db/metadata_queries_test.clj @@ -72,5 +72,22 @@ (is (seq (get-in query [:query :expressions])))))) (testing "doesnt' use substrings if driver doesn't support expressions" (with-redefs [driver/supports? (constantly false)] + (let [query (#'metadata-queries/table-rows-sample-query table fields {:truncation-size 4})] + (is (empty? (get-in query [:query :expressions]))))))) + (testing "pre-existing json fields are still marked as `:type/Text`" + (let [table (table/map->TableInstance {:id 1234}) + fields [(field/map->FieldInstance {:id 4321, :base_type :type/Text, :special_type :type/SerializedJSON})]] + (with-redefs [driver/supports? (constantly true)] (let [query (#'metadata-queries/table-rows-sample-query table fields {:truncation-size 4})] (is (empty? (get-in query [:query :expressions])))))))))) + +(deftest text-field?-test + (testing "recognizes fields suitable for fingerprinting" + (doseq [field [{:base_type :type/Text} + {:base_type :type/Text :special_type :type/State} + {:base_type :type/Text :special_type :type/URL}]] + (is (#'metadata-queries/text-field? field))) + (doseq [field [{:base_type :type/Structured} ; json fields in pg + {:base_type :type/Text :special_type :type/SerializedJSON} ; "legacy" json fields in pg + {:base_type :type/Text :special_type :type/XML}]] + (is (not (#'metadata-queries/text-field? field)))))) -- GitLab