From 407aba6c3869fc23afef586b798400f884f4b420 Mon Sep 17 00:00:00 2001
From: dpsutton <dan@dpsutton.com>
Date: Thu, 19 Nov 2020 15:31:25 -0600
Subject: [PATCH] Don't substring json fields in postgres (#13850)

---
 src/metabase/db/metadata_queries.clj       | 11 ++++++++++-
 test/metabase/db/metadata_queries_test.clj | 17 +++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/src/metabase/db/metadata_queries.clj b/src/metabase/db/metadata_queries.clj
index d1aff665493..33f1161b78e 100644
--- a/src/metabase/db/metadata_queries.clj
+++ b/src/metabase/db/metadata_queries.clj
@@ -96,11 +96,20 @@
   (s/maybe {(s/optional-key :truncation-size) s/Int
             (s/optional-key :rff)             s/Any}))
 
+(defn- text-field?
+  "Identify text fields which can accept our substring optimization.
+
+  JSON and XML fields are now marked as `:type/Structured` but in the past were marked as `:type/Text` so its not
+  enough to just check the base type."
+  [{:keys [base_type special_type]}]
+  (and (= base_type :type/Text)
+       (not (isa? special_type :type/Structured))))
+
 (defn- table-rows-sample-query
   "Returns the mbql query to query a table for sample rows"
   [table fields {:keys [truncation-size] :as _opts}]
   (let [driver             (-> table table/database driver.u/database->driver)
-        text-fields        (filter (comp #{:type/Text} :base_type) fields)
+        text-fields        (filter text-field? fields)
         field->expressions (when (and truncation-size (driver/supports? driver :expressions))
                              (into {} (for [field text-fields]
                                         [field [(str (gensym "substring"))
diff --git a/test/metabase/db/metadata_queries_test.clj b/test/metabase/db/metadata_queries_test.clj
index 432d7fb29c2..68a7a38a640 100644
--- a/test/metabase/db/metadata_queries_test.clj
+++ b/test/metabase/db/metadata_queries_test.clj
@@ -72,5 +72,22 @@
               (is (seq (get-in query [:query :expressions]))))))
         (testing "doesnt' use substrings if driver doesn't support expressions"
           (with-redefs [driver/supports? (constantly false)]
+            (let [query (#'metadata-queries/table-rows-sample-query table fields {:truncation-size 4})]
+              (is (empty? (get-in query [:query :expressions])))))))
+      (testing "pre-existing json fields are still marked as `:type/Text`"
+        (let [table (table/map->TableInstance {:id 1234})
+              fields [(field/map->FieldInstance {:id 4321, :base_type :type/Text, :special_type :type/SerializedJSON})]]
+          (with-redefs [driver/supports? (constantly true)]
             (let [query (#'metadata-queries/table-rows-sample-query table fields {:truncation-size 4})]
               (is (empty? (get-in query [:query :expressions]))))))))))
+
+(deftest text-field?-test
+  (testing "recognizes fields suitable for fingerprinting"
+    (doseq [field [{:base_type :type/Text}
+                   {:base_type :type/Text :special_type :type/State}
+                   {:base_type :type/Text :special_type :type/URL}]]
+      (is (#'metadata-queries/text-field? field)))
+    (doseq [field [{:base_type :type/Structured} ; json fields in pg
+                   {:base_type :type/Text :special_type :type/SerializedJSON} ; "legacy" json fields in pg
+                   {:base_type :type/Text :special_type :type/XML}]]
+      (is (not (#'metadata-queries/text-field? field))))))
-- 
GitLab