diff --git a/src/metabase/db/metadata_queries.clj b/src/metabase/db/metadata_queries.clj
index d1aff6654936b99cad61781e00718fb026706180..33f1161b78e0c8ba57385f561e74f70019949333 100644
--- a/src/metabase/db/metadata_queries.clj
+++ b/src/metabase/db/metadata_queries.clj
@@ -96,11 +96,20 @@
   (s/maybe {(s/optional-key :truncation-size) s/Int
             (s/optional-key :rff)             s/Any}))
 
+(defn- text-field?
+  "Identify text fields which can accept our substring optimization.
+
+  JSON and XML fields are now marked as `:type/Structured`, but in the past they were marked as `:type/Text`, so it's
+  not enough to check `base_type` alone: `special_type` must also not derive from `:type/Structured`."
+  [{:keys [base_type special_type]}]
+  (and (= base_type :type/Text)
+       (not (isa? special_type :type/Structured))))
+
 (defn- table-rows-sample-query
   "Returns the mbql query to query a table for sample rows"
   [table fields {:keys [truncation-size] :as _opts}]
   (let [driver             (-> table table/database driver.u/database->driver)
-        text-fields        (filter (comp #{:type/Text} :base_type) fields)
+        text-fields        (filter text-field? fields)
         field->expressions (when (and truncation-size (driver/supports? driver :expressions))
                              (into {} (for [field text-fields]
                                         [field [(str (gensym "substring"))
diff --git a/test/metabase/db/metadata_queries_test.clj b/test/metabase/db/metadata_queries_test.clj
index 432d7fb29c2893fd5ea06ac2e61f237f49523170..68a7a38a640e60f64c52f07f04ee38143d0e0900 100644
--- a/test/metabase/db/metadata_queries_test.clj
+++ b/test/metabase/db/metadata_queries_test.clj
@@ -72,5 +72,22 @@
               (is (seq (get-in query [:query :expressions]))))))
         (testing "doesnt' use substrings if driver doesn't support expressions"
           (with-redefs [driver/supports? (constantly false)]
+            (let [query (#'metadata-queries/table-rows-sample-query table fields {:truncation-size 4})]
+              (is (empty? (get-in query [:query :expressions])))))))
+      (testing "pre-existing json fields are still marked as `:type/Text`"
+        (let [table  (table/map->TableInstance {:id 1234})
+              fields [(field/map->FieldInstance {:id 4321, :base_type :type/Text, :special_type :type/SerializedJSON})]]
+          (with-redefs [driver/supports? (constantly true)]
             (let [query (#'metadata-queries/table-rows-sample-query table fields {:truncation-size 4})]
               (is (empty? (get-in query [:query :expressions]))))))))))
+
+(deftest text-field?-test
+  (testing "recognizes fields suitable for fingerprinting"
+    (doseq [field [{:base_type :type/Text}                                   ; no special type at all
+                   {:base_type :type/Text :special_type :type/State}         ; special types expected to be accepted
+                   {:base_type :type/Text :special_type :type/URL}]]
+      (is (#'metadata-queries/text-field? field)))
+    (doseq [field [{:base_type :type/Structured}                              ; json fields in pg
+                   {:base_type :type/Text :special_type :type/SerializedJSON} ; "legacy" json fields in pg
+                   {:base_type :type/Text :special_type :type/XML}]]          ; text base type, XML special type
+      (is (not (#'metadata-queries/text-field? field))))))