diff --git a/src/metabase/driver.clj b/src/metabase/driver.clj index 3eb090d1542e8e926eab35e62504005828fa29cf..cf7dd70f3b0bf975f1d5eeb5d0cd30b52799f130 100644 --- a/src/metabase/driver.clj +++ b/src/metabase/driver.clj @@ -327,8 +327,8 @@ "Given a sequence of VALUES, return the most common base type." [values] (->> values + (take 100) ; take up to 100 values (filter (complement nil?)) ; filter out `nil` values - (take 1000) ; take up to 1000 values (group-by (comp class->base-type class)) ; now group by their base-type (sort-by (comp (partial * -1) count second)) ; sort the map into pairs of [base-type count] with highest count as first pair ffirst)) ; take the base-type from the first pair diff --git a/src/metabase/query_processor/middleware/annotate_and_sort.clj b/src/metabase/query_processor/middleware/annotate_and_sort.clj index 3a83927e036cc41798386e92713ee75260d5d5d1..3b8b13e43c349d69d740e26ac6946cb09da7b22a 100644 --- a/src/metabase/query_processor/middleware/annotate_and_sort.clj +++ b/src/metabase/query_processor/middleware/annotate_and_sort.clj @@ -1,27 +1,20 @@ (ns metabase.query-processor.middleware.annotate-and-sort "Middleware for annotating (adding type information to) the results of a query and sorting the columns in the results." - ;; TODO - `annotate` and `sort` are technically two seperate steps. We should decouple them and replace - ;; `metabase.query-processor.annotate`, ``metabase.query-processor.sort`, and `metabase.query-processor.middleware.annotate-and-sort` - ;; with two namespaces called `metabase.query-processor.middleware.annotate` and `metabase.query-processor.middleware.sort` + ;; TODO - `annotate` and `sort` are technically two seperate steps. We should decouple everything so we get a namespace + ;; structure looking more like: + ;; `metabase.query-processor.middleware.sort` + ;; `metabase.query-processor.middleware.annotate` + ;; `metabase.query-processor.middleware.mbql` + ;; `metabase.query-processor.middleware.sql` (:require [metabase.driver :as driver] + [metabase.models.humanization :as humanization] [metabase.query-processor [annotate :as annotate] [util :as qputil]])) -(def ^:private ^:const ^Integer max-rows-to-scan-for-column-type-inference - "Maximum number of rows to scan to look for a non-`nil` value to determine type information. - This number is meant to be a good balance between not giving up prematurely and not scanning the entire set of results returned - (which can be millions of rows in some cases)." - 100) - -(defn- vals->base-type - "Given a sequence of VALS, return the Field base type of the first non-`nil` value, scanning up to `max-rows-to-scan-for-column-type-inference` results." - [vs] - (or (some (fn [v] - (when-not (nil? v) - (driver/class->base-type (class v)))) - (take max-rows-to-scan-for-column-type-inference vs)) - :type/*)) +;;; +------------------------------------------------------------------------------------------------------------------------+ +;;; | NATIVE QUERY ANNOTATION | +;;; +------------------------------------------------------------------------------------------------------------------------+ (defn- infer-column-types "Infer the types of columns by looking at the first value for each in the results, and add the relevant information in `:cols`. @@ -29,10 +22,17 @@ [{:keys [columns rows], :as results}] (assoc results :columns (mapv name columns) - :cols (vec (for [i (range (count columns))] - {:name (name (nth columns i)) - :base_type (vals->base-type (for [row rows] - (nth row i)))})))) + :cols (vec (for [i (range (count columns)) + :let [col (nth columns i)]] + {:name (name col) + :display_name (humanization/name->human-readable-name (name col)) + :base_type (driver/values->base-type (for [row rows] + (nth row i)))})))) + + +;;; +------------------------------------------------------------------------------------------------------------------------+ +;;; | GENERAL MIDDLEWARE | +;;; +------------------------------------------------------------------------------------------------------------------------+ (defn annotate-and-sort "Middleware for adding type information to columns returned by running a query, and sorting the columns in the results." diff --git a/test/metabase/driver/druid_test.clj b/test/metabase/driver/druid_test.clj index 4d1dd9db8d4377f75ac29238a0e1044b9efd78ed..224150164f2a4c02a627f67553c382dc6ff73844 100644 --- a/test/metabase/driver/druid_test.clj +++ b/test/metabase/driver/druid_test.clj @@ -40,12 +40,12 @@ :data {:columns ["timestamp" "id" "user_name" "venue_price" "venue_name" "count"] :rows [["2013-01-03T08:00:00.000Z" "931" "Simcha Yan" "1" "Kinaree Thai Bistro" 1] ["2013-01-10T08:00:00.000Z" "285" "Kfir Caj" "2" "Ruen Pair Thai Restaurant" 1]] - :cols [{:name "timestamp", :base_type :type/Text} - {:name "id", :base_type :type/Text} - {:name "user_name", :base_type :type/Text} - {:name "venue_price", :base_type :type/Text} - {:name "venue_name", :base_type :type/Text} - {:name "count", :base_type :type/Integer}] + :cols [{:name "timestamp", :display_name "Timestamp", :base_type :type/Text} + {:name "id", :display_name "ID", :base_type :type/Text} + {:name "user_name", :display_name "User Name", :base_type :type/Text} + {:name "venue_price", :display_name "Venue Price", :base_type :type/Text} + {:name "venue_name", :display_name "Venue Name", :base_type :type/Text} + {:name "count", :display_name "Count", :base_type :type/Integer}] :native_form {:query native-query-1}}} (process-native-query native-query-1)) diff --git a/test/metabase/driver/generic_sql/native_test.clj b/test/metabase/driver/generic_sql/native_test.clj index 1a3621a513f3802974aea70d76d723f0c4dcb340..1f9445e9cfba364c0d47c3c554ddf74cfb26b833 100644 --- a/test/metabase/driver/generic_sql/native_test.clj +++ b/test/metabase/driver/generic_sql/native_test.clj @@ -8,28 +8,28 @@ ;; Just check that a basic query works (expect - {:status :completed + {:status :completed :row_count 2 - :data {:rows [[100] - [99]] - :columns ["ID"] - :cols [{:name "ID", :base_type :type/Integer}] - :native_form {:query "SELECT ID FROM VENUES ORDER BY ID DESC LIMIT 2;"}}} + :data {:rows [[100] + [99]] + :columns ["ID"] + :cols [{:name "ID", :display_name "ID", :base_type :type/Integer}] + :native_form {:query "SELECT ID FROM VENUES ORDER BY ID DESC LIMIT 2;"}}} (qp/process-query {:native {:query "SELECT ID FROM VENUES ORDER BY ID DESC LIMIT 2;"} :type :native :database (id)})) ;; Check that column ordering is maintained (expect - {:status :completed + {:status :completed :row_count 2 - :data {:rows [[100 "Mohawk Bend" 46] - [99 "Golden Road Brewing" 10]] - :columns ["ID" "NAME" "CATEGORY_ID"] - :cols [{:name "ID", :base_type :type/Integer} - {:name "NAME", :base_type :type/Text} - {:name "CATEGORY_ID", :base_type :type/Integer}] - :native_form {:query "SELECT ID, NAME, CATEGORY_ID FROM VENUES ORDER BY ID DESC LIMIT 2;"}}} + :data {:rows [[100 "Mohawk Bend" 46] + [99 "Golden Road Brewing" 10]] + :columns ["ID" "NAME" "CATEGORY_ID"] + :cols [{:name "ID", :display_name "ID", :base_type :type/Integer} + {:name "NAME", :display_name "Name", :base_type :type/Text} + {:name "CATEGORY_ID", :display_name "Category ID", :base_type :type/Integer}] + :native_form {:query "SELECT ID, NAME, CATEGORY_ID FROM VENUES ORDER BY ID DESC LIMIT 2;"}}} (qp/process-query {:native {:query "SELECT ID, NAME, CATEGORY_ID FROM VENUES ORDER BY ID DESC LIMIT 2;"} :type :native :database (id)})) diff --git a/test/metabase/driver/mongo_test.clj b/test/metabase/driver/mongo_test.clj index 5495ead63d547d9679b3a841ef2510f5b2e9c874..102e57808d429e4c6a6adda72cefb6b923af9610 100644 --- a/test/metabase/driver/mongo_test.clj +++ b/test/metabase/driver/mongo_test.clj @@ -77,7 +77,7 @@ :row_count 1 :data {:rows [[1]] :columns ["count"] - :cols [{:name "count", :base_type :type/Integer}] + :cols [{:name "count", :display_name "Count", :base_type :type/Integer}] :native_form {:collection "venues" :query native-query}}} (qp/process-query {:native {:query native-query diff --git a/test/metabase/driver_test.clj b/test/metabase/driver_test.clj index a2651796dbf7225bbe69c1d912acf3f4b14fb517..10c9774d482b8cadcdd2eaa8fe79fe041239d2e7 100644 --- a/test/metabase/driver_test.clj +++ b/test/metabase/driver_test.clj @@ -16,3 +16,42 @@ (expect true (driver/driver-supports? (TestDriver.) :a)) (expect false (driver/driver-supports? (TestDriver.) :b)) + +;; values->base-type +(expect + :type/Text + (driver/values->base-type ["A" "B" "C"])) + +;; should ignore nils +(expect + :type/Text + (driver/values->base-type [nil nil "C"])) + +;; should pick base-type of most common class +(expect + :type/Text + (driver/values->base-type ["A" 100 "C"])) + +;; should fall back to :type/* if no better type is found +(expect + :type/* + (driver/values->base-type [(Object.)])) + +;; Should work with initial nils even if sequence is lazy +(expect + [:type/Integer true] + (let [realized-lazy-seq? (atom false)] + [(driver/values->base-type (lazy-cat [nil nil nil] + (do (reset! realized-lazy-seq? true) + [4 5 6]))) + @realized-lazy-seq?])) + +;; but it should respect laziness and not keep scanning after it finds 100 values +(expect + [:type/Integer false] + (let [realized-lazy-seq? (atom false)] + [(driver/values->base-type (lazy-cat [1 2 3] + (repeat 1000 nil) + (do (reset! realized-lazy-seq? true) + [4 5 6]))) + @realized-lazy-seq?])) diff --git a/test/metabase/query_processor/middleware/annotate_and_sort_test.clj b/test/metabase/query_processor/middleware/annotate_and_sort_test.clj index a770dd0a0d106b2fbb83b0fbe6c3275c10073534..fabdac0715d2da0e09448d95bb13918431673173 100644 --- a/test/metabase/query_processor/middleware/annotate_and_sort_test.clj +++ b/test/metabase/query_processor/middleware/annotate_and_sort_test.clj @@ -3,41 +3,12 @@ [metabase.test.util :as tu])) (tu/resolve-private-vars metabase.query-processor.middleware.annotate-and-sort - vals->base-type infer-column-types) - -;; tests for vals->base-type -(expect - :type/Integer - (vals->base-type [1 "A" "B"])) - -;; should work with some initial nils -(expect - :type/Text - (vals->base-type [nil nil "A"])) - -;; (even if sequence is lazy) -(expect - [:type/Integer true] - (let [realized-lazy-seq? (atom false)] - [(vals->base-type (lazy-cat [nil nil nil] - (do (reset! realized-lazy-seq? true) - [4 5 6]))) - @realized-lazy-seq?])) - -;; but it should respect laziness and not keep scanning after it finds the first non-`nil` value -(expect - [:type/Integer false] - (let [realized-lazy-seq? (atom false)] - [(vals->base-type (lazy-cat [1 2 3] - (do (reset! realized-lazy-seq? true) - [4 5 6]))) - @realized-lazy-seq?])) - + infer-column-types) ;; make sure that `infer-column-types` can still infer types even if the initial value(s) are `nil` (#4256) (expect - [{:name "a", :base_type :type/Integer} - {:name "b", :base_type :type/Integer}] + [{:name "a", :display_name "A", :base_type :type/Integer} + {:name "b", :display_name "B", :base_type :type/Integer}] (:cols (infer-column-types {:columns [:a :b], :rows [[1 nil] [2 nil] [3 nil]