Skip to content
Snippets Groups Projects
Commit f55735b5 authored by Cam Saül's avatar Cam Saül Committed by GitHub
Browse files

Merge pull request #5297 from metabase/remove-duplicate-fn

Remove duplicate fn
parents ea2fef57 74b7d4d4
No related branches found
No related tags found
No related merge requests found
...@@ -327,8 +327,8 @@ ...@@ -327,8 +327,8 @@
"Given a sequence of VALUES, return the most common base type." "Given a sequence of VALUES, return the most common base type."
[values] [values]
(->> values (->> values
(take 100) ; take up to 100 values
(filter (complement nil?)) ; filter out `nil` values (filter (complement nil?)) ; filter out `nil` values
(take 1000) ; take up to 1000 values
(group-by (comp class->base-type class)) ; now group by their base-type (group-by (comp class->base-type class)) ; now group by their base-type
(sort-by (comp (partial * -1) count second)) ; sort the map into pairs of [base-type count] with highest count as first pair (sort-by (comp (partial * -1) count second)) ; sort the map into pairs of [base-type count] with highest count as first pair
ffirst)) ; take the base-type from the first pair ffirst)) ; take the base-type from the first pair
......
(ns metabase.query-processor.middleware.annotate-and-sort (ns metabase.query-processor.middleware.annotate-and-sort
"Middleware for annotating (adding type information to) the results of a query and sorting the columns in the results." "Middleware for annotating (adding type information to) the results of a query and sorting the columns in the results."
;; TODO - `annotate` and `sort` are technically two seperate steps. We should decouple them and replace ;; TODO - `annotate` and `sort` are technically two seperate steps. We should decouple everything so we get a namespace
;; `metabase.query-processor.annotate`, ``metabase.query-processor.sort`, and `metabase.query-processor.middleware.annotate-and-sort` ;; structure looking more like:
;; with two namespaces called `metabase.query-processor.middleware.annotate` and `metabase.query-processor.middleware.sort` ;; `metabase.query-processor.middleware.sort`
;; `metabase.query-processor.middleware.annotate`
;; `metabase.query-processor.middleware.mbql`
;; `metabase.query-processor.middleware.sql`
(:require [metabase.driver :as driver] (:require [metabase.driver :as driver]
[metabase.models.humanization :as humanization]
[metabase.query-processor [metabase.query-processor
[annotate :as annotate] [annotate :as annotate]
[util :as qputil]])) [util :as qputil]]))
(def ^:private ^:const ^Integer max-rows-to-scan-for-column-type-inference ;;; +------------------------------------------------------------------------------------------------------------------------+
"Maximum number of rows to scan to look for a non-`nil` value to determine type information. ;;; | NATIVE QUERY ANNOTATION |
This number is meant to be a good balance between not giving up prematurely and not scanning the entire set of results returned ;;; +------------------------------------------------------------------------------------------------------------------------+
(which can be millions of rows in some cases)."
100)
(defn- vals->base-type
"Given a sequence of VALS, return the Field base type of the first non-`nil` value, scanning up to `max-rows-to-scan-for-column-type-inference` results."
[vs]
(or (some (fn [v]
(when-not (nil? v)
(driver/class->base-type (class v))))
(take max-rows-to-scan-for-column-type-inference vs))
:type/*))
(defn- infer-column-types (defn- infer-column-types
"Infer the types of columns by looking at the first value for each in the results, and add the relevant information in `:cols`. "Infer the types of columns by looking at the first value for each in the results, and add the relevant information in `:cols`.
...@@ -29,10 +22,17 @@ ...@@ -29,10 +22,17 @@
[{:keys [columns rows], :as results}] [{:keys [columns rows], :as results}]
(assoc results (assoc results
:columns (mapv name columns) :columns (mapv name columns)
:cols (vec (for [i (range (count columns))] :cols (vec (for [i (range (count columns))
{:name (name (nth columns i)) :let [col (nth columns i)]]
:base_type (vals->base-type (for [row rows] {:name (name col)
(nth row i)))})))) :display_name (humanization/name->human-readable-name (name col))
:base_type (driver/values->base-type (for [row rows]
(nth row i)))}))))
;;; +------------------------------------------------------------------------------------------------------------------------+
;;; | GENERAL MIDDLEWARE |
;;; +------------------------------------------------------------------------------------------------------------------------+
(defn annotate-and-sort (defn annotate-and-sort
"Middleware for adding type information to columns returned by running a query, and sorting the columns in the results." "Middleware for adding type information to columns returned by running a query, and sorting the columns in the results."
......
...@@ -40,12 +40,12 @@ ...@@ -40,12 +40,12 @@
:data {:columns ["timestamp" "id" "user_name" "venue_price" "venue_name" "count"] :data {:columns ["timestamp" "id" "user_name" "venue_price" "venue_name" "count"]
:rows [["2013-01-03T08:00:00.000Z" "931" "Simcha Yan" "1" "Kinaree Thai Bistro" 1] :rows [["2013-01-03T08:00:00.000Z" "931" "Simcha Yan" "1" "Kinaree Thai Bistro" 1]
["2013-01-10T08:00:00.000Z" "285" "Kfir Caj" "2" "Ruen Pair Thai Restaurant" 1]] ["2013-01-10T08:00:00.000Z" "285" "Kfir Caj" "2" "Ruen Pair Thai Restaurant" 1]]
:cols [{:name "timestamp", :base_type :type/Text} :cols [{:name "timestamp", :display_name "Timestamp", :base_type :type/Text}
{:name "id", :base_type :type/Text} {:name "id", :display_name "ID", :base_type :type/Text}
{:name "user_name", :base_type :type/Text} {:name "user_name", :display_name "User Name", :base_type :type/Text}
{:name "venue_price", :base_type :type/Text} {:name "venue_price", :display_name "Venue Price", :base_type :type/Text}
{:name "venue_name", :base_type :type/Text} {:name "venue_name", :display_name "Venue Name", :base_type :type/Text}
{:name "count", :base_type :type/Integer}] {:name "count", :display_name "Count", :base_type :type/Integer}]
:native_form {:query native-query-1}}} :native_form {:query native-query-1}}}
(process-native-query native-query-1)) (process-native-query native-query-1))
......
...@@ -8,28 +8,28 @@ ...@@ -8,28 +8,28 @@
;; Just check that a basic query works ;; Just check that a basic query works
(expect (expect
{:status :completed {:status :completed
:row_count 2 :row_count 2
:data {:rows [[100] :data {:rows [[100]
[99]] [99]]
:columns ["ID"] :columns ["ID"]
:cols [{:name "ID", :base_type :type/Integer}] :cols [{:name "ID", :display_name "ID", :base_type :type/Integer}]
:native_form {:query "SELECT ID FROM VENUES ORDER BY ID DESC LIMIT 2;"}}} :native_form {:query "SELECT ID FROM VENUES ORDER BY ID DESC LIMIT 2;"}}}
(qp/process-query {:native {:query "SELECT ID FROM VENUES ORDER BY ID DESC LIMIT 2;"} (qp/process-query {:native {:query "SELECT ID FROM VENUES ORDER BY ID DESC LIMIT 2;"}
:type :native :type :native
:database (id)})) :database (id)}))
;; Check that column ordering is maintained ;; Check that column ordering is maintained
(expect (expect
{:status :completed {:status :completed
:row_count 2 :row_count 2
:data {:rows [[100 "Mohawk Bend" 46] :data {:rows [[100 "Mohawk Bend" 46]
[99 "Golden Road Brewing" 10]] [99 "Golden Road Brewing" 10]]
:columns ["ID" "NAME" "CATEGORY_ID"] :columns ["ID" "NAME" "CATEGORY_ID"]
:cols [{:name "ID", :base_type :type/Integer} :cols [{:name "ID", :display_name "ID", :base_type :type/Integer}
{:name "NAME", :base_type :type/Text} {:name "NAME", :display_name "Name", :base_type :type/Text}
{:name "CATEGORY_ID", :base_type :type/Integer}] {:name "CATEGORY_ID", :display_name "Category ID", :base_type :type/Integer}]
:native_form {:query "SELECT ID, NAME, CATEGORY_ID FROM VENUES ORDER BY ID DESC LIMIT 2;"}}} :native_form {:query "SELECT ID, NAME, CATEGORY_ID FROM VENUES ORDER BY ID DESC LIMIT 2;"}}}
(qp/process-query {:native {:query "SELECT ID, NAME, CATEGORY_ID FROM VENUES ORDER BY ID DESC LIMIT 2;"} (qp/process-query {:native {:query "SELECT ID, NAME, CATEGORY_ID FROM VENUES ORDER BY ID DESC LIMIT 2;"}
:type :native :type :native
:database (id)})) :database (id)}))
......
...@@ -77,7 +77,7 @@ ...@@ -77,7 +77,7 @@
:row_count 1 :row_count 1
:data {:rows [[1]] :data {:rows [[1]]
:columns ["count"] :columns ["count"]
:cols [{:name "count", :base_type :type/Integer}] :cols [{:name "count", :display_name "Count", :base_type :type/Integer}]
:native_form {:collection "venues" :native_form {:collection "venues"
:query native-query}}} :query native-query}}}
(qp/process-query {:native {:query native-query (qp/process-query {:native {:query native-query
......
...@@ -16,3 +16,42 @@ ...@@ -16,3 +16,42 @@
(expect true (driver/driver-supports? (TestDriver.) :a)) (expect true (driver/driver-supports? (TestDriver.) :a))
(expect false (driver/driver-supports? (TestDriver.) :b)) (expect false (driver/driver-supports? (TestDriver.) :b))
;; values->base-type
(expect
:type/Text
(driver/values->base-type ["A" "B" "C"]))
;; should ignore nils
(expect
:type/Text
(driver/values->base-type [nil nil "C"]))
;; should pick base-type of most common class
(expect
:type/Text
(driver/values->base-type ["A" 100 "C"]))
;; should fall back to :type/* if no better type is found
(expect
:type/*
(driver/values->base-type [(Object.)]))
;; Should work with initial nils even if sequence is lazy
(expect
[:type/Integer true]
(let [realized-lazy-seq? (atom false)]
[(driver/values->base-type (lazy-cat [nil nil nil]
(do (reset! realized-lazy-seq? true)
[4 5 6])))
@realized-lazy-seq?]))
;; but it should respect laziness and not keep scanning after it finds 100 values
(expect
[:type/Integer false]
(let [realized-lazy-seq? (atom false)]
[(driver/values->base-type (lazy-cat [1 2 3]
(repeat 1000 nil)
(do (reset! realized-lazy-seq? true)
[4 5 6])))
@realized-lazy-seq?]))
...@@ -3,41 +3,12 @@ ...@@ -3,41 +3,12 @@
[metabase.test.util :as tu])) [metabase.test.util :as tu]))
(tu/resolve-private-vars metabase.query-processor.middleware.annotate-and-sort (tu/resolve-private-vars metabase.query-processor.middleware.annotate-and-sort
vals->base-type infer-column-types) infer-column-types)
;; tests for vals->base-type
(expect
:type/Integer
(vals->base-type [1 "A" "B"]))
;; should work with some initial nils
(expect
:type/Text
(vals->base-type [nil nil "A"]))
;; (even if sequence is lazy)
(expect
[:type/Integer true]
(let [realized-lazy-seq? (atom false)]
[(vals->base-type (lazy-cat [nil nil nil]
(do (reset! realized-lazy-seq? true)
[4 5 6])))
@realized-lazy-seq?]))
;; but it should respect laziness and not keep scanning after it finds the first non-`nil` value
(expect
[:type/Integer false]
(let [realized-lazy-seq? (atom false)]
[(vals->base-type (lazy-cat [1 2 3]
(do (reset! realized-lazy-seq? true)
[4 5 6])))
@realized-lazy-seq?]))
;; make sure that `infer-column-types` can still infer types even if the initial value(s) are `nil` (#4256) ;; make sure that `infer-column-types` can still infer types even if the initial value(s) are `nil` (#4256)
(expect (expect
[{:name "a", :base_type :type/Integer} [{:name "a", :display_name "A", :base_type :type/Integer}
{:name "b", :base_type :type/Integer}] {:name "b", :display_name "B", :base_type :type/Integer}]
(:cols (infer-column-types {:columns [:a :b], :rows [[1 nil] (:cols (infer-column-types {:columns [:a :b], :rows [[1 nil]
[2 nil] [2 nil]
[3 nil] [3 nil]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment