Skip to content
Snippets Groups Projects
Commit 1e2b13f1 authored by Cam Saul's avatar Cam Saul
Browse files

move annotation to its own file

parent 4a527334
No related branches found
No related tags found
No related merge requests found
......@@ -7,12 +7,13 @@
:min-lein-version "2.5.0"
:aliases {"test" ["with-profile" "+expectations" "expectations"]}
:dependencies [[org.clojure/clojure "1.7.0"]
[org.clojure/core.logic "0.8.10"]
[org.clojure/core.match "0.3.0-alpha4"] ; optimized pattern matching library for Clojure
[org.clojure/math.numeric-tower "0.0.4"] ; math functions like `ceil`
[org.clojure/core.memoize "0.5.7"] ; needed by core.match; has useful FIFO, LRU, etc. caching mechanisms
[org.clojure/data.csv "0.1.2"] ; CSV parsing / generation
[org.clojure/java.classpath "0.2.2"]
[org.clojure/java.jdbc "0.3.7"] ; basic jdbc access from clojure
[org.clojure/math.numeric-tower "0.0.4"] ; math functions like `ceil`
[org.clojure/tools.logging "0.3.1"] ; logging framework
[org.clojure/tools.macro "0.1.5"] ; tools for writing macros
[org.clojure/tools.namespace "0.2.10"]
......
......@@ -8,7 +8,8 @@
[swiss.arrows :refer [<<-]]
[metabase.db :refer :all]
[metabase.driver.interface :as i]
[metabase.driver.query-processor.expand :as expand]
(metabase.driver.query-processor [annotate :as annotate]
[expand :as expand])
(metabase.models [field :refer [Field], :as field]
[foreign-key :refer [ForeignKey]])
[metabase.util :as u]))
......@@ -195,219 +196,6 @@
(qp query)))
;; +----------------------------------------------------------------------------------------------------+
;; | ANNOTATION |
;; +----------------------------------------------------------------------------------------------------+
;; ## Ordering
;;
;; Fields should be returned in the following order:
;; 1. Breakout Fields
;;
;; 2. Aggregation Fields (e.g. sum, count)
;;
;; 3. Fields clause Fields, if they were added explicitly
;;
;; 4. All other Fields, sorted by:
;; A. :position (ascending)
;; Users can manually specify default Field ordering for a Table in the Metadata admin. In that case, return Fields in the specified
;; order; most of the time they'll have the default value of 0, in which case we'll compare...
;;
;; B. :special_type "group" -- :id Fields, then :name Fields, then everyting else
;; Attempt to put the most relevant Fields first. Order the Fields as follows:
;; 1. :id Fields
;; 2. :name Fields
;; 3. all other Fields
;;
;; C. Field Name
;; When two Fields have the same :position and :special_type "group", fall back to sorting Fields alphabetically by name.
;; This is arbitrary, but it makes the QP deterministic by keeping the results in a consistent order, which makes it testable.
(defn- order-cols
"Construct a sequence of column keywords that should be used for pulling ordered rows from RESULTS.
FIELDS should be a sequence of all `Fields` for the `Table` associated with QUERY."
[{{breakout-fields :breakout, {ag-type :aggregation-type} :aggregation, fields-fields :fields, fields-is-implicit :fields-is-implicit} :query} results fields]
(let [;; Get all the column name keywords returned by the results
result-kws (set (keys (first results)))
valid-kw? (partial contains? result-kws)
breakout-ids (map :field-id breakout-fields)
breakout-kws (->> (for [field breakout-fields]
(->> (rest (expand/qualified-name-components field)) ; TODO - this "qualified name for results" should be calculated in the Query expander
(interpose ".")
(apply str)
keyword))
(filter valid-kw?))
fields-ids (map :field-id fields-fields)
field-id->field (zipmap (map :id fields) fields)
;; Get IDs from Fields clause *if* it was added explicitly and other all other Field IDs for Table.
fields-ids (when-not fields-is-implicit fields-ids)
all-field-ids (->> fields ; Sort the Fields.
(sort-by (fn [{:keys [position special_type name]}] ; For each field generate a vector of
[position ; [position special-type-group name]
(cond ; and Clojure will take care of the rest.
(= special_type :id) 0
(= special_type :name) 1
:else 2)
name]))
(map :id)) ; Return the sorted IDs
;; Get the aggregate column if any
ag-kws (when (and ag-type
(not= ag-type :rows))
(let [ag (if (= ag-type :distinct) :count
ag-type)]
[ag]))
;; Make a helper function that will take a sequence of Field IDs and convert them to corresponding column name keywords.
;; Don't include names that aren't part of RESULT-KWS: we fetch *all* the Fields for a Table regardless of the Query, so
;; there are likely some unused ones.
ids->kws (fn [field-ids]
(some->> (map field-id->field field-ids)
(map :name)
(map keyword)
(filter valid-kw?)))
;; Concat the Fields clause IDs + the sequence of all Fields ID for the Table.
;; Then filter out ones that appear in breakout clause and remove duplicates
;; which effectively gives us parts #3 and #4 from above.
non-breakout-ids (->> (concat fields-ids all-field-ids)
(filter (complement (partial contains? (set breakout-ids))))
distinct)
;; Use fn above to get the keyword column names of other non-aggregation fields [#3 and #4]
non-breakout-kws (->> (ids->kws non-breakout-ids)
(filter (complement (partial contains? (set ag-kws)))))
;; Collect all other Fields
other-kws (->> result-kws
(filter (complement (partial contains? (set (concat breakout-kws non-breakout-kws ag-kws)))))
sort)] ; sort by name so results are deterministic
(when (seq other-kws)
(log/warn (u/format-color 'red "Warning: not 100%% sure how to order these columns: %s" (vec other-kws))))
;; Now combine the breakout [#1] + aggregate [#2] + "non-breakout" [#3 & #4] column name keywords into a single sequence
(when-not *disable-qp-logging*
(log/debug (u/format-color 'magenta "Using this ordering: breakout: %s, ag: %s, non-breakout: %s, other: %s"
(vec breakout-kws) (vec ag-kws) (vec non-breakout-kws) (vec other-kws))))
(let [ordered-kws (concat breakout-kws ag-kws non-breakout-kws other-kws)]
(assert (and (= (set ordered-kws) result-kws)
(= (count ordered-kws) (count result-kws)))
(format "Order-cols returned invalid results: expected %s, got %s\nbreakout: %s, ag: %s, non-breakout: %s, other: %s" result-kws (vec ordered-kws)
(vec breakout-kws) (vec ag-kws) (vec non-breakout-kws) (vec other-kws)))
ordered-kws)))
(defn- add-fields-extra-info
"Add `:extra_info` about `ForeignKeys` to `Fields` whose `special_type` is `:fk`."
[fields]
;; Get a sequence of add Field IDs that have a :special_type of FK
(let [fk-field-ids (->> fields
(filter #(= (:special_type %) :fk))
(map :id)
(filter identity))
;; Look up the Foreign keys info if applicable.
;; Build a map of FK Field IDs -> Destination Field IDs
field-id->dest-field-id (when (seq fk-field-ids)
(sel :many :field->field [ForeignKey :origin_id :destination_id], :origin_id [in fk-field-ids], :destination_id [not= nil]))
;; Build a map of Destination Field IDs -> Destination Fields
dest-field-id->field (when (and (seq fk-field-ids)
(seq (vals field-id->dest-field-id)))
(sel :many :id->fields [Field :id :name :table_id :description :base_type :special_type], :id [in (vals field-id->dest-field-id)]))]
;; Add the :extra_info + :target to every Field. For non-FK Fields, these are just {} and nil, respectively.
(for [{field-id :id, :as field} fields]
(let [dest-field (when (seq fk-field-ids)
(some->> field-id
field-id->dest-field-id
dest-field-id->field))]
(assoc field
:target dest-field
:extra_info (if-not dest-field {}
{:target_table_id (:table_id dest-field)}))))))
(defn- get-cols-info
"Get column info for the `:cols` part of the QP results."
[{{{ag-type :aggregation-type, ag-field :field} :aggregation} :query} fields ordered-col-kws join-table-ids]
(let [field-kw->field (zipmap (map #(keyword (:name %)) fields)
fields)
field-id->field (delay (zipmap (map :id fields) ; a delay since we probably won't need it
fields))]
(->> (for [col-kw ordered-col-kws]
(or
;; If col-kw is a known Field return that
(field-kw->field col-kw)
;; Otherwise if this Query included any joins then attempt to lookup a matching Field from one of the join tables
(and (seq join-table-ids)
(sel :one :fields [Field :id :table_id :name :description :base_type :special_type], :name (name col-kw), :table_id [in join-table-ids]))
;; Otherwise if this is a nested Field recursively find the appropriate info
(let [name-components (s/split (name col-kw) #"\.")]
(when (> (count name-components) 1)
;; Find the nested Field by recursing through each Field's :children
(loop [field-kw->field field-kw->field, [component & more] (map keyword name-components)]
(when-let [f (field-kw->field component)]
(if-not (seq more)
;; If the are no more components to recurse through give the resulting Field a qualified name like "source.service" and return it
(assoc f :name (apply str (interpose "." name-components)))
;; Otherwise recurse with a map of child-name-kw -> child and the rest of the name components
(recur (zipmap (map (comp keyword :name) (:children f))
(:children f))
more))))))
;; Otherwise it is an aggregation column like :sum, build a map of information to return
(merge (assert ag-type)
{:name (name col-kw)
:id nil
:table_id nil
:description nil}
(cond
;; avg, stddev, and sum should inherit the base_type and special_type from the Field they're aggregating
(contains? #{:avg :stddev :sum} col-kw) {:base_type (:base-type ag-field)
:special_type (:special-type ag-field)}
;; count should always be IntegerField/number
(= col-kw :count) {:base_type :IntegerField
:special_type :number}
;; Otherwise something went wrong !
:else (do (log/error (u/format-color 'red "Annotation failed: don't know what to do with Field '%s'.\nExpected these Fields:\n%s"
col-kw
(u/pprint-to-str field-kw->field)))
{:base_type :UnknownField
:special_type nil})))))
;; Add FK info the the resulting Fields
add-fields-extra-info
;; Remove extra data from the resulting Fields
(map (u/rpartial dissoc :children :parent_id)))))
(defn- post-annotate
"Take a sequence of RESULTS of executing QUERY and return the \"annotated\" results we pass to postprocessing -- the map with `:cols`, `:columns`, and `:rows`.
RESULTS should be a sequence of *maps*, keyed by result column -> value."
[qp]
(fn [{{:keys [join-tables] {source-table-id :id} :source-table} :query, :as query}]
(let [{:keys [results uncastify-fn]} (qp query)
results (if-not uncastify-fn results
(for [row results]
(m/map-keys uncastify-fn row)))
_ (when-not *disable-qp-logging*
(log/debug (u/format-color 'magenta "Driver QP returned results with keys: %s." (vec (keys (first results))))))
join-table-ids (set (map :table-id join-tables))
fields (field/unflatten-nested-fields (sel :many :fields [Field :id :table_id :name :description :base_type :special_type :parent_id], :table_id source-table-id, :active true))
ordered-col-kws (order-cols query results fields)]
{:rows (for [row results]
(mapv row ordered-col-kws)) ; might as well return each row and col info as vecs because we're not worried about making
:columns (mapv name ordered-col-kws) ; making them lazy, and results are easier to play with in the REPL / paste into unit tests
:cols (vec (get-cols-info query fields ordered-col-kws join-table-ids))}))) ; as vecs. Make sure :rows stays lazy!
;; +------------------------------------------------------------------------------------------------------------------------+
;; | QUERY PROCESSOR |
;; +------------------------------------------------------------------------------------------------------------------------+
......@@ -460,7 +248,7 @@
post-convert-unix-timestamps-to-dates
cumulative-sum
limit
post-annotate
annotate/post-annotate
pre-log-query
wrap-guard-multiple-calls
driver-process-query) query)))
......
(ns metabase.driver.query-processor.annotate
(:require [clojure.string :as s]
[clojure.tools.logging :as log]
[medley.core :as m]
[metabase.db :refer [sel]]
[metabase.driver.query-processor.expand :as expand]
(metabase.models [field :refer [Field], :as field]
[foreign-key :refer [ForeignKey]])
[metabase.util :as u]))
;; ## Ordering
;;
;; Fields should be returned in the following order:
;; 1. Breakout Fields
;;
;; 2. Aggregation Fields (e.g. sum, count)
;;
;; 3. Fields clause Fields, if they were added explicitly
;;
;; 4. All other Fields, sorted by:
;; A. :position (ascending)
;; Users can manually specify default Field ordering for a Table in the Metadata admin. In that case, return Fields in the specified
;; order; most of the time they'll have the default value of 0, in which case we'll compare...
;;
;; B. :special_type "group" -- :id Fields, then :name Fields, then everyting else
;; Attempt to put the most relevant Fields first. Order the Fields as follows:
;; 1. :id Fields
;; 2. :name Fields
;; 3. all other Fields
;;
;; C. Field Name
;; When two Fields have the same :position and :special_type "group", fall back to sorting Fields alphabetically by name.
;; This is arbitrary, but it makes the QP deterministic by keeping the results in a consistent order, which makes it testable.
(defn- order-cols
"Construct a sequence of column keywords that should be used for pulling ordered rows from RESULTS.
FIELDS should be a sequence of all `Fields` for the `Table` associated with QUERY."
[{{breakout-fields :breakout, {ag-type :aggregation-type} :aggregation, fields-fields :fields, fields-is-implicit :fields-is-implicit} :query} results fields]
(let [;; Get all the column name keywords returned by the results
result-kws (set (keys (first results)))
valid-kw? (partial contains? result-kws)
breakout-ids (map :field-id breakout-fields)
breakout-kws (->> (for [field breakout-fields]
(->> (rest (expand/qualified-name-components field)) ; TODO - this "qualified name for results" should be calculated in the Query expander
(interpose ".")
(apply str)
keyword))
(filter valid-kw?))
fields-ids (map :field-id fields-fields)
field-id->field (zipmap (map :id fields) fields)
;; Get IDs from Fields clause *if* it was added explicitly and other all other Field IDs for Table.
fields-ids (when-not fields-is-implicit fields-ids)
all-field-ids (->> fields ; Sort the Fields.
(sort-by (fn [{:keys [position special_type name]}] ; For each field generate a vector of
[position ; [position special-type-group name]
(cond ; and Clojure will take care of the rest.
(= special_type :id) 0
(= special_type :name) 1
:else 2)
name]))
(map :id)) ; Return the sorted IDs
;; Get the aggregate column if any
ag-kws (when (and ag-type
(not= ag-type :rows))
(let [ag (if (= ag-type :distinct) :count
ag-type)]
[ag]))
;; Make a helper function that will take a sequence of Field IDs and convert them to corresponding column name keywords.
;; Don't include names that aren't part of RESULT-KWS: we fetch *all* the Fields for a Table regardless of the Query, so
;; there are likely some unused ones.
ids->kws (fn [field-ids]
(some->> (map field-id->field field-ids)
(map :name)
(map keyword)
(filter valid-kw?)))
;; Concat the Fields clause IDs + the sequence of all Fields ID for the Table.
;; Then filter out ones that appear in breakout clause and remove duplicates
;; which effectively gives us parts #3 and #4 from above.
non-breakout-ids (->> (concat fields-ids all-field-ids)
(filter (complement (partial contains? (set breakout-ids))))
distinct)
;; Use fn above to get the keyword column names of other non-aggregation fields [#3 and #4]
non-breakout-kws (->> (ids->kws non-breakout-ids)
(filter (complement (partial contains? (set ag-kws)))))
;; Collect all other Fields
other-kws (->> result-kws
(filter (complement (partial contains? (set (concat breakout-kws non-breakout-kws ag-kws)))))
sort)] ; sort by name so results are deterministic
(when (seq other-kws)
(log/warn (u/format-color 'red "Warning: not 100%% sure how to order these columns: %s" (vec other-kws))))
;; Now combine the breakout [#1] + aggregate [#2] + "non-breakout" [#3 & #4] column name keywords into a single sequence
(when-not @(ns-resolve 'metabase.driver.query-processor '*disable-qp-logging*)
(log/debug (u/format-color 'magenta "Using this ordering: breakout: %s, ag: %s, non-breakout: %s, other: %s"
(vec breakout-kws) (vec ag-kws) (vec non-breakout-kws) (vec other-kws))))
(let [ordered-kws (concat breakout-kws ag-kws non-breakout-kws other-kws)]
(assert (and (= (set ordered-kws) result-kws)
(= (count ordered-kws) (count result-kws)))
(format "Order-cols returned invalid results: expected %s, got %s\nbreakout: %s, ag: %s, non-breakout: %s, other: %s" result-kws (vec ordered-kws)
(vec breakout-kws) (vec ag-kws) (vec non-breakout-kws) (vec other-kws)))
ordered-kws)))
(defn- add-fields-extra-info
"Add `:extra_info` about `ForeignKeys` to `Fields` whose `special_type` is `:fk`."
[fields]
;; Get a sequence of add Field IDs that have a :special_type of FK
(let [fk-field-ids (->> fields
(filter #(= (:special_type %) :fk))
(map :id)
(filter identity))
;; Look up the Foreign keys info if applicable.
;; Build a map of FK Field IDs -> Destination Field IDs
field-id->dest-field-id (when (seq fk-field-ids)
(sel :many :field->field [ForeignKey :origin_id :destination_id], :origin_id [in fk-field-ids], :destination_id [not= nil]))
;; Build a map of Destination Field IDs -> Destination Fields
dest-field-id->field (when (and (seq fk-field-ids)
(seq (vals field-id->dest-field-id)))
(sel :many :id->fields [Field :id :name :table_id :description :base_type :special_type], :id [in (vals field-id->dest-field-id)]))]
;; Add the :extra_info + :target to every Field. For non-FK Fields, these are just {} and nil, respectively.
(for [{field-id :id, :as field} fields]
(let [dest-field (when (seq fk-field-ids)
(some->> field-id
field-id->dest-field-id
dest-field-id->field))]
(assoc field
:target dest-field
:extra_info (if-not dest-field {}
{:target_table_id (:table_id dest-field)}))))))
(defn- get-cols-info
"Get column info for the `:cols` part of the QP results."
[{{{ag-type :aggregation-type, ag-field :field} :aggregation} :query} fields ordered-col-kws join-table-ids]
(let [field-kw->field (zipmap (map #(keyword (:name %)) fields)
fields)
field-id->field (delay (zipmap (map :id fields) ; a delay since we probably won't need it
fields))]
(->> (for [col-kw ordered-col-kws]
(or
;; If col-kw is a known Field return that
(field-kw->field col-kw)
;; Otherwise if this Query included any joins then attempt to lookup a matching Field from one of the join tables
(and (seq join-table-ids)
(sel :one :fields [Field :id :table_id :name :description :base_type :special_type], :name (name col-kw), :table_id [in join-table-ids]))
;; Otherwise if this is a nested Field recursively find the appropriate info
(let [name-components (s/split (name col-kw) #"\.")]
(when (> (count name-components) 1)
;; Find the nested Field by recursing through each Field's :children
(loop [field-kw->field field-kw->field, [component & more] (map keyword name-components)]
(when-let [f (field-kw->field component)]
(if-not (seq more)
;; If the are no more components to recurse through give the resulting Field a qualified name like "source.service" and return it
(assoc f :name (apply str (interpose "." name-components)))
;; Otherwise recurse with a map of child-name-kw -> child and the rest of the name components
(recur (zipmap (map (comp keyword :name) (:children f))
(:children f))
more))))))
;; Otherwise it is an aggregation column like :sum, build a map of information to return
(merge (assert ag-type)
{:name (name col-kw)
:id nil
:table_id nil
:description nil}
(cond
;; avg, stddev, and sum should inherit the base_type and special_type from the Field they're aggregating
(contains? #{:avg :stddev :sum} col-kw) {:base_type (:base-type ag-field)
:special_type (:special-type ag-field)}
;; count should always be IntegerField/number
(= col-kw :count) {:base_type :IntegerField
:special_type :number}
;; Otherwise something went wrong !
:else (do (log/error (u/format-color 'red "Annotation failed: don't know what to do with Field '%s'.\nExpected these Fields:\n%s"
col-kw
(u/pprint-to-str field-kw->field)))
{:base_type :UnknownField
:special_type nil})))))
;; Add FK info the the resulting Fields
add-fields-extra-info
;; Remove extra data from the resulting Fields
(map (u/rpartial dissoc :children :parent_id)))))
(defn post-annotate
"Take a sequence of RESULTS of executing QUERY and return the \"annotated\" results we pass to postprocessing -- the map with `:cols`, `:columns`, and `:rows`.
RESULTS should be a sequence of *maps*, keyed by result column -> value."
[qp]
(fn [{{:keys [join-tables] {source-table-id :id} :source-table} :query, :as query}]
(let [{:keys [results uncastify-fn]} (qp query)
results (if-not uncastify-fn results
(for [row results]
(m/map-keys uncastify-fn row)))
_ (when-not @(ns-resolve 'metabase.driver.query-processor '*disable-qp-logging*)
(log/debug (u/format-color 'magenta "Driver QP returned results with keys: %s." (vec (keys (first results))))))
join-table-ids (set (map :table-id join-tables))
fields (field/unflatten-nested-fields (sel :many :fields [Field :id :table_id :name :description :base_type :special_type :parent_id], :table_id source-table-id, :active true))
ordered-col-kws (order-cols query results fields)]
{:rows (for [row results]
(mapv row ordered-col-kws)) ; might as well return each row and col info as vecs because we're not worried about making
:columns (mapv name ordered-col-kws) ; making them lazy, and results are easier to play with in the REPL / paste into unit tests
:cols (vec (get-cols-info query fields ordered-col-kws join-table-ids))}))) ; as vecs. Make sure :rows stays lazy!
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment