Skip to content
Snippets Groups Projects
Commit 499d2de0 authored by Cam Saül's avatar Cam Saül
Browse files

re-worked annotation QP post-processing step.

parent 165a83cc
Branches
Tags
No related merge requests found
......@@ -8,7 +8,6 @@
[metabase.driver.query-processor :as qp]
(metabase.driver.generic-sql [native :as native]
[util :refer :all])
[metabase.driver.generic-sql.query-processor.annotate :as annotate]
(metabase.models [database :refer [Database]]
[field :refer [Field]]
[table :refer [Table]])
......@@ -34,6 +33,15 @@
`(let [entity# (table-id->korma-entity ~source_table)]
(select entity# ~@forms)))))
(defn- uncastify
"Remove CAST statements from a column name if needed.
(uncastify \"DATE\") -> \"DATE\"
(uncastify \"CAST(DATE AS DATE)\") -> \"DATE\""
[column-name]
(let [column-name (name column-name)]
(keyword (or (second (re-find #"CAST\(([^\s]+) AS [\w]+\)" column-name))
column-name))))
(defn process-structured
"Convert QUERY into a korma `select` form, execute it, and annotate the results."
......@@ -42,9 +50,9 @@
(map? (:query query))
(= (name (:type query)) "query")]}
(try
(->> (process query)
eval
(annotate/annotate query))
(as-> (process query) results
(eval results)
(qp/annotate query results uncastify))
(catch java.sql.SQLException e
(let [^String message (or (->> (.getMessage e) ; error message comes back like "Error message ... [status-code]" sometimes
(re-find #"(?s)(^.*)\s+\[[\d-]+\]$") ; status code isn't useful and makes unit tests hard to write so strip it off
......@@ -141,11 +149,13 @@
{lon-kw ['> lon-min]}]))
[_ field-id & _] {(field-id->kw field-id)
;; If the field in question is a date field we need to cast the YYYY-MM-DD string that comes back from the UI to a SQL date
(let [cast-value-if-needed (cond
(date-field-id? field-id) u/parse-date-yyyy-mm-dd
(= (field-id->special-type field-id)
:timestamp_seconds) u/date-yyyy-mm-dd->unix-timestamp
:else identity)]
(let [cast-value-if-needed (fn [v]
(cond (= (type v) java.sql.Date) `(raw ~(format "CAST('%s' AS DATE)" (.toString ^java.sql.Date v)))
(not (string? v)) v
(date-field-id? field-id) (u/parse-date-yyyy-mm-dd v)
(= (field-id->special-type field-id)
:timestamp_seconds) (u/date-yyyy-mm-dd->unix-timestamp v)
:else v))]
(match subclause
["NOT_NULL" _] ['not= nil]
["IS_NULL" _] ['= nil]
......
(ns metabase.driver.generic-sql.query-processor.annotate
"Functions related to annotating results returned by the Query Processor."
(:require [metabase.db :refer :all]
[metabase.driver.query-processor :as qp]
[metabase.driver.generic-sql.util :as gsu]
[metabase.models.field :refer [Field field->fk-table]]))
(declare get-column-names
get-column-info
uncastify)
(defn annotate
"Take raw RESULTS from running QUERY and convert them to the format expected by the front-end.
Add the following columns (under `:data`):
* `:rows` a sequence of result rows
* `:columns` ordered sequence of column names
* `:cols` ordered sequence of information about each column, such as `:base_type` and `:special_type`"
[query results]
(let [column-names (get-column-names query results)
column-name-kws (map keyword column-names)]
{:rows (->> results
(map (fn [row]
(map row column-name-kws))))
:columns (map uncastify column-names)
:cols (get-column-info query column-names)}))
(defn- order-columns
[query castified-field-names]
(binding [qp/*uncastify-fn* uncastify]
(qp/order-columns query castified-field-names)))
(defn- get-column-names
"Get an ordered seqences of column names for the results.
If a `fields` clause was specified in the Query Dict, we want to return results in the same order."
[query results]
(let [field-ids (-> query :query :fields)
fields-clause-fields (when-not (or (empty? field-ids)
(= field-ids [nil]))
(let [field-id->name (->> (sel :many [Field :id :name :base_type]
:id [in field-ids]) ; Fetch names of fields from `fields` clause
(map (fn [{:keys [id name base_type]}] ; build map of field-id -> field-name
{id (gsu/field-name+base-type->castified-key name base_type)}))
(into {}))]
(map field-id->name field-ids))) ; now get names in same order as the IDs
other-fields (->> (first results)
keys ; Get the names of any other fields that were returned (i.e., `sum`)
(filter #(not (contains? (set fields-clause-fields) %)))
(order-columns query))]
(->> (concat fields-clause-fields other-fields) ; Return a combined vector. Convert them to strs, otherwise korma
(filter identity) ; remove any nils -- don't want a NullPointerException
(map name)))) ; will qualify them like `"METABASE_FIELD"."FOLLOWERS_COUNT"
(defn- uncastify
"Remove CAST statements from a column name if needed.
(uncastify \"DATE\") -> \"DATE\"
(uncastify \"CAST(DATE AS DATE)\") -> \"DATE\""
[column-name]
(or (second (re-find #"CAST\(([^\s]+) AS [\w]+\)" column-name))
column-name))
(defn get-column-info
"Wrapper for `metabase.driver.query-processor/get-column-info` that calls `uncastify` on column names."
[query column-names]
(qp/get-column-info query (map uncastify column-names)))
......@@ -257,15 +257,8 @@
;; TODO - This is similar to the implementation in generic-sql; can we combine them and move it into metabase.driver.query-processor?
(defn annotate-results
"Add column information, `row_count`, etc. to the results of a Mongo QP query."
[{:keys [source_table] :as query} results]
{:pre [(integer? source_table)]}
(let [field-name->field (sel :many :field->obj [Field :name] :table_id source_table)
column-keys (qp/order-columns {:query query} (keys (first results)))
column-names (map name column-keys)]
{:columns column-names
:cols (qp/get-column-info {:query query} column-names)
:rows (map #(map % column-keys)
results)}))
[query results]
(qp/annotate query results))
;; ## CLAUSE APPLICATION 2.0
......@@ -308,15 +301,12 @@
(memoize
(fn [field-id]
(let [{base-type :base_type, field-name :name, special-type :special_type} (sel :one [Field :base_type :name :special_type] :id field-id)]
(cond
(contains? #{:DateField :DateTimeField} base-type) u/parse-date-yyyy-mm-dd
(= special-type :timestamp_seconds) u/date-yyyy-mm-dd->unix-timestamp
(and (= field-name "_id")
(= base-type :UnknownField)) ->ObjectId
:else identity))))))
(if (and (= field-name "_id")
(= base-type :UnknownField)) ->ObjectId
identity))))))
(defn- cast-value-if-needed
"* Convert dates (which come back as `YYYY-MM-DD` strings) to `java.util.Date`
"* Convert dates (which come back as `YYYY-MM-DD` strings) to `java.sql.Date`
* Convert ID strings to `ObjectId`
* Return other values as-is"
[field-id ^String value]
......
......@@ -3,9 +3,11 @@
(:require [clojure.core.match :refer [match]]
[clojure.tools.logging :as log]
[korma.core :refer :all]
[medley.core :as m]
[metabase.db :refer :all]
[metabase.driver.interface :as i]
[metabase.models.field :refer [Field field->fk-table]]
(metabase.models [field :refer [Field]]
[foreign-key :refer [ForeignKey]])
[metabase.util :as u]))
(declare add-implicit-breakout-order-by
......@@ -232,7 +234,14 @@
"Apply post-processing steps to the RESULTS of a QUERY, such as applying cumulative sum."
[driver query results]
{:pre [(map? query)
(map? results)]}
(map? results)
(sequential? (:columns results))
(sequential? (:cols results))
(sequential? (:rows results))]}
;; Double-check that there are no duplicate columns in results
(assert (= (count (:columns results))
(count (set (:columns results))))
(format "Duplicate columns in results: %s" (vec (:columns results))))
(->> results
limit-max-result-rows
(#(case (keyword (:type query))
......@@ -241,111 +250,115 @@
add-row-count-and-status))
;; # COMMON ANNOTATION FNS
(defn get-column-info
"Get extra information about result columns. This is done by looking up matching `Fields` for the `Table` in QUERY or looking up
information about special columns such as `count` via `get-special-column-info`."
[{{table-id :source_table} :query, :as query} column-names]
{:pre [(integer? table-id)
(every? string? column-names)]}
(let [columns (->> (sel :many [Field :id :table_id :name :description :base_type :special_type] ; lookup columns with matching names for this Table
:table_id table-id :name [in (set column-names)])
(map (fn [{:keys [name] :as column}] ; build map of column-name -> column
{name (-> (select-keys column [:id :table_id :name :description :base_type :special_type])
(assoc :extra_info (if-let [fk-table (field->fk-table column)]
{:target_table_id (:id fk-table)}
{})))}))
(into {}))]
(->> column-names
(map (fn [column-name]
(try
(or (columns column-name) ; try to get matching column from the map we build earlier
(get-special-column-info query column-name)) ; if it's not there then it's a special column like `count`
(catch Throwable _ ; If for some reason column info lookup failed just return empty info map
{:name column-name ; TODO - should we log this ? It shouldn't be happening ideally
:id nil
:table_id nil
:description nil
:base_type :UnknownField
:special_type nil})))))))
(defn get-special-column-info
"Get info like `:base_type` and `:special_type` for a special aggregation column like `count` or `sum`."
[query column-name]
{:pre [(:query query)]}
(merge {:name column-name
:id nil
:table_id nil
:description nil}
(let [aggregation-type (keyword column-name) ; For aggregations of a specific Field (e.g. `sum`)
field-aggregation? (contains? #{:avg :stddev :sum} aggregation-type)] ; lookup the field we're aggregating and return its
(if field-aggregation? (sel :one :fields [Field :base_type :special_type] ; type info. (The type info of the aggregate result
:id (-> query :query :aggregation second)) ; will be the same.)
(case aggregation-type ; Otherwise for general aggregations such as `count`
:count {:base_type :IntegerField ; just return hardcoded type info
:special_type :number})))))
(def ^:dynamic *uncastify-fn*
"Function that should be called to transform a column name from the set of results to one that matches a `Field` in the DB.
The default implementation returns the column name as is; others, such as `generic-sql`, provide implementations that remove
remove casting statements and the like."
identity)
;; TODO - since this was moved over from generic SQL some of its functionality should be reworked. And dox updated.
;; (Since castification is basically SQL-specific it would make sense to handle castification / decastification separately)
;; Fix this when I'm not burnt out on driver code
(defn -order-columns
"Don't use this directly; use `order-columns`.
This broken out for testability -- it doesn't depend on data from the DB."
[fields breakout-field-ids field-field-ids castified-field-names]
;; Basically we want to convert both BREAKOUT-FIELD-IDS and CASTIFIED-FIELD-NAMES to maps like:
;; {:name "updated_at"
;; :id 224
;; :castified (keyword "CAST(updated_at AS DATE)")
;; :position 21}
;; Then we can order things appropriately and return the castified names.
(let [uncastified->castified (zipmap (map #(*uncastify-fn* (name %)) castified-field-names) castified-field-names)
fields (map #(assoc % :castified (uncastified->castified (:name %)))
fields)
id->field (zipmap (map :id fields) fields)
castified->field (zipmap (map :castified fields) fields)
breakout-fields (->> breakout-field-ids
(map id->field))
field-fields (->> field-field-ids
(map id->field))
other-fields (->> castified-field-names
(map (fn [castified-name]
(or (castified->field castified-name)
{:castified castified-name ; for aggregate fields like 'count' create a fake map
:position 0}))) ; with position 0 so it is returned ahead of the other fields
(filter #(not (or (contains? (set breakout-field-ids)
(:id %))
(contains? (set field-field-ids)
(:id %)))))
(sort-by :position))]
(->> (concat breakout-fields field-fields other-fields)
(map :castified)
(filter identity))))
(defn order-columns
"Return CASTIFIED-FIELD-NAMES in the order we'd like to display them in the output.
They should be ordered as follows:
1. All breakout fields, in the same order as BREAKOUT-FIELD-IDS
2. Any aggregate fields like `count`
3. Fields included in the `fields` clause
4. All other columns in the same order as `Field.position`."
[{{source-table :source_table, breakout-field-ids :breakout, field-field-ids :fields} :query} castified-field-names]
{:post [(every? keyword? %)]}
(try
(-order-columns (sel :many :fields [Field :id :name :position] :table_id source-table)
breakout-field-ids
(when-not (:fields-is-implicit @*internal-context*) field-field-ids)
castified-field-names)
(catch Exception e
(.printStackTrace e)
(log/error (.getMessage e)))))
;; # ANNOTATION 2.0
;; ## Ordering
;;
;; Fields should be returned in the following order:
;;
;; 1. Breakout Fields
;; 2. Aggregation Fields (e.g. sum, count)
;; 3. Fields clause Fields, if they were added explicitly
;; 4. All other Fields, sorted by :position
(defn- order-cols
"Construct a sequence of column keywords that should be used for pulling ordered rows from RESULTS.
FIELDS should be a sequence of all `Fields` for the `Table` associated with QUERY."
[{{breakout-ids :breakout, fields-ids :fields} :query} results fields]
{:post [(= (set %)
(set (keys (first results))))]}
;; Order needs to be [breakout-cols aggregate-cols fields-cols other-cols]
(let [field-id->field (zipmap (map :id fields) fields)
;; Get IDs from Fields clause *if* it was added explicitly and other all other Field IDs for Table. Filter out :breakout field IDs
fields-ids (when-not (:fields-is-implicit @*internal-context*) fields-ids)
all-field-ids (map :id fields)
non-breakout-ids (->> (concat fields-ids all-field-ids)
(filter (complement (partial contains? (set breakout-ids))))
distinct)
;; Get all the keywords returned by the results
result-kws (set (keys (first results)))
;; Convert breakout/non-breakout IDs to keywords
ids->kws #(some->> (map field-id->field %)
(map :name)
(map keyword)
(filter (partial contains? result-kws)))
breakout-kws (ids->kws breakout-ids)
non-breakout-kws (ids->kws non-breakout-ids)
;; Get the results kws specific to :aggregation (not part of breakout/non-breakout-kws)
ag-kws (->> result-kws
(filter (complement (partial contains? (set (concat breakout-kws non-breakout-kws))))))]
;; Create a combined sequence of aggregate result KWs + other ordered kws
(concat breakout-kws ag-kws non-breakout-kws)))
(defn- add-fields-extra-info
"Add `:extra_info` about `ForeignKeys` to `Fields` whose `special_type` is `:fk`."
[fields]
;; Get a sequence of add Field IDs that have a :special_type of FK
(let [fk-field-ids (->> fields
(filter #(= (:special_type %) :fk))
(map :id)
(filter identity))
;; Fetch maps of the info we need for :extra_info if there are any FK Fields
field-id->dest-field-id (when (seq fk-field-ids)
(sel :many :field->field [ForeignKey :origin_id :destination_id], :origin_id [in fk-field-ids]))
dest-field-id->table-id (when (seq fk-field-ids)
(sel :many :id->field [Field :table_id], :id [in (vals field-id->dest-field-id)]))]
;; Add :extra_info to every Field. Empty if it's not an FK, otherwise add a map with target Table ID
(for [{:keys [special_type], :as field} fields]
(cond-> field
(:id field) (assoc :extra_info (if-not (= special_type :fk) {}
{:target_table_id (->> (:id field)
field-id->dest-field-id
dest-field-id->table-id)}))))))
(defn- get-cols-info
"Get column info for the `:cols` part of the QP results."
[{{[ag-type ag-field-id] :aggregation} :query} fields ordered-col-kws]
(let [field-kw->field (zipmap (map #(keyword (:name %)) fields)
fields)
field-id->field (delay (zipmap (map :id fields) ; a delay since we probably won't need it
fields))]
(->> (for [col-kw ordered-col-kws]
(or
;; If col-kw is a known Field return that
(field-kw->field col-kw)
;; Otherwise it is an aggregation column like :sum, build a map of information to return
(merge (assert ag-type)
{:name (name col-kw)
:id nil
:table_id nil
:description nil}
(cond
;; avg, stddev, and sum should inherit the base_type and special_type from the Field they're aggregating
(contains? #{:avg :stddev :sum} col-kw) (-> (@field-id->field ag-field-id)
(select-keys [:base_type :special_type]))
;; count should always be IntegerField/number
(= col-kw :count) {:base_type :IntegerField
:special_type :number}
;; Otherwise something went wrong !
:else (throw (Exception. (format "Don't know what to do with Field '%s'." col-kw)))))))
;; Add FK info the the resulting Fields
add-fields-extra-info)))
(defn annotate
"Take a sequence of RESULTS of executing QUERY and return the \"annotated\" results we pass to postprocessing -- the map with `:cols`, `:columns`, and `:rows`.
RESULTS should be a sequence of *maps*, keyed by result column -> value."
[{{:keys [source_table]} :query, :as query}, results & [uncastify-fn]]
{:pre [(integer? source_table)]}
(let [results (if-not uncastify-fn results
(for [row results]
(m/map-keys uncastify-fn row)))
fields (sel :many :fields [Field :id :table_id :name :description :base_type :special_type]
:table_id source_table
:active true
(order :position :asc)
(order :id :desc)) ; not sure why we're ordering things this way but this is what the tests expect so (?)
ordered-col-kws (order-cols query results fields)]
{:rows (for [row results]
(map row ordered-col-kws))
:columns (map name ordered-col-kws)
:cols (get-cols-info query fields ordered-col-kws)}))
......@@ -50,9 +50,11 @@
(java.text.SimpleDateFormat. "yyyy-MM-dd"))
(defn parse-date-yyyy-mm-dd
"Parse a date in the `yyyy-mm-dd` format and return a `java.util.Date`."
^java.util.Date [^String date]
(.parse simple-date-format date))
"Parse a date in the `yyyy-mm-dd` format and return a `java.sql.Date`."
^java.sql.Date [^String date]
(-> (.parse simple-date-format date)
.getTime
java.sql.Date.))
(defn date-yyyy-mm-dd->unix-timestamp
"Convert a string DATE in the `YYYY-MM-DD` format to a Unix timestamp in seconds."
......
(ns metabase.driver.generic-sql.query-processor.annotate-test
(:require [expectations :refer :all]
[metabase.driver.generic-sql.query-processor.annotate :refer :all]
[metabase.driver.query-processor :as qp]
[metabase.test.util :refer [resolve-private-fns]]))
(resolve-private-fns metabase.driver.generic-sql.query-processor.annotate uncastify)
(defn -order-columns [& args]
(binding [qp/*uncastify-fn* uncastify]
(apply qp/-order-columns args)))
;; ## TESTS FOR -ORDER-COLUMNS
(def ^:const ^:private mock-fields
[{:position 0, :name "id", :id 224}
{:position 1, :name "name", :id 341}
{:position 2, :name "author_name", :id 453}
{:position 3, :name "allows_suggestions", :id 433}
{:position 4, :name "author_url", :id 455}
{:position 5, :name "content_creator_id", :id 446}
{:position 6, :name "created_at", :id 263}
{:position 7, :name "description", :id 375}
{:position 8, :name "external_url", :id 424}
{:position 9, :name "updated_at", :id 284}])
(def ^:const ^:private mock-castified-field-names
[:allows_suggestions
:description
:author_url
:name
(keyword "CAST(updated_at AS DATE)")
:id
:author_name
:external_url
:content_creator_id
(keyword "CAST(created_at AS DATE)")])
;; Check that `Field.order` is respected. No breakout fields
(expect [:id
:name
:author_name
:allows_suggestions
:author_url
:content_creator_id
(keyword "CAST(created_at AS DATE)")
:description
:external_url
(keyword "CAST(updated_at AS DATE)")]
(-order-columns mock-fields [] [] mock-castified-field-names))
;; Check that breakout fields are returned first, in order, before other fields
(expect [:description
:allows_suggestions
:id
:name
:author_name
:author_url
:content_creator_id
(keyword "CAST(created_at AS DATE)")
:external_url
(keyword "CAST(updated_at AS DATE)")]
(-order-columns mock-fields [375 433] [] mock-castified-field-names))
;; Check that aggregate fields are returned ahead of other fields
(expect [:allows_suggestions
:description
:count
:id
:name
:author_name
:author_url
:content_creator_id
(keyword "CAST(created_at AS DATE)")
:external_url
(keyword "CAST(updated_at AS DATE)")]
(-order-columns mock-fields [433 375] [] (concat [:count] mock-castified-field-names)))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment