From 7977fd45a9d85d82793585f1d457f4b477829b35 Mon Sep 17 00:00:00 2001
From: Ryan Senior <ryan@metabase.com>
Date: Tue, 26 Jun 2018 12:50:33 -0500
Subject: [PATCH] Add support for binning of nested queries

To support this Cards now include fingerprints in the
result_metadata. That fingerprint is included in the query dictionary
and is available to the query pipeline. The expand/resolve/binning
middleware can make use of that fingerprint data to build the binned
query. Dimension options for these kinds of queries will also now be
available.

Note that binning is only available on questions that have result
metadata. If no result metadata (and thus no fingerprint) is
available, no dimensions options will be included and an exception
will be thrown if a binned query is attempted. In most cases, just
running a card once will populate that data.

Fixes #5938
---
 src/metabase/automagic_dashboards/core.clj    | 10 +++---
 src/metabase/query_processor/interface.clj    | 17 +++++----
 .../query_processor/middleware/binning.clj    | 11 +++---
 .../query_processor/middleware/expand.clj     |  2 +-
 .../middleware/expand_macros.clj              |  9 ++---
 .../middleware/fetch_source_query.clj         | 21 +++++++----
 .../query_processor/middleware/resolve.clj    | 30 ++++++++++++++--
 .../middleware/results_metadata.clj           |  6 ++--
 src/metabase/query_processor/util.clj         | 15 +++++++-
 test/metabase/api/table_test.clj              | 26 ++++++++++++++
 .../middleware/fetch_source_query_test.clj    | 19 +++++-----
 .../middleware/results_metadata_test.clj      |  4 ++-
 .../query_processor_test/breakout_test.clj    | 36 ++++++++++++++++++-
 13 files changed, 156 insertions(+), 50 deletions(-)

diff --git a/src/metabase/automagic_dashboards/core.clj b/src/metabase/automagic_dashboards/core.clj
index 844cf3725b2..36859f386c7 100644
--- a/src/metabase/automagic_dashboards/core.clj
+++ b/src/metabase/automagic_dashboards/core.clj
@@ -430,12 +430,10 @@
 
 (defn- build-query
   ([context bindings filters metrics dimensions limit order_by]
-   (walk/postwalk
-    (fn [subform]
-      (if (rules/dimension-form? subform)
-        (let [[_ identifier opts] subform]
-          (->reference :mbql (-> identifier bindings (merge opts))))
-        subform))
+   (qp.util/postwalk-pred
+    rules/dimension-form?
+    (fn [[_ identifier opts]]
+      (->reference :mbql (-> identifier bindings (merge opts))))
     {:type     :query
      :database (-> context :root :database)
      :query    (cond-> {:source_table (if (->> context :source (instance? (type Table)))
diff --git a/src/metabase/query_processor/interface.clj b/src/metabase/query_processor/interface.clj
index 42359622cc3..c1215ac47bf 100644
--- a/src/metabase/query_processor/interface.clj
+++ b/src/metabase/query_processor/interface.clj
@@ -177,11 +177,18 @@
   [unit]
   (contains? relative-datetime-value-units (keyword unit)))
 
+(def binning-strategies
+  "Valid binning strategies for a `BinnedField`"
+  #{:num-bins :bin-width :default})
+
 ;; TODO - maybe we should figure out some way to have the schema validate that the driver supports field literals,
 ;; like we do for some of the other clauses. Ideally we'd do that in a more generic way (perhaps in expand, we could
 ;; make the clauses specify required feature metadata and have that get checked automatically?)
-(s/defrecord FieldLiteral [field-name    :- su/NonBlankString
-                           base-type     :- su/FieldType]
+(s/defrecord FieldLiteral [field-name       :- su/NonBlankString
+                           base-type        :- su/FieldType
+                           binning-strategy :- (s/maybe (apply s/enum binning-strategies))
+                           binning-param    :- (s/maybe s/Num)
+                           fingerprint      :- (s/maybe i/Fingerprint)]
   nil
   :load-ns true
   clojure.lang.Named
@@ -210,11 +217,7 @@
   nil
   :load-ns true)
 
-(def binning-strategies
-  "Valid binning strategies for a `BinnedField`"
-  #{:num-bins :bin-width :default})
-
-(s/defrecord BinnedField [field     :- Field
+(s/defrecord BinnedField [field     :- (s/cond-pre Field FieldLiteral)
                           strategy  :- (apply s/enum binning-strategies)
                           num-bins  :- s/Int
                           min-value :- s/Num
diff --git a/src/metabase/query_processor/middleware/binning.clj b/src/metabase/query_processor/middleware/binning.clj
index 1cde0589b6d..53c333bb23e 100644
--- a/src/metabase/query_processor/middleware/binning.clj
+++ b/src/metabase/query_processor/middleware/binning.clj
@@ -3,7 +3,8 @@
             [clojure.walk :as walk]
             [metabase
              [public-settings :as public-settings]
-             [util :as u]])
+             [util :as u]]
+            [metabase.query-processor.util :as qputil])
   (:import [metabase.query_processor.interface BetweenFilter BinnedField ComparisonFilter]))
 
 (defn- update!
@@ -162,8 +163,6 @@
   (fn [query]
     (let [filter-field-map (filter->field-map (get-in query [:query :filter]))]
       (qp
-       (walk/postwalk (fn [node]
-                        (if (instance? BinnedField node)
-                          (update-binned-field node filter-field-map)
-                          node))
-                      query)))))
+       (qputil/postwalk-pred #(instance? BinnedField %)
+                             #(update-binned-field % filter-field-map)
+                             query)))))
diff --git a/src/metabase/query_processor/middleware/expand.clj b/src/metabase/query_processor/middleware/expand.clj
index 19ca6f57fd6..3ce1dc965d5 100644
--- a/src/metabase/query_processor/middleware/expand.clj
+++ b/src/metabase/query_processor/middleware/expand.clj
@@ -225,7 +225,7 @@
 
 ;;; ## breakout & fields
 
-(s/defn ^:ql binning-strategy :- FieldPlaceholder
+(s/defn ^:ql binning-strategy :- (s/cond-pre FieldPlaceholder FieldLiteral)
   "Reference to a `BinnedField`. This is just a `Field` reference with an associated `STRATEGY-NAME` and
   `STRATEGY-PARAM`"
   ([f strategy-name & [strategy-param]]
diff --git a/src/metabase/query_processor/middleware/expand_macros.clj b/src/metabase/query_processor/middleware/expand_macros.clj
index b40b54d579a..aa7f74ddc40 100644
--- a/src/metabase/query_processor/middleware/expand_macros.clj
+++ b/src/metabase/query_processor/middleware/expand_macros.clj
@@ -105,12 +105,9 @@
     (maybe-unnest-ag-clause ag-clause)))
 
 (defn- expand-metrics-in-ag-clause [query-dict filter-clauses-atom]
-  (walk/postwalk
-   (fn [form]
-     (if-not (metric? form)
-       form
-       (expand-metric form filter-clauses-atom)))
-   query-dict))
+  (qputil/postwalk-pred metric?
+                        #(expand-metric % filter-clauses-atom)
+                        query-dict))
 
 (defn merge-filter-clauses
   "Merge filter clauses."
diff --git a/src/metabase/query_processor/middleware/fetch_source_query.clj b/src/metabase/query_processor/middleware/fetch_source_query.clj
index c8498e41269..997d146e13c 100644
--- a/src/metabase/query_processor/middleware/fetch_source_query.clj
+++ b/src/metabase/query_processor/middleware/fetch_source_query.clj
@@ -24,7 +24,7 @@
 (defn- card-id->source-query
   "Return the source query info for Card with CARD-ID."
   [card-id]
-  (let [card       (db/select-one ['Card :dataset_query :database_id] :id card-id)
+  (let [card       (db/select-one ['Card :dataset_query :database_id :result_metadata] :id card-id)
         card-query (:dataset_query card)]
     (assoc (or (:query card-query)
                (when-let [native (:native card-query)]
@@ -33,7 +33,8 @@
                (throw (Exception. (str "Missing source query in Card " card-id))))
       ;; include database ID as well; we'll pass that up the chain so it eventually gets put in its spot in the
       ;; outer-query
-      :database (:database card-query))))
+      :database        (:database card-query)
+      :result_metadata (:result_metadata card))))
 
 (defn- source-table-str->source-query
   "Given a SOURCE-TABLE-STR like `card__100` return the appropriate source query."
@@ -41,7 +42,10 @@
   (let [[_ card-id-str] (re-find #"^card__(\d+)$" source-table-str)]
     (u/prog1 (card-id->source-query (Integer/parseInt card-id-str))
       (when-not i/*disable-qp-logging*
-        (log/info "\nFETCHED SOURCE QUERY FROM CARD" card-id-str ":\n" (u/pprint-to-str 'yellow <>))))))
+        (log/infof "\nFETCHED SOURCE QUERY FROM CARD %s:\n%s"
+                   card-id-str
+                   ;; No need to include result metadata here, it can be large and will clutter the logs
+                   (u/pprint-to-str 'yellow (dissoc <> :result_metadata)))))))
 
 (defn- expand-card-source-tables
   "If `source-table` is a Card reference (a string like `card__100`) then replace that with appropriate
@@ -58,8 +62,9 @@
             ;; Add new `source-query` info in its place. Pass the database ID up the chain, removing it from the
             ;; source query
             (assoc
-              :source-query (dissoc source-query :database)
-              :database     (:database source-query)))))))
+              :source-query    (dissoc source-query :database :result_metadata)
+              :database        (:database source-query)
+              :result_metadata (:result_metadata source-query)))))))
 
 (defn- fetch-source-query* [{inner-query :query, :as outer-query}]
   (if-not inner-query
@@ -68,9 +73,11 @@
     ;; otherwise attempt to expand any source queries as needed
     (let [expanded-inner-query (expand-card-source-tables inner-query)]
       (merge outer-query
-             {:query (dissoc expanded-inner-query :database)}
+             {:query (dissoc expanded-inner-query :database :result_metadata)}
              (when-let [database (:database expanded-inner-query)]
-               {:database database})))))
+               {:database database})
+             (when-let [result-metadata (:result_metadata expanded-inner-query)]
+               {:result_metadata result-metadata})))))
 
 (defn fetch-source-query
   "Middleware that assocs the `:source-query` for this query if it was specified using the shorthand `:source-table`
diff --git a/src/metabase/query_processor/middleware/resolve.clj b/src/metabase/query_processor/middleware/resolve.clj
index 78cf8066fce..eacd0d3475b 100644
--- a/src/metabase/query_processor/middleware/resolve.clj
+++ b/src/metabase/query_processor/middleware/resolve.clj
@@ -25,7 +25,7 @@
              [db :as db]
              [hydrate :refer [hydrate]]])
   (:import java.util.TimeZone
-           [metabase.query_processor.interface DateTimeField DateTimeValue ExpressionRef Field FieldPlaceholder
+           [metabase.query_processor.interface DateTimeField DateTimeValue ExpressionRef Field FieldLiteral FieldPlaceholder
             RelativeDatetime RelativeDateTimeValue TimeField TimeValue Value ValuePlaceholder]))
 
 ;;; ---------------------------------------------------- UTIL FNS ----------------------------------------------------
@@ -161,7 +161,7 @@
 
 ;;; ----------------------------------------------- FIELD PLACEHOLDER ------------------------------------------------
 
-(defn- resolve-binned-field [{:keys [binning-strategy binning-param] :as field-ph} field]
+(defn- resolve-binned-field [binning-strategy binning-param field]
   (let [binned-field (i/map->BinnedField {:field    field
                                           :strategy binning-strategy})]
     (case binning-strategy
@@ -200,7 +200,7 @@
       (i/map->TimeField {:field field})
 
       binning-strategy
-      (resolve-binned-field this field)
+      (resolve-binned-field binning-strategy binning-param field)
 
       :else field)
     ;; If that fails just return ourselves as-is
@@ -399,6 +399,29 @@
           (assoc-in <> [:query :join-tables]  joined-tables)
           (walk/postwalk #(resolve-table % fk-id+table-id->table) <>))))))
 
+(defn- resolve-field-literals
+  "When resolving a field, we connect a `field-id` with a `Field` in our metadata tables. This is a similar process
+  for `FieldLiteral`s, except we are attempting to connect a `FieldLiteral` with an associated entry in the
+  `result_metadata` attached to the query (typically from the `Card` of a nested query)."
+  [{:keys [result_metadata] :as expanded-query-dict}]
+  (let [name->fingerprint (zipmap (map :name result_metadata)
+                                  (map :fingerprint result_metadata))]
+    (qputil/postwalk-pred #(instance? FieldLiteral %)
+                          (fn [{:keys [binning-strategy binning-param] :as node}]
+                            (let [fingerprint     (get name->fingerprint (:field-name node))
+                                  node-with-print (assoc node :fingerprint fingerprint)]
+                              (cond
+                                ;; We can't bin without min/max values found from a fingerprint
+                                (and binning-strategy (not fingerprint))
+                                (throw (Exception. "Binning not supported on a field literal with no fingerprint"))
+
+                                (and fingerprint binning-strategy)
+                                (resolve-binned-field binning-strategy binning-param node-with-print)
+
+                                :else
+                                node-with-print)))
+                          expanded-query-dict)))
+
 
 ;;; ------------------------------------------------ PUBLIC INTERFACE ------------------------------------------------
 
@@ -408,6 +431,7 @@
   (some-> expanded-query-dict
           record-fk-field-ids
           resolve-fields
+          resolve-field-literals
           resolve-tables))
 
 (defn resolve-middleware
diff --git a/src/metabase/query_processor/middleware/results_metadata.clj b/src/metabase/query_processor/middleware/results_metadata.clj
index 839a4ea8ebf..76110b33ed0 100644
--- a/src/metabase/query_processor/middleware/results_metadata.clj
+++ b/src/metabase/query_processor/middleware/results_metadata.clj
@@ -7,6 +7,7 @@
             [clojure.tools.logging :as log]
             [metabase.models.humanization :as humanization]
             [metabase.query-processor.interface :as i]
+            [metabase.sync.interface :as si]
             [metabase.util :as u]
             [metabase.util
              [encryption :as encryption]
@@ -29,7 +30,8 @@
                                         (s/optional-key :description)  (s/maybe su/NonBlankString)
                                         :base_type                     su/FieldTypeKeywordOrString
                                         (s/optional-key :special_type) (s/maybe su/FieldTypeKeywordOrString)
-                                        (s/optional-key :unit)         (s/maybe DateTimeUnitKeywordOrString)}]
+                                        (s/optional-key :unit)         (s/maybe DateTimeUnitKeywordOrString)
+                                        (s/optional-key :fingerprint)  (s/maybe si/Fingerprint)}]
                                       "Valid array of results column metadata maps")
     "value must be an array of valid results column metadata maps."))
 
@@ -44,7 +46,7 @@
           ;; if base-type isn't set put a default one in there. Similarly just use humanized value of `:name` for `:display_name` if one isn't set
           {:base_type    :type/*
            :display_name (humanization/name->human-readable-name (name (:name col)))}
-          (u/select-non-nil-keys col [:name :display_name :description :base_type :special_type :unit])
+          (u/select-non-nil-keys col [:name :display_name :description :base_type :special_type :unit :fingerprint])
           ;; since years are actually returned as text they can't be used for breakout purposes so don't advertise them as DateTime columns
           (when (= (:unit col) :year)
             {:base_type :type/Text
diff --git a/src/metabase/query_processor/util.clj b/src/metabase/query_processor/util.clj
index 5441fa30c01..fedcc1671da 100644
--- a/src/metabase/query_processor/util.clj
+++ b/src/metabase/query_processor/util.clj
@@ -4,7 +4,9 @@
              [codecs :as codecs]
              [hash :as hash]]
             [cheshire.core :as json]
-            [clojure.string :as str]
+            [clojure
+             [string :as str]
+             [walk :as walk]]
             [metabase.util :as u]
             [metabase.util.schema :as su]
             [schema.core :as s]))
@@ -138,3 +140,14 @@
     (when (string? source-table)
       (when-let [[_ card-id-str] (re-matches #"^card__(\d+$)" source-table)]
         (Integer/parseInt card-id-str)))))
+
+;;; ---------------------------------------- General Tree Manipulation Helpers ---------------------------------------
+
+(defn postwalk-pred
+  "Transform `form` by applying `f` to a node in the tree when that `pred` returns true for that node"
+  [pred f form]
+  (walk/postwalk (fn [node]
+                   (if (pred node)
+                     (f node)
+                     node))
+                 form))
diff --git a/test/metabase/api/table_test.clj b/test/metabase/api/table_test.clj
index 63682bd2c89..573b4bad625 100644
--- a/test/metabase/api/table_test.clj
+++ b/test/metabase/api/table_test.clj
@@ -667,3 +667,29 @@
 (expect
   #{:metrics :segments :linked-from :linking-to :tables}
   (-> ((user->client :crowberto) :get 200 (format "table/%s/related" (data/id :venues))) keys set))
+
+;; Nested queries with a fingerprint should have dimension options for binning
+(datasets/expect-with-engines (qpt/non-timeseries-engines-with-feature :binning)
+  (repeat 2 (var-get #'table-api/coordinate-dimension-indexes))
+  (tt/with-temp Card [card {:database_id   (data/id)
+                            :dataset_query {:database (data/id)
+                                            :type    :query
+                                            :query    {:source-query {:source-table (data/id :venues)}}}}]
+    ;; run the Card which will populate its result_metadata column
+    ((user->client :crowberto) :post 200 (format "card/%d/query" (u/get-id card)))
+    (let [response ((user->client :crowberto) :get 200 (format "table/card__%d/query_metadata" (u/get-id card)))]
+      (map #(dimension-options-for-field response %)
+           ["latitude" "longitude"]))))
+
+;; Nested queries missing a fingerprint should not show binning-options
+(datasets/expect-with-engines (qpt/non-timeseries-engines-with-feature :binning)
+  [nil nil]
+  (tt/with-temp Card [card {:database_id   (data/id)
+                            :dataset_query {:database (data/id)
+                                            :type    :query
+                                            :query    {:source-query {:source-table (data/id :venues)}}}}]
+    ;; By default result_metadata will be nil (and no fingerprint). Just asking for query_metadata after the card was
+    ;; created but before it was ran should not allow binning
+    (let [response ((user->client :crowberto) :get 200 (format "table/card__%d/query_metadata" (u/get-id card)))]
+      (map #(dimension-options-for-field response %)
+           ["latitude" "longitude"]))))
diff --git a/test/metabase/query_processor/middleware/fetch_source_query_test.clj b/test/metabase/query_processor/middleware/fetch_source_query_test.clj
index 0b9d4936ddb..8905749e71f 100644
--- a/test/metabase/query_processor/middleware/fetch_source_query_test.clj
+++ b/test/metabase/query_processor/middleware/fetch_source_query_test.clj
@@ -76,21 +76,22 @@
                        :query    {:source-table (str "card__" (u/get-id card))}})))
 
 (expect
-  (default-expanded-results
-   {:source-query {:source-table {:schema "PUBLIC" :name "CHECKINS" :id (data/id :checkins)}, :join-tables nil}
-    :filter {:filter-type :between,
-             :field {:field-name "date", :base-type :type/Date},
-             :min-val {:value (tcoerce/to-timestamp (du/str->date-time "2015-01-01"))
-                       :field {:field {:field-name "date", :base-type :type/Date}, :unit :default}},
-             :max-val {:value (tcoerce/to-timestamp (du/str->date-time "2015-02-01"))
-                       :field {:field {:field-name "date", :base-type :type/Date}, :unit :default}}}})
+  (let [date-field-literal {:field-name "date", :base-type :type/Date, :binning-strategy nil, :binning-param nil, :fingerprint nil}]
+    (default-expanded-results
+     {:source-query {:source-table {:schema "PUBLIC" :name "CHECKINS" :id (data/id :checkins)}, :join-tables nil}
+      :filter       {:filter-type :between,
+                     :field       date-field-literal
+                     :min-val     {:value (tcoerce/to-timestamp (du/str->date-time "2015-01-01"))
+                                   :field {:field date-field-literal, :unit :default}},
+                     :max-val     {:value (tcoerce/to-timestamp (du/str->date-time "2015-02-01"))
+                                   :field {:field date-field-literal, :unit :default}}}}))
   (tt/with-temp Card [card {:dataset_query {:database (data/id)
                                             :type     :query
                                             :query    {:source-table (data/id :checkins)}}}]
     (expand-and-scrub {:database database/virtual-id
                        :type     :query
                        :query    {:source-table (str "card__" (u/get-id card))
-                                  :filter ["BETWEEN" ["field-id" ["field-literal" "date" "type/Date"]] "2015-01-01" "2015-02-01"]}})))
+                                  :filter       ["BETWEEN" ["field-id" ["field-literal" "date" "type/Date"]] "2015-01-01" "2015-02-01"]}})))
 
 ;; make sure that nested nested queries work as expected
 (expect
diff --git a/test/metabase/query_processor/middleware/results_metadata_test.clj b/test/metabase/query_processor/middleware/results_metadata_test.clj
index 301942ad7b9..e681b7e3186 100644
--- a/test/metabase/query_processor/middleware/results_metadata_test.clj
+++ b/test/metabase/query_processor/middleware/results_metadata_test.clj
@@ -91,7 +91,9 @@
   [{:base_type    "type/Text"
     :display_name "Date"
     :name         "DATE"
-    :unit         nil}
+    :unit         nil
+    :fingerprint  {:global {:distinct-count 618}, :type {:type/DateTime {:earliest "2013-01-03T00:00:00.000Z"
+                                                                         :latest "2015-12-29T00:00:00.000Z"}}}}
    {:base_type    "type/Integer"
     :display_name "count"
     :name         "count"
diff --git a/test/metabase/query_processor_test/breakout_test.clj b/test/metabase/query_processor_test/breakout_test.clj
index cdca9fa8241..a0a7e040ae6 100644
--- a/test/metabase/query_processor_test/breakout_test.clj
+++ b/test/metabase/query_processor_test/breakout_test.clj
@@ -5,9 +5,11 @@
              [query-processor-test :refer :all]
              [util :as u]]
             [metabase.models
+             [card :refer [Card]]
              [dimension :refer [Dimension]]
              [field :refer [Field]]
              [field-values :refer [FieldValues]]]
+            [metabase.query-processor :as qp]
             [metabase.query-processor.middleware
              [add-dimension-projections :as add-dim-projections]
              [expand :as ql]]
@@ -17,7 +19,8 @@
             [metabase.test.data
              [dataset-definitions :as defs]
              [datasets :as datasets]]
-            [toucan.db :as db]))
+            [toucan.db :as db]
+            [toucan.util.test :as tt]))
 
 ;;; single column
 (qp-expect-with-all-engines
@@ -245,3 +248,34 @@
                         (ql/aggregation (ql/count))
                         (ql/breakout (ql/binning-strategy $latitude :default)))
         (select-keys [:status :class :error]))))
+
+(defn- field->result-metadata [field]
+  (select-keys field [:name :display_name :description :base_type :special_type :unit :fingerprint]))
+
+(defn- nested-venues-query [card-or-card-id]
+  {:database metabase.models.database/virtual-id
+   :type :query
+   :query {:source-table (str "card__" (u/get-id card-or-card-id))
+           :aggregation  [:count]
+           :breakout     [(ql/binning-strategy (ql/field-literal "LATITUDE" :type/Float) :num-bins 20)]}})
+
+;; Binning should be allowed on nested queries that have result metadata
+(datasets/expect-with-engines (non-timeseries-engines-with-feature :binning)
+  [[10.0 1] [32.0 4] [34.0 57] [36.0 29] [40.0 9]]
+  (tt/with-temp Card [card {:dataset_query {:database (data/id)
+                                            :type :query
+                                            :query {:source-query {:source-table (data/id :venues)}}}
+                            :result_metadata (mapv field->result-metadata (db/select Field :table_id (data/id :venues)))}]
+    (-> (nested-venues-query card)
+        qp/process-query
+        rows)))
+
+;; Binning is not supported when there is no fingerprint to determine boundaries
+(datasets/expect-with-engines (non-timeseries-engines-with-feature :binning)
+  Exception
+  (tt/with-temp Card [card {:dataset_query {:database (data/id)
+                                            :type :query
+                                            :query {:source-query {:source-table (data/id :venues)}}}}]
+    (-> (nested-venues-query card)
+        qp/process-query
+        rows)))
-- 
GitLab