From f5d209ccc9a9ec5f91502009b19a1246c9c35e63 Mon Sep 17 00:00:00 2001
From: Simon Belak <simon@metabase.com>
Date: Fri, 11 Sep 2020 21:21:10 +0200
Subject: [PATCH] Lift dimensions from nested queries (#13202)

* Lift dimensions from nested queries

* Don't try to lift native queries

* Fix tests

* Minor refactor

* Use actual field IDs in tests

* Touchups

* More tests

* Better names

* Add missing require

* add end-to-end test

* Remove stale

* Add breakout support

* Add e2e test for breakout

* Update test [ci drivers]

* Change order [ci drivers]

* Ignore name case [ci drivers]

* make tests portable [ci drivers]

* Ensure vector [ci drivers]

* Test: stable ordering [ci drivers]
---
 .../middleware/add_dimension_projections.clj  | 123 ++++++++++++------
 .../add_dimension_projections_test.clj        |  72 +++++++---
 .../query_processor_test/remapping_test.clj   |  59 ++++++++-
 3 files changed, 194 insertions(+), 60 deletions(-)

diff --git a/src/metabase/query_processor/middleware/add_dimension_projections.clj b/src/metabase/query_processor/middleware/add_dimension_projections.clj
index f949f92e524..2895097e56e 100644
--- a/src/metabase/query_processor/middleware/add_dimension_projections.clj
+++ b/src/metabase/query_processor/middleware/add_dimension_projections.clj
@@ -9,7 +9,7 @@
   of values happens on the frontend, so this middleware simply adds the column to be used for replacement (e.g.
   `category.name`) to the `:fields` clause in pre-processing, so the Field will be fetched. Recall that Fields
   referenced via with `:fk->` clauses imply that JOINs will take place, which are automatically handled later in the
-  Query Processor pipeline. Additionally, this middleware will swap out and `:order-by` clauses referencing the
+  Query Processor pipeline. Additionally, this middleware will swap out `:breakout` and `:order-by` clauses referencing the
   original Field with ones referencing the remapped Field (for example, so we would sort by `category.name` instead of
   `category_id`).
 
@@ -23,10 +23,13 @@
   appropriate `:remapped_from` and `:remapped_to` attributes in the result `:cols` in post-processing.
   `:remapped_from` and `:remapped_to` are the names of the columns, e.g. `category_id` is `:remapped_to` `name`, and
   `name` is `:remapped_from` `:category_id`."
-  (:require [metabase.mbql
+  (:require [medley.core :as m]
+            [metabase.mbql
              [schema :as mbql.s]
              [util :as mbql.u]]
-            [metabase.models.dimension :refer [Dimension]]
+            [metabase.models
+             [dimension :refer [Dimension]]
+             [field :refer [Field]]]
             [metabase.util :as u]
             [metabase.util.schema :as su]
             [schema.core :as s]
@@ -37,9 +40,11 @@
 (def ^:private ExternalRemappingDimension
   "Schema for the info we fetch about `external` type Dimensions that will be used for remappings in this Query. Fetched
   by the pre-processing portion of the middleware, and passed along to the post-processing portion."
-  {:name                    su/NonBlankString       ; display name for the remapping
-   :field_id                su/IntGreaterThanZero   ; ID of the Field being remapped
-   :human_readable_field_id su/IntGreaterThanZero}) ; ID of the FK Field to remap values to
+  {:name                                       su/NonBlankString       ; display name for the remapping
+   :field_id                                   su/IntGreaterThanZero   ; ID of the Field being remapped
+   :human_readable_field_id                    su/IntGreaterThanZero   ; ID of the FK Field to remap values to
+   (s/optional-key :field_name)                su/NonBlankString       ; Name of the Field being remapped
+   (s/optional-key :human_readable_field_name) su/NonBlankString})     ; Name of the FK field to remap values to
 
 
 ;;; ----------------------------------------- add-fk-remaps (pre-processing) -----------------------------------------
@@ -62,16 +67,20 @@
   get hidden when displayed anyway?)"
   [fields :- [mbql.s/Field]]
   (when-let [field-id->remapping-dimension (fields->field-id->remapping-dimension fields)]
-    (vec
-     (mbql.u/match fields
-       ;; don't match Field IDs nested in other clauses
-       [(_ :guard keyword?) [:field-id _] & _] nil
+    ;; Reconstruct how we uniquify names in `metabase.query-processor.middleware.annotate`
+    (let [unique-name (comp (mbql.u/unique-name-generator) :name Field)]
+      (vec
+       (mbql.u/match fields
+         ;; don't match Field IDs nested in other clauses
+         [(_ :guard keyword?) [:field-id _] & _] nil
 
-       [:field-id (id :guard field-id->remapping-dimension)]
-       (let [dimension (field-id->remapping-dimension id)]
-         [&match
-          [:fk-> &match [:field-id (:human_readable_field_id dimension)]]
-          dimension])))))
+         [:field-id (id :guard field-id->remapping-dimension)]
+         (let [dimension (field-id->remapping-dimension id)]
+           [&match
+            [:fk-> &match [:field-id (:human_readable_field_id dimension)]]
+            (assoc dimension
+              :field_name                (-> dimension :field_id unique-name)
+              :human_readable_field_name (-> dimension :human_readable_field_id unique-name))]))))))
 
 (s/defn ^:private update-remapped-order-by :- [mbql.s/OrderBy]
   "Order by clauses that include an external remapped column should be replace that original column in the order by with
@@ -84,26 +93,41 @@
        [direction remapped-col]
        order-by-clause))))
 
+(defn- update-remapped-breakout
+  [field->remapped-col breakout-clause]
+  (vec (mapcat (fn [field]
+                 (if-let [remapped-col (get field->remapped-col field)]
+                   [remapped-col field]
+                   [field]))
+               breakout-clause)))
+
 (s/defn ^:private add-fk-remaps :- [(s/one (s/maybe [ExternalRemappingDimension]) "external remapping dimensions")
                                     (s/one mbql.s/Query "query")]
   "Add any Fields needed for `:external` remappings to the `:fields` clause of the query, and update `:order-by`
-  clause as needed. Returns a pair like `[external-remapping-dimensions updated-query]`."
-  [{{:keys [fields order-by]} :query, :as query} :- mbql.s/Query]
-  ;; TODO - I think we need to handle Fields in `:breakout` here as well...
-  ;; fetch remapping column pairs if any exist...
-  (if-let [remap-col-tuples (seq (create-remap-col-tuples fields))]
-    ;; if they do, update `:fields` and `:order-by` clauses accordingly and add to the query
-    (let [new-fields   (vec (concat fields (map second remap-col-tuples)))
-          ;; make a map of field-id-clause -> fk-clause from the tuples
-          new-order-by (update-remapped-order-by (into {} (for [[field-clause fk-clause] remap-col-tuples]
-                                                            [field-clause fk-clause]))
-                                                 order-by)]
-      ;; return the Dimensions we are using and the query
-      [(map last remap-col-tuples)
-       (cond-> (assoc-in query [:query :fields] new-fields)
-         (seq new-order-by) (assoc-in [:query :order-by] new-order-by))])
-    ;; otherwise return query as-is
-    [nil query]))
+  and `breakout` clauses as needed. Returns a pair like `[external-remapping-dimensions updated-query]`."
+  [{{:keys [fields order-by breakout source-query]} :query, :as query} :- mbql.s/Query]
+  (let [[source-query-remappings query]
+        (if (and source-query (not (:native source-query))) ;; Only do lifting if source is MBQL query
+          (let [[source-query-remappings source-query] (add-fk-remaps (assoc query :query source-query))]
+            [source-query-remappings (assoc-in query [:query :source-query] (:query source-query))])
+          [nil query])]
+    ;; fetch remapping column pairs if any exist...
+    (if-let [remap-col-tuples (seq (create-remap-col-tuples (concat fields breakout)))]
+      ;; if they do, update `:fields`, `:order-by` and `:breakout` clauses accordingly and add to the query
+      (let [new-fields          (into fields (map second) remap-col-tuples)
+            ;; make a map of field-id-clause -> fk-clause from the tuples
+            field->remapped-col (into {} (for [[field-clause fk-clause] remap-col-tuples]
+                                           [field-clause fk-clause]))
+            new-breakout        (update-remapped-breakout field->remapped-col breakout)
+            new-order-by        (update-remapped-order-by field->remapped-col order-by)]
+        ;; return the Dimensions we are using and the query
+        [(concat source-query-remappings (map last remap-col-tuples))
+         (cond-> query
+           (seq fields)   (assoc-in [:query :fields] new-fields)
+           (seq order-by) (assoc-in [:query :order-by] new-order-by)
+           (seq breakout) (assoc-in [:query :breakout] new-breakout))])
+      ;; otherwise return query as-is
+      [source-query-remappings query])))
 
 
 ;;; ---------------------------------------- remap-results (post-processing) -----------------------------------------
@@ -116,28 +140,43 @@
   [columns                :- [su/Map]
    remapping-dimensions   :- (s/maybe [ExternalRemappingDimension])
    internal-remap-columns :- (s/maybe [su/Map])]
-  (let [column-id->column              (u/key-by :id columns)
+  ;; We have to complicate our lives a bit and account for the possibility that dimensions might be
+  ;; used in an upstream `source-query`. If so, `columns` will treat them as `:field-value`s, erasing
+  ;; IDs. In that case reconstruct the mappings using names.
+  ;;
+  ;; TODO:
+  ;; Matching by name is brittle and might produce wrong results when there are name clashes
+  ;; in the source fields.
+  (let [column-id->column              (u/key-by (some-fn :id :name) columns)
         name->internal-remapped-to-col (u/key-by :remapped_from internal-remap-columns)
-        id->remapped-to-dimension      (u/key-by :field_id                remapping-dimensions)
-        id->remapped-from-dimension    (u/key-by :human_readable_field_id remapping-dimensions)]
+        id->remapped-to-dimension      (merge (u/key-by :field_id remapping-dimensions)
+                                              (u/key-by :field_name remapping-dimensions))
+        id->remapped-from-dimension    (merge (u/key-by :human_readable_field_id remapping-dimensions)
+                                              (u/key-by :human_readable_field_name remapping-dimensions))
+        get-first-key                  (fn [m & ks]
+                                         (some-> (m/find-first m ks) m))]
     (for [{:keys [id], column-name :name, :as column} columns]
       (merge
        {:base_type :type/*}
        column
        ;; if one of the internal remapped columns says it's remapped from this column, add a matching `:remapped_to`
        ;; entry
-       (when-let [{remapped-to-name :name} (get name->internal-remapped-to-col column-name)]
+       (when-let [{remapped-to-name :name} (name->internal-remapped-to-col column-name)]
          {:remapped_to remapped-to-name})
        ;; if the pre-processing remapping Dimension info contains an entry where this Field's ID is `:field_id`, add
        ;; an entry noting the name of the Field it gets remapped to
-       (when-let [{remapped-to-id :human_readable_field_id} (get id->remapped-to-dimension id)]
-         {:remapped_to (:name (get column-id->column remapped-to-id))})
+       (when-let [{remapped-to-id :human_readable_field_id
+                   remapped-to-name :human_readable_field_name}
+                  (id->remapped-to-dimension (or id column-name))]
+         {:remapped_to (:name (get-first-key column-id->column remapped-to-id remapped-to-name))})
        ;; if the pre-processing remapping Dimension info contains an entry where this Field's ID is
        ;; `:human_readable_field_id`, add an entry noting the name of the Field it gets remapped from, and use the
        ;; `:display_name` of the Dimension
-       (when-let [{dimension-name :name, remapped-from-id :field_id} (get id->remapped-from-dimension id)]
+       (when-let [{dimension-name :name
+                   remapped-from-id :field_id
+                   remapped-from-name :field_name} (id->remapped-from-dimension (or id column-name))]
          {:display_name  dimension-name
-          :remapped_from (:name (get column-id->column remapped-from-id))})))))
+          :remapped_from (:name (get-first-key column-id->column remapped-from-id remapped-from-name))})))))
 
 (defn- create-remapped-col [col-name remapped-from base-type]
   {:description   nil
@@ -241,7 +280,7 @@
   added and each row flowing through needs to include the remapped data for the new column. For external remappings,
   the column information needs to be updated with what it's being remapped from and the user specified name for the
   remapped column."
-  [{:keys [cols], :as metadata} {:keys [internal-only-dims]} rf]
+  [{:keys [internal-only-dims]} rf]
   (if-let [remap-fn (make-row-map-fn internal-only-dims)]
     (fn
       ([]
@@ -258,7 +297,7 @@
   (fn [metadata]
     (let [internal-cols-info (internal-columns-info (:cols metadata))
           metadata           (add-remapped-cols metadata remapping-dimensions internal-cols-info)]
-      (remap-results-xform metadata internal-cols-info (rff metadata)))))
+      (remap-results-xform internal-cols-info (rff metadata)))))
 
 (defn add-remapping
   "Query processor middleware. `qp` is the query processor, returns a function that works on a `query` map. Delgates to
diff --git a/test/metabase/query_processor/middleware/add_dimension_projections_test.clj b/test/metabase/query_processor/middleware/add_dimension_projections_test.clj
index 05827bbd941..dfb89cc9000 100644
--- a/test/metabase/query_processor/middleware/add_dimension_projections_test.clj
+++ b/test/metabase/query_processor/middleware/add_dimension_projections_test.clj
@@ -1,5 +1,6 @@
 (ns metabase.query-processor.middleware.add-dimension-projections-test
   (:require [clojure.test :refer :all]
+            [medley.core :as m]
             [metabase.query-processor.middleware.add-dimension-projections :as add-dim-projections]
             [metabase.test :as mt]
             [metabase.test.fixtures :as fixtures]
@@ -10,44 +11,83 @@
 ;;; ----------------------------------------- add-fk-remaps (pre-processing) -----------------------------------------
 
 (def ^:private example-query
-  {:database 1
+  {:database (mt/id)
    :type     :query
-   :query    {:source-table 1
-              :fields       [[:field-id 1]
-                             [:field-id 2]
-                             [:field-id 3]]}})
+   :query    {:source-table (mt/id :venues)
+              :fields       [[:field-id (mt/id :venues :price)]
+                             [:field-id (mt/id :venues :longitude)]
+                             [:field-id (mt/id :venues :category_id)]]}})
+
+(def ^:private remapped-field
+  {:name                      "Product"
+   :field_id                  (mt/id :venues :category_id)
+   :human_readable_field_id   (mt/id :categories :name)
+   :field_name                "CATEGORY_ID"
+   :human_readable_field_name "NAME"})
 
 (defn- do-with-fake-remappings-for-field-3 [f]
   (with-redefs [add-dim-projections/fields->field-id->remapping-dimension
                 (constantly
-                 {3 {:name "Product", :field_id 3, :human_readable_field_id 4}})]
+                 {(mt/id :venues :category_id) {:name                    "Product"
+                                                :field_id                (mt/id :venues :category_id)
+                                                :human_readable_field_id (mt/id :categories :name)}})]
     (f)))
 
 (deftest create-remap-col-tuples
   (testing "make sure we create the remap column tuples correctly"
     (do-with-fake-remappings-for-field-3
      (fn []
-       (is (= [[[:field-id 3]
-                [:fk-> [:field-id 3] [:field-id 4]]
-                {:name "Product", :field_id 3, :human_readable_field_id 4}]]
-              (#'add-dim-projections/create-remap-col-tuples [[:field-id 1] [:field-id 2] [:field-id 3]])))))))
+       (is (= [[[:field-id (mt/id :venues :category_id)]
+                [:fk-> [:field-id (mt/id :venues :category_id)] [:field-id (mt/id :categories :name)]]
+                remapped-field]]
+              (#'add-dim-projections/create-remap-col-tuples [[:field-id (mt/id :venues :price)]
+                                                              [:field-id (mt/id :venues :longitude)]
+                                                              [:field-id (mt/id :venues :category_id)]])))))))
 
 (deftest add-fk-remaps-test
   (do-with-fake-remappings-for-field-3
    (fn []
      (testing "make sure FK remaps add an entry for the FK field to `:fields`, and returns a pair of [dimension-info updated-query]"
-       (is (= [[{:name "Product", :field_id 3, :human_readable_field_id 4}]
+       (is (= [[remapped-field]
                (update-in example-query [:query :fields]
-                          conj [:fk-> [:field-id 3] [:field-id 4]])]
+                          conj [:fk-> [:field-id (mt/id :venues :category_id)] [:field-id (mt/id :categories :name)]])]
               (#'add-dim-projections/add-fk-remaps example-query))))
 
      (testing "adding FK remaps should replace any existing order-bys for a field with order bys for the FK remapping Field"
-       (is (= [[{:name "Product", :field_id 3, :human_readable_field_id 4}]
+       (is (= [[remapped-field]
                (-> example-query
-                   (assoc-in [:query :order-by] [[:asc [:fk-> [:field-id 3] [:field-id 4]]]])
+                   (assoc-in [:query :order-by]
+                             [[:asc [:fk-> [:field-id (mt/id :venues :category_id)]
+                                     [:field-id (mt/id :categories :name)]]]])
                    (update-in [:query :fields]
-                              conj [:fk-> [:field-id 3] [:field-id 4]]))]
-              (#'add-dim-projections/add-fk-remaps (assoc-in example-query [:query :order-by] [[:asc [:field-id 3]]]))))))))
+                              conj [:fk-> [:field-id (mt/id :venues :category_id)]
+                                    [:field-id (mt/id :categories :name)]]))]
+              (-> example-query
+                  (assoc-in [:query :order-by] [[:asc [:field-id (mt/id :venues :category_id)]]])
+                  (#'add-dim-projections/add-fk-remaps)))))
+
+     (testing "adding FK remaps should replace any existing breakouts for a field with order bys for the FK remapping Field"
+       (is (= [[remapped-field]
+               (-> example-query
+                   (assoc-in [:query :aggregation] [[:count]])
+                   (assoc-in [:query :breakout]
+                             [[:fk-> [:field-id (mt/id :venues :category_id)]
+                               [:field-id (mt/id :categories :name)]]
+                              [:field-id (mt/id :venues :category_id)]])
+                   (m/dissoc-in [:query :fields]))]
+              (-> example-query
+                  (m/dissoc-in [:query :fields])
+                  (assoc-in [:query :aggregation] [[:count]])
+                  (assoc-in [:query :breakout] [[:field-id (mt/id :venues :category_id)]])
+                  (#'add-dim-projections/add-fk-remaps)))))
+
+     (testing "make sure FK remaps work with nested queries"
+       (let [example-query (assoc example-query :query {:source-query (:query example-query)})]
+         (is (= [[remapped-field]
+                 (update-in example-query [:query :source-query :fields]
+                            conj [:fk-> [:field-id (mt/id :venues :category_id)]
+                                  [:field-id (mt/id :categories :name)]])]
+                (#'add-dim-projections/add-fk-remaps example-query))))))))
 
 
 ;;; ---------------------------------------- remap-results (post-processing) -----------------------------------------
diff --git a/test/metabase/query_processor_test/remapping_test.clj b/test/metabase/query_processor_test/remapping_test.clj
index b21584e2e1b..c1c7fc81f58 100644
--- a/test/metabase/query_processor_test/remapping_test.clj
+++ b/test/metabase/query_processor_test/remapping_test.clj
@@ -28,7 +28,62 @@
                  (mt/run-mbql-query venues
                    {:fields   [$name $category_id]
                     :order-by [[:asc $name]]
-                    :limit    4}))))))))
+                    :limit    4})))))))
+  (mt/test-drivers (mt/normal-drivers-with-feature :foreign-keys)
+    (mt/with-temp-objects
+      (data/create-venue-category-fk-remapping! "Name")
+      (is (= {:rows [["American" 2 8]
+                     ["Artisan"  3 2]
+                     ["Asian"    4 2]]
+              :cols [(-> (qp.test/col :categories :name)
+                         (assoc :remapped_from (mt/format-name "category_id"))
+                         (assoc :field_ref [:fk-> [:field-id (mt/id :venues :category_id)]
+                                            [:field-id (mt/id :categories :name)]])
+                         (assoc :fk_field_id (mt/id :venues :category_id))
+                         (assoc :source :breakout))
+                     (-> (qp.test/col :venues :category_id)
+                         (assoc :remapped_to (mt/format-name "name"))
+                         (assoc :source :breakout))
+                     {:field_ref    [:aggregation 0]
+                      :source       :aggregation
+                      :display_name "Count"
+                      :name         "count"
+                      :special_type :type/Number}]}
+             (-> (qp.test/format-rows-by [str int int]
+                   (mt/run-mbql-query venues
+                     {:aggregation [[:count]]
+                      :breakout    [$category_id]
+                      :limit       3}))
+                 qp.test/rows-and-cols
+                 (update :cols (fn [[c1 c2 agg]]
+                                 [c1 c2 (dissoc agg :base_type)]))))))))
+
+(deftest nested-remapping-test
+  (mt/test-drivers (mt/normal-drivers-with-feature :nested-queries)
+    (mt/with-temp-objects
+      (data/create-venue-category-remapping! "Foo")
+      (is (= {:rows [["20th Century Cafe"               12 "Café"]
+                     ["25°"                             11 "Burger"]
+                     ["33 Taps"                          7 "Bar"]
+                     ["800 Degrees Neapolitan Pizzeria" 58 "Pizza"]]
+              :cols [(-> (qp.test/col :venues :name)
+                         (assoc :field_ref [:field-literal (mt/format-name "name") :type/Text])
+                         (dissoc :description :parent_id :visibility_type))
+                     (-> (qp.test/col :venues :category_id)
+                         (assoc :remapped_to "Foo")
+                         (assoc :field_ref [:field-literal (mt/format-name"category_id")])
+                         (dissoc :description :parent_id :visibility_type))
+                     (#'add-dimension-projections/create-remapped-col "Foo" (mt/format-name "category_id") :type/Text)]}
+             (-> (qp.test/format-rows-by [str int str]
+                   (mt/run-mbql-query venues
+                     {:source-query {:source-table (mt/id :venues)
+                                     :fields       [[:field-id (mt/id :venues :name)]
+                                                    [:field-id  (mt/id :venues :category_id)]]
+                                     :order-by     [[:asc [:field-id (mt/id :venues :name)]]]
+                                     :limit        4}}))
+                 qp.test/rows-and-cols
+                 (update :cols (fn [[c1 c2 c3]]
+                                 [c1 (update c2 :field_ref (comp vec butlast)) c3]))))))))
 
 (defn- select-columns
   "Focuses the given resultset to columns that return true when passed to `columns-pred`. Typically this would be done
@@ -93,7 +148,7 @@
            :order-by [[:asc $name]]
            :limit    4})))))
 
-;; Test that we can remap inside an MBQL nested query
+;; Test that we can remap inside an MBQL query
 (datasets/expect-with-drivers (mt/normal-drivers-with-feature :foreign-keys :nested-queries)
   ["Kinaree Thai Bistro" "Ruen Pair Thai Restaurant" "Yamashiro Hollywood" "Spitz Eagle Rock" "The Gumbo Pot"]
   (mt/with-temp-objects
-- 
GitLab