From 7b617a4db85e858e707a34c7e9664814c10e9bcb Mon Sep 17 00:00:00 2001
From: Cam Saul <1455846+camsaul@users.noreply.github.com>
Date: Fri, 31 Mar 2023 22:08:06 -0700
Subject: [PATCH] MLv2: return expressions in default fields (#29737)

* Metadata/refs overhaul

* Fix Kondo error

* Fix #29702

* Fix #29701

* Revert silly changes

* Fix syntax error

* MLv2 metadata calculation: expressions SHOULD be included in default fields
---
 src/metabase/lib/dev.cljc              |  6 +-
 src/metabase/lib/expression.cljc       |  1 +
 src/metabase/lib/join.cljc             | 37 ++++++-----
 src/metabase/lib/metadata.cljc         |  4 +-
 src/metabase/lib/stage.cljc            | 29 ++++++---
 src/metabase/lib/table.cljc            | 20 ++++++
 test/metabase/lib/expression_test.cljc |  4 +-
 test/metabase/lib/order_by_test.cljc   | 65 +++++++++++--------
 test/metabase/lib/stage_test.cljc      | 87 ++++++++++++++++++++++++++
 9 files changed, 195 insertions(+), 58 deletions(-)

diff --git a/src/metabase/lib/dev.cljc b/src/metabase/lib/dev.cljc
index a28321ed179..86cd3c53841 100644
--- a/src/metabase/lib/dev.cljc
+++ b/src/metabase/lib/dev.cljc
@@ -54,6 +54,6 @@
 
 (mu/defn table :- fn?
   "Returns a function that can be resolved to Table metadata. For use with a [[lib/join]] or something like that."
-  ([id :- ::lib.schema.id/table]
-   (fn [query _stage-number]
-     (lib.metadata/table query id))))
+  [id :- ::lib.schema.id/table]
+  (fn [query _stage-number]
+    (lib.metadata/table query id)))
diff --git a/src/metabase/lib/expression.cljc b/src/metabase/lib/expression.cljc
index aa5aa295749..395f54c036e 100644
--- a/src/metabase/lib/expression.cljc
+++ b/src/metabase/lib/expression.cljc
@@ -20,6 +20,7 @@
   "Given `:metadata/field` column metadata for an expression, construct an `:expression` reference."
   [metadata :- lib.metadata/ColumnMetadata]
   (let [options {:lib/uuid       (str (random-uuid))
+                 :base-type      (:base_type metadata)
                  :effective-type ((some-fn :effective_type :base_type) metadata)}]
     [:expression options (:name metadata)]))
 
diff --git a/src/metabase/lib/join.cljc b/src/metabase/lib/join.cljc
index 330871592f8..329cda5694f 100644
--- a/src/metabase/lib/join.cljc
+++ b/src/metabase/lib/join.cljc
@@ -98,7 +98,8 @@
               (column-from-join-fields query stage-number field-metadata join-alias))
             field-metadatas))))
 
-(defmulti ^:private ->join-clause
+(defmulti join-clause-method
+  "Convert something to a join clause."
   {:arglists '([query stage-number x])}
   (fn [_query _stage-number x]
     (lib.dispatch/dispatch-value x)))
@@ -106,33 +107,33 @@
 ;; TODO -- should the default implementation call [[metabase.lib.query/query]]? That way if we implement a method to
 ;; create an MBQL query from a `Table`, then we'd also get [[join]] support for free?
 
-(defmethod ->join-clause :mbql/join
+(defmethod join-clause-method :mbql/join
   [_query _stage-number a-join-clause]
   a-join-clause)
 
-(defmethod ->join-clause :mbql/query
+;;; TODO -- this probably ought to live in [[metabase.lib.query]]
+(defmethod join-clause-method :mbql/query
   [_query _stage-number another-query]
   (-> {:lib/type :mbql/join
        :stages   (:stages (lib.util/pipeline another-query))}
       lib.options/ensure-uuid))
 
-(defmethod ->join-clause :mbql.stage/mbql
+;;; TODO -- this probably ought to live in [[metabase.lib.stage]]
+(defmethod join-clause-method :mbql.stage/mbql
   [_query _stage-number mbql-stage]
   (-> {:lib/type :mbql/join
        :stages   [mbql-stage]}
       lib.options/ensure-uuid))
 
-(defmethod ->join-clause :metadata/table
-  [query stage-number table-metadata]
-  (->join-clause query
-                 stage-number
-                 {:lib/type     :mbql.stage/mbql
-                  :lib/options  {:lib/uuid (str (random-uuid))}
-                  :source-table (:id table-metadata)}))
-
-(defmethod ->join-clause :dispatch-type/fn
+(defmethod join-clause-method :dispatch-type/fn
   [query stage-number f]
-  (->join-clause query stage-number (f query stage-number)))
+  (join-clause-method query
+                      stage-number
+                      (or (f query stage-number)
+                          (throw (ex-info "Error creating join clause: (f query stage-number) returned nil"
+                                          {:query        query
+                                           :stage-number stage-number
+                                           :f            f})))))
 
 ;; TODO this is basically the same as lib.common/->op-args,
 ;; but requiring lib.common leads to crircular dependencies:
@@ -180,12 +181,13 @@
      (join-clause query stage-number x condition)))
 
   ([query stage-number x]
-   (->join-clause query stage-number x))
+   (join-clause-method query stage-number x))
 
   ([query stage-number x condition]
    (cond-> (join-clause query stage-number x)
      condition (assoc :condition (join-condition query stage-number condition)))))
 
+
 (mu/defn with-join-fields
   "Update a join (or a function that will return a join) to include `:fields`, either `:all`, `:none`, or a sequence of
   references."
@@ -209,8 +211,9 @@
 
   ([query stage-number x condition]
    (let [stage-number (or stage-number -1)
-         new-join     (cond-> (->join-clause query stage-number x)
-                        condition (assoc :condition (join-condition query stage-number condition)))]
+         new-join     (if (some? condition) ; I guess technically `false` could be a valid join condition?
+                        (join-clause query stage-number x condition)
+                        (join-clause query stage-number x))]
      (lib.util/update-query-stage query stage-number update :joins (fn [joins]
                                                                      (conj (vec joins) new-join))))))
 
diff --git a/src/metabase/lib/metadata.cljc b/src/metabase/lib/metadata.cljc
index 8c429db1b5b..279827c8560 100644
--- a/src/metabase/lib/metadata.cljc
+++ b/src/metabase/lib/metadata.cljc
@@ -48,7 +48,9 @@
    ;; introduced by a join, not necessarily ultimately returned.
    :source/joins
    ;; Introduced by `:expressions`; not necessarily ultimately returned.
-   :source/expressions])
+   :source/expressions
+   ;; Not even introduced, but 'visible' because this column is implicitly joinable.
+   :source/implicitly-joinable])
 
 (def ColumnMetadata
   "Malli schema for a valid map of column metadata, which can mean one of two things:
diff --git a/src/metabase/lib/stage.cljc b/src/metabase/lib/stage.cljc
index 5c0b2cf0b22..ab64cae8ef8 100644
--- a/src/metabase/lib/stage.cljc
+++ b/src/metabase/lib/stage.cljc
@@ -95,8 +95,13 @@
    stage-number :- :int]
   (when-let [{fields :fields} (lib.util/query-stage query stage-number)]
     (not-empty
-     (for [field-ref fields]
-       (assoc (lib.metadata.calculation/metadata query stage-number field-ref) :lib/source :source/fields)))))
+     (for [[tag :as ref-clause] fields]
+       (assoc (lib.metadata.calculation/metadata query stage-number ref-clause)
+              :lib/source (case tag
+                            ;; you can't have an `:aggregation` reference in `:fields`; anything in `:aggregations` is
+                            ;; returned automatically anyway by [[aggregations-columns]] above.
+                            :field       :source/fields
+                            :expression  :source/expressions))))))
 
 (mu/defn ^:private breakout-ags-fields-columns :- [:maybe StageMetadataColumns]
   [query        :- ::lib.schema/query
@@ -166,8 +171,9 @@
   [query        :- ::lib.schema/query
    stage-number :- :int
    join         :- ::lib.schema.join/join]
-  (for [col (lib.metadata.calculation/metadata query stage-number join)]
-    (assoc col :lib/source :source/joins)))
+  (not-empty
+   (for [col (lib.metadata.calculation/metadata query stage-number join)]
+     (assoc col :lib/source :source/joins))))
 
 (mu/defn ^:private default-columns-added-by-joins :- [:maybe StageMetadataColumns]
   [query        :- ::lib.schema/query
@@ -205,7 +211,11 @@
 
   PLUS
 
-  2. Columns added by joins at this stage"
+  2. Expressions (aka calculated columns) added in this stage
+
+  PLUS
+
+  3. Columns added by joins at this stage"
   [query        :- ::lib.schema/query
    stage-number :- :int]
   (concat
@@ -225,7 +235,9 @@
         ;; 1d: `:lib/stage-metadata` for the (presumably native) query
         (for [col (:columns (:lib/stage-metadata this-stage))]
           (assoc col :lib/source :source/native)))))
-   ;; 2: columns added by joins at this stage
+   ;; 2: expressions (aka calculated columns) added in this stage
+   (lib.expression/expressions query stage-number)
+   ;; 3: columns added by joins at this stage
    (default-columns-added-by-joins query stage-number)))
 
 (defn- ensure-distinct-names [metadatas]
@@ -297,7 +309,9 @@
                 (m/distinct-by :table_id)
                 (mapcat (fn [{table-id :table_id, ::keys [source-field-id]}]
                           (for [field (source-table-default-fields query table-id)]
-                            (assoc field :fk_field_id source-field-id)))))
+                            (assoc field :fk_field_id source-field-id))))
+                (map (fn [metadata]
+                       (assoc metadata :lib/source :source/implicitly-joinable))))
           column-metadatas)))
 
 (mu/defn visible-columns :- StageMetadataColumns
@@ -307,7 +321,6 @@
   (let [query   (lib.util/update-query-stage query stage-number dissoc :fields :breakout :aggregation)
         columns (default-columns query stage-number)]
     (concat
-     (lib.expression/expressions query stage-number)
      columns
      (implicitly-joinable-columns query columns))))
 
diff --git a/src/metabase/lib/table.cljc b/src/metabase/lib/table.cljc
index 5ef144bdf67..7e6f609ff81 100644
--- a/src/metabase/lib/table.cljc
+++ b/src/metabase/lib/table.cljc
@@ -1,5 +1,6 @@
 (ns metabase.lib.table
   (:require
+   [metabase.lib.join :as lib.join]
    [metabase.lib.metadata :as lib.metadata]
    [metabase.lib.metadata.calculation :as lib.metadata.calculation]
    [metabase.lib.util :as lib.util]
@@ -42,3 +43,22 @@
         :else
         (throw (ex-info (i18n/tru "Unexpected source table ID {0}" (pr-str source-table-id))
                         {:query query, :source-table-id source-table-id}))))))
+
+(defmethod lib.join/with-join-alias-method :metadata/table
+  [table-metadata join-alias]
+  (assoc table-metadata ::join-alias join-alias))
+
+(defmethod lib.join/current-join-alias-method :metadata/table
+  [table-metadata]
+  (::join-alias table-metadata))
+
+(defmethod lib.join/join-clause-method :metadata/table
+  [query stage-number {::keys [join-alias], :as table-metadata}]
+  (lib.join/join-clause-method query
+                               stage-number
+                               (merge
+                                {:lib/type     :mbql.stage/mbql
+                                 :lib/options  {:lib/uuid (str (random-uuid))}
+                                 :source-table (:id table-metadata)}
+                                (when join-alias
+                                  {:alias join-alias}))))
diff --git a/test/metabase/lib/expression_test.cljc b/test/metabase/lib/expression_test.cljc
index 954a18afc49..26c610d8e22 100644
--- a/test/metabase/lib/expression_test.cljc
+++ b/test/metabase/lib/expression_test.cljc
@@ -110,8 +110,8 @@
     (is (=? [{:name         "prev_month"
               :display_name "prev_month"
               :base_type    :type/DateTime
-              :lib/source   :source/fields}]
-            (lib.metadata.calculation/metadata query -1 query)))))
+              :lib/source   :source/expressions}]
+            (lib.metadata.calculation/metadata query)))))
 
 (deftest ^:parallel date-interval-names-test
   (let [clause [:datetime-add
diff --git a/test/metabase/lib/order_by_test.cljc b/test/metabase/lib/order_by_test.cljc
index 637278d25e8..32e72626d93 100644
--- a/test/metabase/lib/order_by_test.cljc
+++ b/test/metabase/lib/order_by_test.cljc
@@ -223,17 +223,17 @@
     (let [query (-> (lib/query-for-table-name meta/metadata-provider "VENUES")
                     (lib/expression "Category ID + 1"  (lib/+ (lib/field "VENUES" "CATEGORY_ID") 1)))]
       (testing (lib.util/format "Query =\n%s" (u/pprint-to-str query))
-        (is (=? [{:lib/type     :metadata/field
-                  :base_type    :type/Integer
-                  :name         "Category ID + 1"
-                  :display_name "Category ID + 1"
-                  :lib/source   :source/expressions}
-                 {:id (meta/id :venues :id) :name "ID"}
+        (is (=? [{:id (meta/id :venues :id) :name "ID"}
                  {:id (meta/id :venues :name) :name "NAME"}
                  {:id (meta/id :venues :category-id) :name "CATEGORY_ID"}
                  {:id (meta/id :venues :latitude) :name "LATITUDE"}
                  {:id (meta/id :venues :longitude) :name "LONGITUDE"}
                  {:id (meta/id :venues :price) :name "PRICE"}
+                 {:lib/type     :metadata/field
+                  :base_type    :type/Integer
+                  :name         "Category ID + 1"
+                  :display_name "Category ID + 1"
+                  :lib/source   :source/expressions}
                  {:id (meta/id :categories :id) :name "ID"}
                  {:id (meta/id :categories :name) :name "NAME"}]
                 (lib/orderable-columns query)))))))
@@ -362,35 +362,46 @@
 
 (deftest ^:parallel orderable-columns-include-expressions-test
   (testing "orderable-columns should include expressions"
-    (is (=? [{:lib/type     :metadata/field
-              :name         "expr"
-              :display_name "expr"
-              :lib/source   :source/expressions}
-             {:name "ID"}
+    (is (=? [{:name "ID"}
              {:name "NAME"}
              {:name "CATEGORY_ID"}
              {:name "LATITUDE"}
              {:name "LONGITUDE"}
              {:name "PRICE"}
-             {:name "ID"}
-             {:name "NAME"}]
+             {:lib/type     :metadata/field
+              :name         "expr"
+              :display_name "expr"
+              :lib/source   :source/expressions}
+             {:name "ID", :lib/source :source/implicitly-joinable}
+             {:name "NAME", :lib/source :source/implicitly-joinable}]
             (-> (lib/query-for-table-name meta/metadata-provider "VENUES")
                 (lib/expression "expr" (lib/absolute-datetime "2020" :month))
                 (lib/fields [(lib/field "VENUES" "ID")])
                 (lib/orderable-columns))))))
 
 (deftest ^:parallel order-by-expression-test
-  (let [query  (-> (lib/query-for-table-name meta/metadata-provider "VENUES")
-                   (lib/expression "expr" (lib/absolute-datetime "2020" :month))
-                   (lib/fields [(lib/field "VENUES" "ID")]))
-        [expr] (lib/orderable-columns query)]
-    (is (=? {:lib/type   :metadata/field
-             :lib/source :source/expressions
-             :name       "expr"}
-            expr))
-    (let [updated-query (lib/order-by query expr)]
-      (is (=? {:stages [{:order-by [[:asc {} [:expression {} "expr"]]]}]}
-              updated-query))
-      (testing "description"
-        (is (= "Venues, Sorted by expr ascending"
-               (lib/describe-query updated-query)))))))
+  (let [query (-> (lib/query-for-table-name meta/metadata-provider "VENUES")
+                  (lib/expression "expr" (lib/absolute-datetime "2020" :month))
+                  (lib/fields [(lib/field "VENUES" "ID")]))]
+    (is (= [{:id (meta/id :venues :id),          :name "ID",          :display_name "ID",          :lib/source :source/table-defaults}
+            {:id (meta/id :venues :name),        :name "NAME",        :display_name "Name",        :lib/source :source/table-defaults}
+            {:id (meta/id :venues :category-id), :name "CATEGORY_ID", :display_name "Category ID", :lib/source :source/table-defaults}
+            {:id (meta/id :venues :latitude),    :name "LATITUDE",    :display_name "Latitude",    :lib/source :source/table-defaults}
+            {:id (meta/id :venues :longitude),   :name "LONGITUDE",   :display_name "Longitude",   :lib/source :source/table-defaults}
+            {:id (meta/id :venues :price),       :name "PRICE",       :display_name "Price",       :lib/source :source/table-defaults}
+            {:name "expr", :display_name "expr", :lib/source :source/expressions}
+            {:id (meta/id :categories :id),   :name "ID",   :display_name "ID",   :lib/source :source/implicitly-joinable}
+            {:id (meta/id :categories :name), :name "NAME", :display_name "Name", :lib/source :source/implicitly-joinable}]
+           (map #(select-keys % [:id :name :display_name :lib/source])
+                (lib/orderable-columns query))))
+    (let [expr (m/find-first #(= (:name %) "expr") (lib/orderable-columns query))]
+      (is (=? {:lib/type   :metadata/field
+               :lib/source :source/expressions
+               :name       "expr"}
+              expr))
+      (let [updated-query (lib/order-by query expr)]
+        (is (=? {:stages [{:order-by [[:asc {} [:expression {} "expr"]]]}]}
+                updated-query))
+        (testing "description"
+          (is (= "Venues, Sorted by expr ascending"
+                 (lib/describe-query updated-query))))))))
diff --git a/test/metabase/lib/stage_test.cljc b/test/metabase/lib/stage_test.cljc
index 5ff8d19cadd..c5bbed5511a 100644
--- a/test/metabase/lib/stage_test.cljc
+++ b/test/metabase/lib/stage_test.cljc
@@ -75,3 +75,90 @@
                (lib/drop-stage))))
     (testing "Dropping with 1 stage should error"
       (is (thrown-with-msg? #?(:cljs :default :clj Exception) #"Cannot drop the only stage" (-> query (lib/drop-stage)))))))
+
+(defn- query-with-expressions []
+  (let [query (-> (lib/query-for-table-name meta/metadata-provider "VENUES")
+                  (lib/expression "ID + 1" (lib/+ (lib/field "VENUES" "ID") 1))
+                  (lib/expression "ID + 2" (lib/+ (lib/field "VENUES" "ID") 2)))]
+    (is (=? {:stages [{:expressions {"ID + 1" [:+ {} [:field {} (meta/id :venues :id)] 1]
+                                     "ID + 2" [:+ {} [:field {} (meta/id :venues :id)] 2]}}]}
+            query))
+    query))
+
+(deftest ^:parallel default-fields-metadata-include-expressions-test
+  (testing "all expressions should come back by default if `:fields` is not specified (#29734)"
+    (is (=? [{:id (meta/id :venues :id),          :name "ID",          :lib/source :source/table-defaults}
+             {:id (meta/id :venues :name),        :name "NAME",        :lib/source :source/table-defaults}
+             {:id (meta/id :venues :category-id), :name "CATEGORY_ID", :lib/source :source/table-defaults}
+             {:id (meta/id :venues :latitude),    :name "LATITUDE",    :lib/source :source/table-defaults}
+             {:id (meta/id :venues :longitude),   :name "LONGITUDE",   :lib/source :source/table-defaults}
+             {:id (meta/id :venues :price),       :name "PRICE",       :lib/source :source/table-defaults}
+             {:name "ID + 1", :lib/source :source/expressions}
+             {:name "ID + 2", :lib/source :source/expressions}]
+            (lib.metadata.calculation/metadata (query-with-expressions))))))
+
+(deftest ^:parallel default-fields-metadata-return-expressions-before-joins-test
+  (testing "expressions should come back BEFORE columns from joins"
+    (let [query (-> (query-with-expressions)
+                    (lib/join (-> (lib/join-clause (lib/table (meta/id :categories)))
+                                  (lib/with-join-alias "Cat")
+                                  (lib/with-join-fields :all))
+                              (lib/=
+                               (lib/field "VENUES" "CATEGORY_ID")
+                               (-> (lib/field "CATEGORIES" "ID") (lib/with-join-alias "Cat")))))]
+      (is (=? {:stages [{:joins       [{:alias     "Cat"
+                                        :stages    [{:source-table (meta/id :categories)}]
+                                        :condition [:=
+                                                    {}
+                                                    [:field {} (meta/id :venues :category-id)]
+                                                    [:field {:join-alias "Cat"} (meta/id :categories :id)]]
+                                        :fields    :all}]
+                         :expressions {"ID + 1" [:+ {} [:field {} (meta/id :venues :id)] 1]
+                                       "ID + 2" [:+ {} [:field {} (meta/id :venues :id)] 2]}}]}
+              query))
+      (is (=? [{:id (meta/id :venues :id),          :name "ID",          :lib/source :source/table-defaults}
+               {:id (meta/id :venues :name),        :name "NAME",        :lib/source :source/table-defaults}
+               {:id (meta/id :venues :category-id), :name "CATEGORY_ID", :lib/source :source/table-defaults}
+               {:id (meta/id :venues :latitude),    :name "LATITUDE",    :lib/source :source/table-defaults}
+               {:id (meta/id :venues :longitude),   :name "LONGITUDE",   :lib/source :source/table-defaults}
+               {:id (meta/id :venues :price),       :name "PRICE",       :lib/source :source/table-defaults}
+               {:name "ID + 1", :lib/source :source/expressions}
+               {:name "ID + 2", :lib/source :source/expressions}
+               {:id           (meta/id :categories :id)
+                :name         "ID_2"
+                :lib/source   :source/joins
+                :source_alias "Cat"
+                :display_name "Categories → ID"}
+               {:id           (meta/id :categories :name)
+                :name         "NAME_2"
+                :lib/source   :source/joins
+                :source_alias "Cat"
+                :display_name "Categories → Name"}]
+              (lib.metadata.calculation/metadata query))))))
+
+(deftest ^:parallel metadata-with-fields-only-include-expressions-in-fields-test
+  (testing "If query includes :fields, only return expressions that are in :fields"
+    (let [query     (query-with-expressions)
+          id-plus-1 (first (lib/expressions query))]
+      (is (=? {:lib/type     :metadata/field
+               :base_type    :type/Integer
+               :name         "ID + 1"
+               :display_name "ID + 1"
+               :lib/source   :source/expressions}
+              id-plus-1))
+      (let [query' (-> query
+                       (lib/fields [id-plus-1]))]
+        (is (=? {:stages [{:expressions {"ID + 1" [:+ {} [:field {} (meta/id :venues :id)] 1]
+                                         "ID + 2" [:+ {} [:field {} (meta/id :venues :id)] 2]}
+                           :fields      [[:expression {} "ID + 1"]]}]}
+                query'))
+        (testing "If `:fields` is specified, expressions should only come back if they are in `:fields`"
+          (is (=? [{:name       "ID + 1"
+                    ;; TODO -- I'm not really sure whether the source should be `:source/expressions` here, or
+                    ;; `:source/fields`, since it's present in BOTH...
+                    :lib/source :source/expressions}]
+                  (lib.metadata.calculation/metadata query')))
+          (testing "Should be able to convert the metadata back into a reference"
+            (let [[id-plus-1] (lib.metadata.calculation/metadata query')]
+              (is (=? [:expression {:base-type :type/Integer, :effective-type :type/Integer} "ID + 1"]
+                      (lib/ref id-plus-1))))))))))
-- 
GitLab