Skip to content
Snippets Groups Projects
Unverified Commit a2dfc160 authored by dpsutton's avatar dpsutton Committed by GitHub
Browse files

Only select type information on type inference of coalesce (#19008)

* Only select type information on type inference of coalesce

This previously just selected hte inferred information from the second
argument to the coalesce. For fields this would return the entire field
object which includes an id.

Later, in `source-metadata->fields` this assumes that since an id is
present, we should select `[:field id nil]`, but this confused that the
type of the coalesce column is the same as that field-id, not that we
want to select that field id.

```clojure
add-source-metadata=> (pprint
                        (mbql-source-query->metadata {:source-table 2
                                                      :joins [{:fields [[:field
                                                                         22
                                                                         {:join-alias "People - User"}]
                                                                        [:field
                                                                         15
                                                                         {:join-alias "People - User"}]]
                                                               :source-table 3
                                                               :condition [:=
                                                                           [:field 3 nil]
                                                                           [:field
                                                                            22
                                                                            {:join-alias "People - User"}]]
                                                               :alias "People - User"}]
                                                      :expressions {:coalesce [:coalesce
                                                                               [:field 3 nil]
                                                                               [:field
                                                                                22
                                                                                {:join-alias "People - User"}]]}
                                                      :aggregation [[:count]]
                                                      :breakout [[:expression "coalesce"]]}))
({:semantic_type :type/FK,
  :table_id 2,
  :coercion_strategy nil,
  :name "coalesce",                    ;; merging maintains the correct name
  :settings nil,
  :field_ref [:expression "coalesce"], ;; merging maintains the field ref
  :effective_type :type/Integer,
  :parent_id nil,
  :id 3,                               ;; the type inference selected
                                       ;; the entire field, including its id
  :display_name "coalesce",
  :fingerprint {:global {:distinct-count 929, :nil% 0.0}},
  :base_type :type/Integer}
 {:name "count",
  :display_name "Count",
  :base_type :type/BigInteger,
  :semantic_type :type/Quantity,
  :field_ref [:aggregation 0]})
nil
```

updating to only select the type information:

```clojure
add-source-metadata=> (pprint
                        (mbql-source-query->metadata {:source-table 2
                                                      :joins [{:fields [[:field
                                                                         22
                                                                         {:join-alias "People - User"}]
                                                                        [:field
                                                                         15
                                                                         {:join-alias "People - User"}]]
                                                               :source-table 3
                                                               :condition [:=
                                                                           [:field 3 nil]
                                                                           [:field
                                                                            22
                                                                            {:join-alias "People - User"}]]
                                                               :alias "People - User"}]
                                                      :expressions {:coalesce [:coalesce
                                                                               [:field 3 nil]
                                                                               [:field
                                                                                22
                                                                                {:join-alias "People - User"}]]}
                                                      :aggregation [[:count]]
                                                      :breakout [[:expression "coalesce"]]}))
({:name "coalesce",
  :display_name "coalesce",
  :base_type :type/Integer,
  :effective_type :type/Integer,  ;; no more field information beyond
                                  ;; the types
  :coercion_strategy nil,
  :semantic_type :type/FK,
  :field_ref [:expression "coalesce"]}
 {:name "count",
  :display_name "Count",
  :base_type :type/BigInteger,
  :semantic_type :type/Quantity,
  :field_ref [:aggregation 0]})
```

* Update question in repro now that it doesn't fail on the backend

previously this question would actually fail on the backend and
independently the frontend would go into a stack overflow killing the
page. Fixing the bug actually broke the test as it asserted that the
query would break and look for the error message. Now that the query
returns assert we can find the name of the question "test question"

* Correct same bug for case

When inferring type for a case, we were returning the type information
for the first field we could find in the case statement. And that
information included the field id. We always merge the current column's
name on top of that to remove the analyzed field's name and other
things, but the field-id would remain. `source-metadata->fields` in
add_implicit_clauses will add a field reference if it finds a field id.

Before, the following was returned

```clojure
({:description "The date and time an order was submitted."
  :semantic_type :type/CreationTimestamp
  :table_id 2
  :coercion_strategy nil
  :unit :month
  :name "CREATED_AT"
  :settings nil
  :source :breakout
  :field_ref [:field
              4
              {:temporal-unit :month}]
  :effective_type :type/DateTime
  :parent_id nil
  :id 4
  :visibility_type :normal
  :display_name "Created At"
  :fingerprint {:global {:distinct-count 9998
                         :nil% 0.0}
                :type {:type/DateTime {:earliest "2016-04-30T18:56:13.352Z"
                                       :latest "2020-04-19T14:07:15.657Z"}}}
  :base_type :type/DateTime}
 {:description "The raw, pre-tax cost of the order. Note that this might be different in the future from the product price due to promotions, credits, etc."
  :semantic_type :type/Quantity
  :table_id 2
  :coercion_strategy nil
  :name "distinct case"
  :settings nil
  :source :aggregation
  :field_ref [:aggregation 0]
  :effective_type :type/Float
  :parent_id nil
  :id 6
  :visibility_type :normal
  :display_name "distinct case"
  :fingerprint {:global {:distinct-count 340
                         :nil% 0.0}
                :type {:type/Number {:min 15.691943673970439
                                     :q1 49.74894519060184
                                     :q3 105.42965746993103
                                     :max 148.22900526552291
                                     :sd 32.53705013056317
                                     :avg 77.01295465356547}}}
  :base_type :type/BigInteger})
```

The important bit above is that the metadata for the "distinct case" has
the correct field_ref but also has a field id.

After, we now return the following metadata:
```clojure
({:description "The date and time an order was submitted."
  :semantic_type :type/CreationTimestamp
  :table_id 2
  :coercion_strategy nil
  :unit :month
  :name "CREATED_AT"
  :settings nil
  :source :fields
  :field_ref [:field
              4
              {:temporal-unit :default}]
  :effective_type :type/DateTime
  :parent_id nil
  :id 4
  :visibility_type :normal
  :display_name "Created At"
  :fingerprint {:global {:distinct-count 9998
                         :nil% 0.0}
                :type {:type/DateTime {:earliest "2016-04-30T18:56:13.352Z"
                                       :latest "2020-04-19T14:07:15.657Z"}}}
  :base_type :type/DateTime}
 {:base_type :type/BigInteger
  :effective_type :type/Float
  :coercion_strategy nil
  :semantic_type :type/Quantity
  :name "distinct case"
  :display_name "distinct case"
  :source :fields
  :field_ref [:field
              "distinct case"
              {:base-type :type/BigInteger}]}
 {:semantic_type :type/Quantity
  :coercion_strategy nil
  :name "cc"
  :expression_name "cc"
  :source :fields
  :field_ref [:expression "cc"]
  :effective_type :type/Float
  :display_name "cc"
  :base_type :type/Float})
```

* Add tests looking for full field analysis without id
parent f6950640
No related branches found
No related tags found
No related merge requests found
......@@ -56,8 +56,9 @@ describe("issue 18630", () => {
{ visitQuestion: true },
);
// The query itself is not expected to run,
// it just shouldn't stuck on the loading phase
cy.findByText("There was a problem with your question");
// The query runs and we assert the page is not blank,
// rather than an infinite loop and stack overflow.
// 'test question' is the name of the question.
cy.findByText("test question");
});
});
......@@ -119,6 +119,11 @@
(declare col-info-for-field-clause)
(def type-info-columns
"Columns to select from a field to get its type information without getting information that is specific to that
column."
[:base_type :effective_type :coercion_strategy :semantic_type])
(defn infer-expression-type
"Infer base-type/semantic-type information about an `expression` clause."
[expression]
......@@ -133,7 +138,7 @@
(col-info-for-field-clause {} expression)
(mbql.u/is-clause? :coalesce expression)
(infer-expression-type (second expression))
(select-keys (infer-expression-type (second expression)) type-info-columns)
(mbql.u/is-clause? :length expression)
{:base_type :type/BigInteger}
......@@ -147,7 +152,7 @@
(or (not (mbql.u/is-clause? :value expression))
(let [[_ value] expression]
(not= value nil))))
(infer-expression-type expression)))
(select-keys (infer-expression-type expression) type-info-columns)))
clauses))
(mbql.u/datetime-arithmetics? expression)
......
......@@ -438,11 +438,72 @@
(-> (add-column-info (mt/mbql-query venues {:expressions {"expr" expr}
:fields [[:expression "expr"]]
:limit 10})
{})
{})
:cols
first
(select-keys [:base_type :semantic_type])))
(deftest computed-columns-inference
(letfn [(infer [expr] (-> (mt/mbql-query venues
{:expressions {"expr" expr}
:fields [[:expression "expr"]]
:limit 10})
(add-column-info {})
:cols
first))]
(testing "Coalesce"
(testing "Uses the first clause"
(testing "Gets the type information from the field"
(is (= {:semantic_type :type/Name,
:coercion_strategy nil,
:name "expr",
:expression_name "expr",
:source :fields,
:field_ref [:expression "expr"],
:effective_type :type/Text,
:display_name "expr",
:base_type :type/Text}
(infer [:coalesce [:field (mt/id :venues :name) nil] "bar"])))
(testing "Does not contain a field id in its analysis (#18513)"
(is (false? (contains? (infer [:coalesce [:field (mt/id :venues :name) nil] "bar"])
:id)))))
(testing "Gets the type information from the literal"
(is (= {:base_type :type/Text,
:name "expr",
:display_name "expr",
:expression_name "expr",
:field_ref [:expression "expr"],
:source :fields}
(infer [:coalesce "bar" [:field (mt/id :venues :name) nil]]))))))
(testing "Case"
(testing "Uses first available type information"
(testing "From a field"
(is (= {:semantic_type :type/Name,
:coercion_strategy nil,
:name "expr",
:expression_name "expr",
:source :fields,
:field_ref [:expression "expr"],
:effective_type :type/Text,
:display_name "expr",
:base_type :type/Text}
(infer [:coalesce [:field (mt/id :venues :name) nil] "bar"])))
(testing "does not contain a field id in its analysis (#17512)"
(is (false?
(contains? (infer [:coalesce [:field (mt/id :venues :name) nil] "bar"])
:id))))))
(is (= {:base_type :type/Text}
(infered-col-type [:case [[[:> [:field (mt/id :venues :price) nil] 2] "big"]]])))
(is (= {:base_type :type/Float}
(infered-col-type [:case [[[:> [:field (mt/id :venues :price) nil] 2]
[:+ [:field (mt/id :venues :price) nil] 1]]]])))
(testing "Make sure we skip nils when infering case return type"
(is (= {:base_type :type/Number}
(infered-col-type [:case [[[:< [:field (mt/id :venues :price) nil] 10] [:value nil {:base_type :type/Number}]]
[[:> [:field (mt/id :venues :price) nil] 2] 10]]]))))
(is (= {:base_type :type/Float}
(infered-col-type [:case [[[:> [:field (mt/id :venues :price) nil] 2] [:+ [:field (mt/id :venues :price) nil] 1]]]]))))))
(deftest test-string-extracts
(is (= {:base_type :type/Text}
(infered-col-type [:trim "foo"])))
......@@ -470,19 +531,6 @@
:semantic_type :type/Name}
(infered-col-type [:coalesce [:field (mt/id :venues :name) nil] "bar"]))))
(deftest test-case
(is (= {:base_type :type/Text}
(infered-col-type [:case [[[:> [:field (mt/id :venues :price) nil] 2] "big"]]])))
(is (= {:base_type :type/Float}
(infered-col-type [:case [[[:> [:field (mt/id :venues :price) nil] 2]
[:+ [:field (mt/id :venues :price) nil] 1]]]])))
(testing "Make sure we skip nils when infering case return type"
(is (= {:base_type :type/Number}
(infered-col-type [:case [[[:< [:field (mt/id :venues :price) nil] 10] [:value nil {:base_type :type/Number}]]
[[:> [:field (mt/id :venues :price) nil] 2] 10]]]))))
(is (= {:base_type :type/Float}
(infered-col-type [:case [[[:> [:field (mt/id :venues :price) nil] 2] [:+ [:field (mt/id :venues :price) nil] 1]]]]))))
(deftest unique-name-key-test
(testing "Make sure `:cols` always come back with a unique `:name` key (#8759)"
(is (= {:cols
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment