Skip to content
Snippets Groups Projects
Unverified Commit 4d929086 authored by Case Nelson's avatar Case Nelson Committed by GitHub
Browse files

[MLv2] Export find-matching-column (#35678)

parent b6bfe3c0
No related branches found
No related tags found
No related merge requests found
......@@ -111,7 +111,8 @@
pivot-columns-for-type
pivot-types]
[lib.equality
find-column-for-legacy-ref]
find-column-for-legacy-ref
find-matching-column]
[lib.expression
expression
expressions
......
......@@ -205,7 +205,7 @@
[:map [:generous? {:optional true} :boolean]])
(mu/defn find-matching-column :- [:maybe ::lib.schema.metadata/column]
"Given `a-ref` and a list of `columns`, finds the column that best matches this ref.
"Given `a-ref-or-column` and a list of `columns`, finds the column that best matches this ref or column.
Matching is based on finding the basically plausible matches first. There is often zero or one plausible matches, and
this can return quickly.
......@@ -250,21 +250,26 @@
(disambiguate-matches a-ref plausible)))
(throw (ex-info "Unknown type of ref" {:ref a-ref}))))
([query stage-number a-ref columns]
(find-matching-column query stage-number a-ref columns {}))
([query :- [:maybe ::lib.schema/query]
stage-number :- :int
[_ref-kind _opts ref-id :as a-ref] :- ::lib.schema.ref/ref
columns :- [:sequential ::lib.schema.metadata/column]
opts :- FindMatchingColumnOptions]
(or (find-matching-column a-ref columns opts)
;; We failed to match by ID, so try again with the column's name. Any columns with `:id` set are dropped.
;; Why? Suppose there are two CREATED_AT columns in play - if one has an :id and it failed to match above, then
;; it certainly shouldn't match by name just because of the coincidence of column names!
([query stage-number a-ref-or-column columns]
(find-matching-column query stage-number a-ref-or-column columns {}))
([query :- [:maybe ::lib.schema/query]
stage-number :- :int
a-ref-or-column :- [:or ::lib.schema.metadata/column ::lib.schema.ref/ref]
columns :- [:sequential ::lib.schema.metadata/column]
opts :- FindMatchingColumnOptions]
(let [[_ref-kind _opts ref-id :as a-ref] (if (lib.util/clause? a-ref-or-column)
a-ref-or-column
(lib.ref/ref a-ref-or-column))]
(or (find-matching-column a-ref columns opts)
;; We failed to match by ID, so try again with the column's name. Any columns with `:id` set are dropped.
;; Why? Suppose there are two CREATED_AT columns in play - if one has an :id and it failed to match above, then
;; it certainly shouldn't match by name just because of the coincidence of column names!
(when (and query (number? ref-id))
(when-let [no-id-columns (not-empty (remove :id columns))]
(when-let [resolved (resolve-field-id query stage-number ref-id)]
(when-let [resolved (if (lib.util/clause? a-ref-or-column)
(resolve-field-id query stage-number ref-id)
a-ref-or-column)]
(find-matching-column (-> (assoc a-ref 2 (or (:lib/desired-column-alias resolved)
(:name resolved)))
;; make sure the :field ref has a `:base-type`, it's against the rules for a
......@@ -273,7 +278,7 @@
;; from.
(lib.options/update-options (partial merge {:base-type :type/*})))
no-id-columns
opts)))))))
opts))))))))
(defn- ref-id-or-name
  "Returns the third element of a ref clause shaped like `[ref-kind opts id-or-name]`, i.e. its ID or name.
  Yields nil when the clause has fewer than three elements."
  [a-ref]
  ;; `nth` with an explicit nil default mirrors what positional destructuring does for short inputs.
  (nth a-ref 2 nil))
......
......@@ -997,6 +997,33 @@
(some-> offset-unit keyword)
(js->clj options :keywordize-keys true)))
(defn ^:export find-matching-column
"Given a ref or a column (passed as `a-ref`) and a list of `columns`, finds the column that best matches it.

Exported wrapper: all four arguments are passed through unchanged to `lib.core/find-matching-column`.

Matching is based on finding the basically plausible matches first. There are often zero or one plausible matches,
and this can return quickly.

If there are multiple plausible matches, they are disambiguated by the most important extra included in the `ref`.
(`:join-alias` first, then `:temporal-unit`, etc.)

- Integer IDs in the `ref` are matched by ID; this usually is unambiguous.
  - If there are multiple joins on one table (including possible implicit joins), check `:join-alias` next.
    - If `a-ref` has a `:join-alias`, only a column which matches it can be the match, and it should be unique.
    - If `a-ref` doesn't have a `:join-alias`, prefer the column with no `:join-alias`, and prefer already selected
      columns over implicitly joinable ones.
  - There may be broken cases where the ref has an ID but the column does not. Therefore the ID must be resolved to a
    name or `:lib/desired-column-alias` and matched that way.
    - `a-query` and `stage-number` are required for this case, since they're needed to resolve the correct name.
    - Columns with `:id` set are dropped to prevent them matching. (If they didn't match by `:id` above they
      shouldn't match by name due to a coincidence of column names in different tables.)
- String IDs are checked against `:lib/desired-column-alias` first.
  - If that doesn't match any columns, `:name` is compared next.
  - The same disambiguation (by `:join-alias` etc.) is applied if there are multiple plausible matches.

Returns the matching column, or nil if no match is found."
[a-query stage-number a-ref columns]
(lib.core/find-matching-column a-query stage-number a-ref columns))
(defn ^:export stage-count
"Returns the count of stages in query"
[a-query]
......
......@@ -255,6 +255,21 @@
(lib.equality/find-matching-column a-ref cols)
(lib.equality/find-matching-column query -1 a-ref cols))))))
(deftest ^:parallel find-matching-column-from-column-test
  ;; Looking up a match starting from a breakout *column* must give the same result as starting from the
  ;; corresponding breakout *ref*: both should land on the VENUES.ID filterable column.
  (let [query        (-> lib.tu/venues-query
                         (lib/breakout (meta/field-metadata :venues :id)))
        candidates   (lib/filterable-columns query)
        breakout-col (m/find-first :breakout-position (lib/breakoutable-columns query))
        via-column   (lib.equality/find-matching-column query -1 breakout-col candidates)
        breakout-ref (first (lib/breakouts query))
        via-ref      (lib.equality/find-matching-column query -1 breakout-ref candidates)]
    (is (=? {:id (meta/id :venues :id)}
            via-ref))
    (is (=? {:id (meta/id :venues :id)}
            via-column))
    (is (= via-ref
           via-column))))
(deftest ^:parallel find-matching-column-by-name-test
(testing "find-matching-column should find columns based on matching name"
(let [query (lib/append-stage lib.tu/query-with-join)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment