Skip to content
Snippets Groups Projects
Unverified Commit d0f9ef1c authored by Chris Truter's avatar Chris Truter Committed by GitHub
Browse files

Track table references, even if we don't query their columns (#46128)

parent a0e8395f
Branches
Tags
No related merge requests found
......@@ -80,7 +80,7 @@
:mbql/query true
false)))
(defn- explicit-references [field-ids]
(defn- explicit-field-references [field-ids]
(when (seq field-ids)
;; We add this on in code as `true` in MySQL-based drivers would be returned as 1.
(map #(assoc % :explicit-reference true)
......@@ -90,6 +90,11 @@
:join [[(t2/table-name :model/Table) :t] [:= :t.id :f.table_id]]
:where [:in :f.id field-ids]}))))
(defn- explicit-references [field-ids]
(let [field-refs (explicit-field-references field-ids)]
{:fields field-refs
:tables (distinct (map #(dissoc % :field-id :field) field-refs))}))
(defn- query-references
"Find out ids of all fields used in a query. Conforms to the same protocol as [[query-analyzer/field-ids-for-sql]],
so returns `{:explicit #{...int ids}}` map.
......@@ -124,7 +129,7 @@
{:card_id card-id
:field_id field-id
:explicit_reference explicit-reference}))
query-field-rows (map reference->row references)]
query-field-rows (map reference->row (:fields references))]
(query-field/update-query-fields-for-card! card-id query-field-rows)))))
(defn- replaced-inner-query-for-native-card
......
......@@ -97,21 +97,51 @@
(field-query :f.name (:column column))
(into [:or] (map table-query tables))]))
(defn table-reference
"Used by tests"
[db-id table]
(t2/select-one :model/QueryTable
{:select [[:t.id :table-id] [:t.name :table]]
:from [[(t2/table-name :model/Table) :t]]
:where [:and
[:= :t.db_id db-id]
(table-query {:table (name table)})]}))
(defn field-reference
"Used by tests"
[db-id table column]
(t2/select-one :model/QueryField (assoc field-and-table-fragment
:where [:and
[:= :t.db_id db-id]
(column-query nil {:table (name table)
:column (name column)})])))
(defn- strip-redundant-refs
"Strip out duplicate references, and unqualified references that are shadowed by found or qualified ones."
[references]
;; TODO handle schema
(let [qualified? (into #{} (comp (filter :table) (map :column)) references)]
(into #{}
(filter (fn [{:keys [table column]}]
(or table (not (qualified? column)))))
references)))
(defn- strip-redundant-table-refs
"Strip out duplicate references, and unqualified references that are shadowed by found or qualified ones."
[references]
(let [qualified? (into #{} (comp (filter :schema) (map :table)) references)]
(into #{}
(filter (fn [{:keys [schema table]}]
(or schema (not (qualified? table)))))
references)))
(defn- consolidate-columns
"Qualify analyzed columns with the corresponding database IDs, where we are able to resolve them."
[analyzed-columns database-columns]
;; TODO we should handle schema as well
(let [->tab-key (comp u/lower-case-en :table)
->col-key (comp u/lower-case-en :column)
column->records (group-by (comp ->col-key) database-columns)
column->records (group-by ->col-key database-columns)
table+column->records (group-by (juxt ->tab-key ->col-key) database-columns)]
(strip-redundant-refs
(mapcat (fn [{:keys [table column] :as reference}]
......@@ -121,17 +151,34 @@
[(update-vals reference strip-quotes)]))
analyzed-columns))))
(defn field-reference
"Used by tests"
[db-id table column]
(t2/select-one :model/QueryField (assoc field-and-table-fragment
:where [:and
[:= :t.db_id db-id]
(column-query nil {:table (name table)
:column (name column)})])))
(defn- consolidate-tables [analyzed-tables database-tables]
(let [->schema-key (comp u/lower-case-en :schema)
->table-key (comp u/lower-case-en :table)
table->records (group-by ->table-key database-tables)
schema+table->records (group-by (juxt ->schema-key ->table-key) database-tables)]
(strip-redundant-table-refs
(mapcat (fn [{:keys [schema table] :as reference}]
(or (if schema
(schema+table->records [(normalized-key schema) (normalized-key table)])
(table->records (normalized-key table)))
[(update-vals reference strip-quotes)]))
analyzed-tables))))
(defn- table-refs-for-query
"Given the results of query analysis, return references to the corresponding tables and cards."
[{table-maps :tables} db-id]
(let [tables (map :component table-maps)]
(consolidate-tables
tables
(t2/select :model/QueryTable
{:select [[:t.id :table-id] [:t.name :table]]
:from [[(t2/table-name :model/Table) :t]]
:where [:and
[:= :t.db_id db-id]
(into [:or] (map table-query tables))]}))))
(defn- explicit-references-for-query
"Selects IDs of Fields that could be used in the query"
(defn- explicit-field-refs-for-query
"Given the results of query analysis, return references to the corresponding fields and model outputs."
[{column-maps :columns table-maps :tables} db-id]
(let [columns (map :component column-maps)
tables (map :component table-maps)]
......@@ -169,6 +216,14 @@
(defn- mark-reference [refs explicit?]
(map #(assoc % :explicit-reference explicit?) refs))
(defn- deduce-or-fetch-table-refs [parsed-query db-id field-refs]
(let [tables (map :component (:tables parsed-query))
implicit? (into #{} (map (juxt :schema :table)) field-refs)]
(if (every? implicit? (map (juxt :schema :table) tables))
(distinct (map #(dissoc % :field-id :column) field-refs))
;; if any tables are references without referencing any of their columns, we might as well fetch every table
(table-refs-for-query parsed-query db-id))))
(defn- references-for-sql
"Returns a `{:explicit #{...} :implicit #{...}}` map with field IDs that (may) be referenced in the given card's
query. Errs on the side of optimism: i.e., it may return fields that are *not* in the query, and is unlikely to fail
......@@ -181,12 +236,14 @@
macaw-opts (nqa.impl/macaw-options driver)
sql-string (:query (nqa.sub/replace-tags query))
parsed-query (macaw/query->components (macaw/parsed-query sql-string macaw-opts) macaw-opts)
explicit-refs (explicit-references-for-query parsed-query db-id)
explicit-refs (explicit-field-refs-for-query parsed-query db-id)
implicit-refs (set/difference (set (implicit-references-for-query parsed-query db-id))
(set explicit-refs))]
;; TODO: add table refs
(concat (mark-reference explicit-refs true)
(mark-reference implicit-refs false))))
(set explicit-refs))
field-refs (concat (mark-reference explicit-refs true)
(mark-reference implicit-refs false))
table-refs (deduce-or-fetch-table-refs parsed-query db-id field-refs)]
{:tables table-refs
:fields field-refs}))
(defn references-for-native
"Returns a `{:explicit #{...} :implicit #{...}}` map with field IDs that (may) be referenced in the given card's
......
......@@ -39,15 +39,27 @@
{:field-id 3 :table "t3" :column "c3"}]))))))
(defn- refs [sql]
(->> (mt/native-query {:query sql})
(#'nqa/references-for-native)
(sort-by (juxt :table :column))))
(-> (mt/native-query {:query sql})
(#'nqa/references-for-native)
(update-vals
(partial sort-by (juxt :table :column)))))
(defn- field-refs [sql]
(:fields (refs sql)))
(defn- missing-reference [table column]
{:table (name table)
:column (name column)
:explicit-reference true})
(defn- table-reference [table]
(let [reference (nqa/table-reference (mt/id) table)]
;; sanity-check that this is the right reference
(assert (= (mt/id table) (:table-id reference)))
;; sanity-check the names, whose case depends on the driver
(assert (= (name table) (u/lower-case-en (:table reference))))
reference))
(defn- field-reference [table column]
(let [reference (nqa/field-reference (mt/id) table column)]
;; sanity-check that this is the right reference
......@@ -62,48 +74,54 @@
(deftest ^:parallel field-matching-simple-test
(testing "simple query matches"
(is (= [(field-reference :venues :id)]
(refs "select id from venues")))))
(field-refs "select id from venues")))))
(deftest ^:parallel field-matching-case-test
(testing "quotes stop case matching"
(is (= [(missing-reference :venues :id)]
(refs "select \"id\" from venues")))
(field-refs "select \"id\" from venues")))
(is (= [(field-reference :venues :id)]
(refs "select \"ID\" from venues"))))
(field-refs "select \"ID\" from venues"))))
(testing "unresolved references use case verbatim"
(is (= [{:column "id", :table "unKnown", :explicit-reference true}]
(refs "select \"id\" from unKnown")))
(field-refs "select \"id\" from unKnown")))
(is (= [{:column "ID", :table "unknowN", :explicit-reference true}]
(refs "select ID from unknowN"))))
(field-refs "select ID from unknowN"))))
(testing "resolved references normalize the case"
(is (not= "veNUES" (:table (first (refs "select id from veNUES")))))
(is (= "venues" (u/lower-case-en (:table (first (refs "select id from veNUES")))))))
(is (not= "veNUES" (:table (first (field-refs "select id from veNUES")))))
(is (= "venues" (u/lower-case-en (:table (first (field-refs "select id from veNUES")))))))
(testing "you can mix quoted and unquoted names"
(is (= [(field-reference :venues :id)
(field-reference :venues :name)]
(refs "select v.\"ID\", v.name from venues v")))
(field-refs "select v.\"ID\", v.name from venues v")))
(is (= [(field-reference :venues :id)
(field-reference :venues :name)]
(refs "select v.`ID`, v.name from venues v")))))
(field-refs "select v.`ID`, v.name from venues v")))))
(deftest ^:parallel field-matching-multi-test
(testing "It will find all relevant columns if query is not specific"
(is (= [(field-reference :checkins :id)
(field-reference :venues :id)]
(refs "select id from venues join checkins"))))
(field-refs "select id from venues join checkins"))))
(testing "But if you are specific - then it's a concrete field"
(is (= [(field-reference :venues :id)]
(refs "select v.id from venues v join checkins"))))
(field-refs "select v.id from venues v join checkins"))))
(testing "And wildcards are matching everything"
(is (= {false 10}
(frequencies (map :explicit-reference (refs "select * from venues v join checkins")))))
(frequencies (map :explicit-reference (field-refs "select * from venues v join checkins")))))
(is (= {false 6}
(frequencies (map :explicit-reference (refs "select v.* from venues v join checkins")))))))
(frequencies (map :explicit-reference (field-refs "select v.* from venues v join checkins")))))))
(deftest ^:parallel field-matching-keywords-test
(when (not (contains? #{:snowflake :oracle} driver/*driver*))
(testing "Analysis does not fail due to keywords that are only reserved in other databases"
(is (= [(field-reference :venues :id)]
(refs "select id as final from venues")))
(field-refs "select id as final from venues")))
(is (= [(missing-reference :venues :final)]
(refs "select final from venues"))))))
(field-refs "select final from venues"))))))
(deftest ^:parallel table-without-field-reference-test
(testing "We track table dependencies even when there are no fields being used"
(is (= {:tables [(table-reference :venues)]
:fields []}
(refs "select count(*) from venues")))))
......@@ -28,13 +28,16 @@
(is (false? (query-analysis/enabled-type? :unexpected)))))
(defn- field-id-references [card-or-query]
(->> (:dataset_query card-or-query card-or-query)
(-> (:dataset_query card-or-query card-or-query)
(#'query-analysis/query-references)
;; lowercase names to avoid tests being driver-dependent
(map #(-> %
(update :table u/lower-case-en)
(update :column u/lower-case-en)))
(sort-by (juxt :table :column))))
(update-vals
(fn [refs]
(->> refs
;; lowercase names to avoid tests being driver-dependent
(map #(-> %
(update :table u/lower-case-en)
(update :column u/lower-case-en)))
(sort-by (juxt :table :column)))))))
(deftest parse-mbql-test
(testing "Parsing MBQL query returns correct used fields"
......@@ -52,12 +55,12 @@
(is (= (mt/$ids
[{:table-id (mt/id :venues), :table "venues", :field-id %venues.name, :column "name", :explicit-reference true}
{:table-id (mt/id :venues), :table "venues", :field-id %venues.price, :column "price", :explicit-reference true}])
(field-id-references c1)))
(is (empty? (field-id-references c2)))
(:fields (field-id-references c1))))
(is (empty? (:fields (field-id-references c2))))
(is (= (mt/$ids
[{:table-id (mt/id :checkins), :table "checkins", :field-id %checkins.venue_id, :column "venue_id", :explicit-reference true}
{:table-id (mt/id :venues), :table "venues", :field-id %venues.id, :column "id", :explicit-reference true}])
(field-id-references c3)))))
(:fields (field-id-references c3))))))
(testing "Parsing pMBQL query returns correct used fields"
(let [metadata-provider (lib.metadata.jvm/application-database-metadata-provider (mt/id))
venues (lib.metadata/table metadata-provider (mt/id :venues))
......@@ -69,7 +72,7 @@
:field-id (mt/id :venues :name)
:column "name"
:explicit-reference true}]
(field-id-references mlv2-query))))))
(:fields (field-id-references mlv2-query)))))))
(deftest replace-fields-and-tables!-test
(testing "fields and tables in a native card can be replaced"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment