From 2cf666d9af002d0fe0914c6b97c0051994760ce2 Mon Sep 17 00:00:00 2001 From: Chris Truter <crisptrutski@users.noreply.github.com> Date: Mon, 10 Jun 2024 15:58:41 +0200 Subject: [PATCH] Rename 'direct' fields references to 'explicit' (#43823) --- .../migrations/001_update_migrations.yaml | 12 ++++ src/metabase/models/query_field.clj | 24 ++++---- src/metabase/native_query_analyzer.clj | 24 ++++---- test/metabase/db/custom_migrations_test.clj | 6 +- .../db/schema_migrations_test/impl.clj | 60 ++++++++++++------- test/metabase/models/query_field_test.clj | 28 ++++----- test/metabase/native_query_analyzer_test.clj | 20 +++---- 7 files changed, 100 insertions(+), 74 deletions(-) diff --git a/resources/migrations/001_update_migrations.yaml b/resources/migrations/001_update_migrations.yaml index b45901bba71..bb7fedf2101 100644 --- a/resources/migrations/001_update_migrations.yaml +++ b/resources/migrations/001_update_migrations.yaml @@ -8053,6 +8053,18 @@ databaseChangeLog: - customChange: class: "metabase.db.custom_migrations.MigrateMetricsToV2" + - changeSet: + id: v51.2024-06-07T12:37:36 + author: crisptrutski + comment: Rename query_field.direct_reference to query_field.indirect_reference + changes: + - renameColumn: + tableName: query_field + columnDataType: ${boolean.type} + oldColumnName: direct_reference + newColumnName: explicit_reference + + # >>>>>>>>>> DO NOT ADD NEW MIGRATIONS BELOW THIS LINE! ADD THEM ABOVE <<<<<<<<<< ######################################################################################################################## diff --git a/src/metabase/models/query_field.clj b/src/metabase/models/query_field.clj index 9d2b2c202b8..1b12517b1c6 100644 --- a/src/metabase/models/query_field.clj +++ b/src/metabase/models/query_field.clj @@ -18,7 +18,7 @@ (defn- query-field-ids "Find out ids of all fields used in a query. Conforms to the same protocol as [[query-analyzer/field-ids-for-sql]], - so returns `{:direct #{...int ids}}` map. + so returns `{:explicit #{...int ids}}` map. Does not track wildcards for queries rendered as tables afterwards." [query] @@ -27,8 +27,8 @@ (query-analyzer/field-ids-for-sql query) (catch Exception e (log/error e "Error parsing SQL" query))) - :query {:direct (mbql.u/referenced-field-ids query)} - :mbql/query {:direct (lib.util/referenced-field-ids query)} + :query {:explicit (mbql.u/referenced-field-ids query)} + :mbql/query {:explicit (lib.util/referenced-field-ids query)} nil)) (defn update-query-fields-for-card! @@ -39,14 +39,14 @@ Returns `nil` (and logs the error) if there was a parse error." [{card-id :id, query :dataset_query}] (try - (let [{:keys [direct indirect] :as res} (query-field-ids query) - id->row (fn [direct? field-id] - {:card_id card-id - :field_id field-id - :direct_reference direct?}) - query-field-rows (concat - (map (partial id->row true) direct) - (map (partial id->row false) indirect))] + (let [{:keys [explicit implicit] :as res} (query-field-ids query) + id->row (fn [explicit? field-id] + {:card_id card-id + :field_id field-id + :explicit_reference explicit?}) + query-field-rows (concat + (map (partial id->row true) explicit) + (map (partial id->row false) implicit))] ;; when response is `nil`, it's a disabled parser, not unknown columns (when (some? res) (t2/with-transaction [_conn] @@ -66,6 +66,6 @@ (t2/insert! :model/QueryField to-create)) (doseq [item to-update] (t2/update! :model/QueryField {:card_id card-id :field_id (:field_id item)} - (select-keys item [:direct_reference]))))))) + (select-keys item [:explicit_reference]))))))) (catch Exception e (log/error e "Error updating query fields")))) diff --git a/src/metabase/native_query_analyzer.clj b/src/metabase/native_query_analyzer.clj index bfc0d656f2b..6d48804e830 100644 --- a/src/metabase/native_query_analyzer.clj +++ b/src/metabase/native_query_analyzer.clj @@ -104,7 +104,7 @@ (field-query :f.name (:column column)) (into [:or] (map table-query tables))])) -(defn- direct-field-ids-for-query +(defn- explicit-field-ids-for-query "Selects IDs of Fields that could be used in the query" [{column-maps :columns table-maps :tables} db-id] (let [columns (map :component column-maps) @@ -128,8 +128,8 @@ ;; limit to the named tables (seq table-wildcards) (map :component table-wildcards)))) -(defn- indirect-field-ids-for-query - "Similar to direct-field-ids-for-query, but for wildcard selects" +(defn- implicit-field-ids-for-query + "Similar to explicit-field-ids-for-query, but for wildcard selects" [parsed-query db-id] (when-let [tables (wildcard-tables parsed-query)] (t2/select-pks-set :model/Field (merge field-and-table-fragment @@ -139,12 +139,12 @@ (into [:or] (map table-query tables))]})))) (defn field-ids-for-sql - "Returns a `{:direct #{...} :indirect #{...}}` map with field IDs that (may) be referenced in the given card's + "Returns a `{:explicit #{...} :implicit #{...}}` map with field IDs that (may) be referenced in the given card's query. Errs on the side of optimism: i.e., it may return fields that are *not* in the query, and is unlikely to fail to return fields that are in the query. - Direct references are columns that are named in the query; indirect ones are from wildcards. If a field could be - both direct and indirect, it will *only* show up in the `:direct` set." + Explicit references are columns that are named in the query; implicit ones are from wildcards. If a field could be + both explicit and implicit, it will *only* show up in the `:explicit` set." [query] (when (and (active?) (:native query)) @@ -153,9 +153,9 @@ driver (driver.u/database->driver db-id) macaw-opts (nqa.impl/macaw-options driver) parsed-query (macaw/query->components (macaw/parsed-query sql-string) macaw-opts) - direct-ids (direct-field-ids-for-query parsed-query db-id) - indirect-ids (set/difference - (indirect-field-ids-for-query parsed-query db-id) - direct-ids)] - {:direct direct-ids - :indirect indirect-ids}))) + explicit-ids (explicit-field-ids-for-query parsed-query db-id) + implicit-ids (set/difference + (implicit-field-ids-for-query parsed-query db-id) + explicit-ids)] + {:explicit explicit-ids + :implicit implicit-ids}))) diff --git a/test/metabase/db/custom_migrations_test.clj b/test/metabase/db/custom_migrations_test.clj index 55be846e182..5413b62371a 100644 --- a/test/metabase/db/custom_migrations_test.clj +++ b/test/metabase/db/custom_migrations_test.clj @@ -312,8 +312,8 @@ (deftest downgrade-dashboard-tabs-test (testing "Migrations v47.00-029: downgrade dashboard tab test" - ;; it's "v47.00-030" but not "v47.00-029" because for some reasons, - ;; SOMETIMES the rollback of custom migration doens't get triggered on mysql and this test got flaky. + ;; it's "v47.00-030" but not "v47.00-029" for some reason, + ;; SOMETIMES the rollback of custom migration doesn't get triggered on mysql and this test got flaky. (impl/test-migrations "v47.00-030" [migrate!] (migrate!) (let [user-id (first (t2/insert-returning-pks! (t2/table-name :model/User) {:first_name "Howard" @@ -1761,7 +1761,7 @@ (is (= 0 (count (pulse-channel-test/send-pulse-triggers pulse-id))))) (testing "the init-send-pulse-triggers job should be re-run after migrate up" (migrate!) - ;; we need to redef this so quarzt trigger that run on a different thread use the same db connection as this test + ;; we redefine this so quartz triggers that run on different threads use the same db connection as this test (with-redefs [mdb.connection/*application-db* mdb.connection/*application-db*] ;; simulate starting MB after migrate up, which will trigger this function (task/init! ::task.send-pulses/SendPulses) diff --git a/test/metabase/db/schema_migrations_test/impl.clj b/test/metabase/db/schema_migrations_test/impl.clj index d1a0bd78383..7d44b9fc8ed 100644 --- a/test/metabase/db/schema_migrations_test/impl.clj +++ b/test/metabase/db/schema_migrations_test/impl.clj @@ -24,7 +24,8 @@ [metabase.test.initialize :as initialize] [metabase.util :as u] [metabase.util.date-2 :as u.date] - [metabase.util.log :as log]) + [metabase.util.log :as log] + [toucan2.core :as t2]) (:import (java.time OffsetDateTime) (liquibase LabelExpression) @@ -122,8 +123,9 @@ (defn- migration-id-in-range? "Whether `id` should be considered to be between `start-id` and `end-id`, inclusive. Handles both legacy plain-integer and new-style `vMM.mm-NNN` style IDs." - [start-id id end-id & [{:keys [inclusive-end?] - :or {inclusive-end? true}}]] + [start-id id end-id & [{:keys [inclusive-start? inclusive-end?] + :or {inclusive-start? true + inclusive-end? true}}]] (let [start (migration->number start-id) id (migration->number id) @@ -132,7 +134,9 @@ (migration->number end-id) Integer/MAX_VALUE)] (and - (<= start id) + (if inclusive-start? + (<= start id) + (< start id)) (if inclusive-end? (<= id end) (< id end))))) @@ -175,7 +179,9 @@ "Run Liquibase migrations from our migrations YAML file in the range of `start-id` -> `end-id` (inclusive) against a DB with `jdbc-spec`." {:added "0.41.0", :arglists '([conn [start-id end-id]] - [conn [start-id end-id] {:keys [inclusive-end?], :or {inclusive-end? true}}])} + [conn [start-id end-id] {:keys [inclusive-start? inclusive-end?] + :or {inclusive-start? true + inclusive-end? true}}])} [^java.sql.Connection conn [start-id end-id] & [range-options]] (liquibase/with-liquibase [liquibase conn] (let [database (.getDatabase liquibase) @@ -200,8 +206,8 @@ (log/debug (u/format-color 'yellow "Testing migrations for driver %s..." driver)) (with-temp-empty-app-db [conn driver] ;; sanity check: make sure the DB is actually empty - (let [metadata (.getMetaData conn) - schema (when (= :h2 driver) "PUBLIC")] + (let [metadata (.getMetaData conn) + schema (when (= :h2 driver) "PUBLIC")] (with-open [rs (.getTables metadata nil schema "%" (into-array String ["TABLE"]))] (let [tables (jdbc/result-set-seq rs)] (assert (zero? (count tables)) @@ -209,24 +215,32 @@ (u/pprint-to-str tables)))))) (log/debugf "Finding and running migrations before %s..." start-id) (run-migrations-in-range! conn ["v00.00-000" start-id] {:inclusive-end? false}) - (letfn [(migrate - ([] - (migrate :up nil)) - ([direction] - (migrate direction nil)) + (let [restart-id (atom nil)] + (letfn [(migrate + ([] + (migrate :up nil)) + ([direction] + (migrate direction nil)) - ([direction version] - (case direction - :up - (do - (log/debugf "Finding and running migrations between %s and %s (inclusive)" start-id (or end-id "end")) - (run-migrations-in-range! conn [start-id end-id])) + ([direction version] + (case direction + :up + (do + (log/debugf "Finding and running migrations between %s and %s (inclusive)" start-id (or end-id "end")) + ;; If we have rolled back earlier migrations, it's no longer safe to resume from start-id. + (if-let [start-after @restart-id] + (run-migrations-in-range! conn [start-after end-id] {:inclusive-start? false}) + (run-migrations-in-range! conn [start-id end-id]))) - :down - (do - (assert (int? version), "Downgrade requires a version") - (mdb/migrate! (mdb/data-source) :down version)))))] - (f migrate))) + :down + (do + (assert (int? version), "Downgrade requires a version") + (mdb/migrate! (mdb/data-source) :down version) + ;; We may have rolled back migrations prior to start-id, so its no longer safe to start from there. + (liquibase/with-liquibase [liquibase conn] + (let [table-name (.getDatabaseChangeLogTableName (.getDatabase liquibase))] + (reset! restart-id (t2/select-one-pk table-name {:order-by [[:orderexecuted :desc]]}))))))))] + (f migrate)))) (log/debug (u/format-color 'green "Done testing migrations for driver %s." driver))) (defn do-test-migrations! diff --git a/test/metabase/models/query_field_test.clj b/test/metabase/models/query_field_test.clj index 76d338e88a8..c254f9b44b6 100644 --- a/test/metabase/models/query_field_test.clj +++ b/test/metabase/models/query_field_test.clj @@ -12,7 +12,7 @@ [toucan2.core :as t2] [toucan2.tools.with-temp :as t2.with-temp])) -(def ^:private query-field-keys [:card_id :field_id :direct_reference]) +(def ^:private query-field-keys [:card_id :field_id :explicit_reference]) (defn- qf->map [query-field] (select-keys query-field query-field-keys)) @@ -61,10 +61,10 @@ (with-test-setup (let [total-qf {:card_id card-id :field_id total-id - :direct_reference true} + :explicit_reference true} tax-qf {:card_id card-id :field_id tax-id - :direct_reference true}] + :explicit_reference true}] (testing "A freshly created card has relevant corresponding QueryFields" (is (= #{total-qf} @@ -103,35 +103,35 @@ (with-test-setup (let [total-qf {:card_id card-id :field_id total-id - :direct_reference false} + :explicit_reference false} tax-qf {:card_id card-id :field_id tax-id - :direct_reference false}] + :explicit_reference false}] (testing "simple select *" (trigger-parse! card-id "select * from orders") (let [qfs (query-fields-for-card card-id)] (is (= 9 (count qfs))) - (is (not-every? :direct_reference qfs)) + (is (not-every? :explicit_reference qfs)) (is (set/subset? #{total-qf tax-qf} qfs))))))) (deftest table-wildcard-test (with-test-setup (let [total-qf {:card_id card-id :field_id total-id - :direct_reference true} + :explicit_reference true} tax-qf {:card_id card-id :field_id tax-id - :direct_reference true}] + :explicit_reference true}] (testing "mix of select table.* and named columns" (trigger-parse! card-id "select p.*, o.tax, o.total from orders o join people p on p.id = o.user_id") (let [qfs (query-fields-for-card card-id)] (is (= (+ 13 #_people 2 #_tax-and-total 1 #_o.user_id) (count qfs))) ;; 13 total, but id is referenced directly - (is (= 12 (t2/count :model/QueryField :card_id card-id :direct_reference false))) + (is (= 12 (t2/count :model/QueryField :card_id card-id :explicit_reference false))) ;; subset since it also includes the PKs/FKs (is (set/subset? #{total-qf tax-qf} - (t2/select-fn-set qf->map :model/QueryField :card_id card-id :direct_reference true)))))))) + (t2/select-fn-set qf->map :model/QueryField :card_id card-id :explicit_reference true)))))))) (deftest parse-mbql-test (testing "Parsing MBQL query returns correct used fields" @@ -147,11 +147,11 @@ :alias "Venues" :condition [:= $checkins.venue_id $venues.id]}]})}] (mt/$ids - (is (= {:direct #{%venues.name %venues.price}} + (is (= {:explicit #{%venues.name %venues.price}} (#'query-field/query-field-ids (:dataset_query c1)))) - (is (= {:direct nil} + (is (= {:explicit nil} (#'query-field/query-field-ids (:dataset_query c2)))) - (is (= {:direct #{%venues.id %checkins.venue_id}} + (is (= {:explicit #{%venues.id %checkins.venue_id}} (#'query-field/query-field-ids (:dataset_query c3))))))) (testing "Parsing pMBQL query returns correct used fields" (let [metadata-provider (lib.metadata.jvm/application-database-metadata-provider (mt/id)) @@ -159,5 +159,5 @@ venues-name (lib.metadata/field metadata-provider (mt/id :venues :name)) mlv2-query (-> (lib/query metadata-provider venues) (lib/aggregate (lib/distinct venues-name)))] - (is (= {:direct #{(mt/id :venues :name)}} + (is (= {:explicit #{(mt/id :venues :name)}} (#'query-field/query-field-ids mlv2-query)))))) diff --git a/test/metabase/native_query_analyzer_test.clj b/test/metabase/native_query_analyzer_test.clj index f01dcfaa2a7..c7202a5004d 100644 --- a/test/metabase/native_query_analyzer_test.clj +++ b/test/metabase/native_query_analyzer_test.clj @@ -35,33 +35,33 @@ (let [q (fn [sql] (#'query-analyzer/field-ids-for-sql (mt/native-query {:query sql})))] (testing "simple query matches" - (is (= {:direct #{(mt/id :venues :id)} :indirect nil} + (is (= {:explicit #{(mt/id :venues :id)} :implicit nil} (q "select id from venues")))) (testing "quotes stop case matching" ;; MySQL does case-insensitive string comparisons by default; quoting does not make it consider case ;; in field names either, so it's consistent behavior (if (= (mdb.connection/db-type) :mysql) - (is (= {:direct #{(mt/id :venues :id)} :indirect nil} + (is (= {:explicit #{(mt/id :venues :id)} :implicit nil} (q "select \"id\" from venues"))) - (is (= {:direct nil :indirect nil} + (is (= {:explicit nil :implicit nil} (q "select \"id\" from venues")))) - (is (= {:direct #{(mt/id :venues :id)} :indirect nil} + (is (= {:explicit #{(mt/id :venues :id)} :implicit nil} (q "select \"ID\" from venues")))) (testing "you can mix quoted and unquoted names" - (is (= {:direct #{(mt/id :venues :id) (mt/id :venues :name)} :indirect nil} + (is (= {:explicit #{(mt/id :venues :id) (mt/id :venues :name)} :implicit nil} (q "select v.\"ID\", v.name from venues"))) - (is (= {:direct #{(mt/id :venues :id) (mt/id :venues :name)} :indirect nil} + (is (= {:explicit #{(mt/id :venues :id) (mt/id :venues :name)} :implicit nil} (q "select v.`ID`, v.name from venues")))) (testing "It will find all relevant columns if query is not specific" - (is (= {:direct #{(mt/id :venues :id) (mt/id :checkins :id)} :indirect nil} + (is (= {:explicit #{(mt/id :venues :id) (mt/id :checkins :id)} :implicit nil} (q "select id from venues join checkins")))) (testing "But if you are specific - then it's a concrete field" - (is (= {:direct #{(mt/id :venues :id)} :indirect nil} + (is (= {:explicit #{(mt/id :venues :id)} :implicit nil} (q "select v.id from venues v join checkins")))) (testing "And wildcards are matching everything" (is (= 10 (-> (q "select * from venues v join checkins") - :indirect count))) + :implicit count))) (is (= 6 (-> (q "select v.* from venues v join checkins") - :indirect count))))))) + :implicit count))))))) -- GitLab