diff --git a/src/metabase/lib/convert.cljc b/src/metabase/lib/convert.cljc index 39487ed2518ae6fd50f2c498cf1c4c7966adf617..387321211b00a580292196dc0d87e6a8c7c4e3d7 100644 --- a/src/metabase/lib/convert.cljc +++ b/src/metabase/lib/convert.cljc @@ -84,16 +84,23 @@ clean-stage-schema-errors clean-stage-ref-errors)) +(def ^:dynamic *clean-query* + "If true (this is the default), the query is cleaned. + When converting queries at later stages of the preprocessing pipeline, this cleaning might not be desirable." + true) + (defn- clean [almost-query] - (loop [almost-query almost-query - stage-index 0] - (let [current-stage (nth (:stages almost-query) stage-index) - new-stage (clean-stage current-stage)] - (if (= current-stage new-stage) - (if (= stage-index (dec (count (:stages almost-query)))) - almost-query - (recur almost-query (inc stage-index))) - (recur (update almost-query :stages assoc stage-index new-stage) stage-index))))) + (if-not *clean-query* + almost-query + (loop [almost-query almost-query + stage-index 0] + (let [current-stage (nth (:stages almost-query) stage-index) + new-stage (clean-stage current-stage)] + (if (= current-stage new-stage) + (if (= stage-index (dec (count (:stages almost-query)))) + almost-query + (recur almost-query (inc stage-index))) + (recur (update almost-query :stages assoc stage-index new-stage) stage-index)))))) (defmulti ->pMBQL "Coerce something to pMBQL (the version of MBQL manipulated by Metabase Lib v2) if it's not already pMBQL." @@ -523,7 +530,7 @@ (defmethod ->legacy-MBQL :mbql/join [join] (let [base (cond-> (disqualify join) - (str/starts-with? (:alias join) legacy-default-join-alias) (dissoc :alias))] + (and *clean-query* (str/starts-with? (:alias join) legacy-default-join-alias)) (dissoc :alias))] (merge (-> base (dissoc :stages :conditions) (update-vals ->legacy-MBQL)) diff --git a/src/metabase/lib/metadata/jvm.clj b/src/metabase/lib/metadata/jvm.clj index 5d72fb866c321b229f4d5c164abba23a647e8bb9..147d324550b1f0d40d63d9866c1cf73aeda39e9f 100644 --- a/src/metabase/lib/metadata/jvm.clj +++ b/src/metabase/lib/metadata/jvm.clj @@ -139,7 +139,8 @@ [query-type model parsed-args honeysql] (merge (next-method query-type model parsed-args honeysql) - {:select [:field/base_type + {:select [:field/active + :field/base_type :field/coercion_strategy :field/database_type :field/description diff --git a/src/metabase/lib/remove_replace.cljc b/src/metabase/lib/remove_replace.cljc index 8853d0e60207aabbc2b6867ceb637e2660b59e36..1d25673996ff1e80efe2dec7edcbeea423bbb2a6 100644 --- a/src/metabase/lib/remove_replace.cljc +++ b/src/metabase/lib/remove_replace.cljc @@ -205,10 +205,10 @@ (some (fn [{:keys [lib/source lib/source-uuid] :as column}] (when (and (= :source/previous-stage source) (= target-uuid source-uuid)) (:lib/desired-column-alias column)))))] - (if target-ref-id + (cond-> query ;; We are moving to the next stage, so pass the current query as the unmodified-query-for-stage - (remove-local-references query stage-number query :field {} target-ref-id) - query)) + target-ref-id + (remove-local-references stage-number query :field {} target-ref-id))) query)) (defn- find-location diff --git a/src/metabase/lib/schema/metadata.cljc b/src/metabase/lib/schema/metadata.cljc index 5be304ff1cd20e70b1f479cd0839084572a10ed0..718b350f5064bdf815e1accf28617d696ca42d8f 100644 --- a/src/metabase/lib/schema/metadata.cljc +++ b/src/metabase/lib/schema/metadata.cljc @@ -150,6 +150,7 @@ [:effective-type {:optional true} [:maybe ::lib.schema.common/base-type]] ;; type of this column in the data warehouse, e.g. `TEXT` or `INTEGER` [:database-type {:optional true} [:maybe :string]] + [:active {:optional true} :boolean] ;; if this is a field from another table (implicit join), this is the field in the current table that should be ;; used to perform the implicit join. e.g. if current table is `VENUES` and this field is `CATEGORIES.ID`, then the ;; `fk_field_id` would be `VENUES.CATEGORY_ID`. In a `:field` reference this is saved in the options map as diff --git a/src/metabase/query_processor/middleware/remove_inactive_field_refs.clj b/src/metabase/query_processor/middleware/remove_inactive_field_refs.clj new file mode 100644 index 0000000000000000000000000000000000000000..b85fc08d2eef2174dda463a998241697f89f14ca --- /dev/null +++ b/src/metabase/query_processor/middleware/remove_inactive_field_refs.clj @@ -0,0 +1,132 @@ +(ns metabase.query-processor.middleware.remove-inactive-field-refs + "This middleware exists to let queries run even if some database columns have been removed in the data warehouse. + + Queries that don't depend on removed columns (other than showing them) should run and show the available data. + Queries that use removed fields otherwise, e.g., for filtering or summary will continue to fail. + + We only try to fix queries if we know a column has been removed. We recognize this during the next sync: deleted + columns are marked active = false." + (:require + [metabase.lib.equality :as lib.equality] + [metabase.lib.metadata :as lib.metadata] + [metabase.lib.schema :as lib.schema] + [metabase.lib.util :as lib.util] + [metabase.lib.walk :as lib.walk] + [metabase.util :as u] + [metabase.util.malli :as mu])) + +(defn- collect-fields-clauses + [query] + (let [clauses (volatile! (transient {})) + visitor (fn [_query _path-type path stage-or-join] + (let [fields (:fields stage-or-join)] + (when (and (seqable? fields) (seq fields)) + (vswap! clauses assoc! path fields)) + nil))] + (lib.walk/walk query visitor) + (persistent! @clauses))) + +(defn- next-path + [query stage-path] + (let [type-index (- (count stage-path) 2) + parent-stage-path (subvec stage-path 0 type-index) + next-stage-path (update stage-path (dec (count stage-path)) inc)] + (cond + (= (get stage-path type-index) :joins) + ;; the stage this join is in + parent-stage-path + + (some? (get-in query next-stage-path)) + next-stage-path + + (pos? type-index) + ;; the join this stage is in + parent-stage-path))) + +(defn- source-metadata->stage-metadata + [source-metadata-column] + (-> source-metadata-column + (update-keys u/->kebab-case-en) + (assoc :lib/type :metadata/column))) + +(defn- column-metadata + [query stage-path] + (or (not-empty (get-in query (into stage-path [:lib/stage-metadata :columns]))) + (not-empty (into [] (map source-metadata->stage-metadata) (get-in query (conj stage-path :source-metadata)))) + (when (> (count stage-path) 2) + (column-metadata query (subvec stage-path 0 (- (count stage-path) 2)))))) + +(defn- resolve-refs + [columns removed-field-refs default-alias] + (let [columns-with-deafult-alias (delay (into [] (map #(assoc % :source-alias default-alias)) columns))] + (mapv #(or (lib.equality/find-matching-column % columns) + (when default-alias + (lib.equality/find-matching-column % @columns-with-deafult-alias))) + removed-field-refs))) + +(defn- propagate-removal + [query stage-path removed-field-refs] + (if-let [next-stage-path (next-path query stage-path)] + (if-not (-> query (get-in next-stage-path) :fields) + (recur query next-stage-path removed-field-refs) + (let [columns (column-metadata query stage-path) + removed-columns (when (seq columns) + (resolve-refs columns removed-field-refs (:alias (get-in query stage-path)))) + next-fields-path (conj next-stage-path :fields) + next-stage-fields (get-in query next-fields-path) + removed-field-refs (when (seq next-stage-fields) + (into #{} + (keep #(lib.equality/find-matching-ref % next-stage-fields)) + removed-columns))] + (if-not (seq removed-field-refs) + query + (-> query + (assoc-in next-fields-path (into [] (remove removed-field-refs) next-stage-fields)) + (recur next-stage-path removed-field-refs))))) + query)) + +(defn- filter-fields-clause + [query stage-path fields active-field-ids] + (let [removed-field-refs (into #{} + (filter (fn [field] + (and (lib.util/field-clause? field) + (let [id (get field 2)] + (and (integer? id) + (not (active-field-ids id))))))) + fields)] + (if-not (seq removed-field-refs) + query + (-> query + (assoc-in (conj stage-path :fields) (into [] (remove removed-field-refs) fields)) + (propagate-removal stage-path removed-field-refs))))) + +(defn- keep-active-fields + [query fields-clauses active-field-ids] + (reduce-kv #(filter-fields-clause %1 %2 %3 active-field-ids) query fields-clauses)) + +(mu/defn remove-inactive-field-refs :- ::lib.schema/query + "Remove any references to fields that are not active. + This might result in a broken query, but the original query would break at run time too because of the + references to columns that do not exist in the database. + This middleware can fix queries that contain references that are not used other than being returned. + + This function should be called after the point where the implicit :fields clauses are added to the query. + We determine which direct database field references are referencing active fields and remove the others. + Then we recursively remove references to the removed columns." + [query :- ::lib.schema/query] + (let [fields-clauses (collect-fields-clauses query) + field-ids (into #{} + (comp cat + (filter lib.util/field-clause?) + (map #(get % 2)) + (filter integer?)) + (vals fields-clauses)) + active-field-ids (if (seq field-ids) + (into #{} + (comp (filter :active) + (map :id)) + (lib.metadata/bulk-metadata query :metadata/column field-ids)) + #{})] + (cond-> query + (not= field-ids active-field-ids) + (keep-active-fields fields-clauses active-field-ids)))) diff --git a/src/metabase/query_processor/preprocess.clj b/src/metabase/query_processor/preprocess.clj index 04b656c520d9bbf56e433ca712de307589d76558..fd26c6aaebe1b138a79195d6f643fd1a9eac4364 100644 --- a/src/metabase/query_processor/preprocess.clj +++ b/src/metabase/query_processor/preprocess.clj @@ -33,6 +33,7 @@ [metabase.query-processor.middleware.persistence :as qp.persistence] [metabase.query-processor.middleware.pre-alias-aggregations :as qp.pre-alias-aggregations] [metabase.query-processor.middleware.reconcile-breakout-and-order-by-bucketing :as reconcile-bucketing] + [metabase.query-processor.middleware.remove-inactive-field-refs :as qp.remove-inactive-field-refs] [metabase.query-processor.middleware.resolve-fields :as qp.resolve-fields] [metabase.query-processor.middleware.resolve-joined-fields :as resolve-joined-fields] [metabase.query-processor.middleware.resolve-joins :as resolve-joins] @@ -72,6 +73,29 @@ assoc :converted-form query))) (with-meta (meta middleware-fn)))) +(def ^:private unconverted-property? + (some-fn #{:info} qualified-keyword?)) + +(defn- copy-unconverted-properties + [to from] + (reduce-kv (fn [m k v] + (cond-> m + (unconverted-property? k) (assoc k v))) + to + from)) + +(defn- ensure-pmbql-for-unclean-query + [middleware-fn] + (-> (fn [query] + (mu/disable-enforcement + (binding [lib.convert/*clean-query* false] + (let [query' (-> (cond->> query + (not (:lib/type query)) + (lib.query/query (qp.store/metadata-provider))) + (copy-unconverted-properties query))] + (-> query' middleware-fn ->legacy))))) + (with-meta (meta middleware-fn)))) + (def ^:private middleware "Pre-processing middleware. Has the form @@ -104,6 +128,7 @@ (ensure-legacy #'resolve-joined-fields/resolve-joined-fields) (ensure-legacy #'fix-bad-refs/fix-bad-references) (ensure-legacy #'escape-join-aliases/escape-join-aliases) + (ensure-pmbql-for-unclean-query #'qp.remove-inactive-field-refs/remove-inactive-field-refs) ;; yes, this is called a second time, because we need to handle any joins that got added (ensure-legacy #'qp.middleware.enterprise/apply-sandboxing) (ensure-legacy #'qp.cumulative-aggregations/rewrite-cumulative-aggregations) diff --git a/test/metabase/lib/convert_test.cljc b/test/metabase/lib/convert_test.cljc index 1e253f206e7c0fc441e9f9fbfc156dcdaccec80c..c5b68468ebc99c47fca942d2f8fa445e941b0955 100644 --- a/test/metabase/lib/convert_test.cljc +++ b/test/metabase/lib/convert_test.cljc @@ -476,6 +476,123 @@ :source-table 1} :type :query})) +(deftest ^:parallel unclean-stage-round-trip-test + (binding [lib.convert/*clean-query* false] + (doseq [query + [{:database 7 + :type :query + :query {:joins [{:alias "__join" + :strategy :left-join + :condition [:= [:field 388 nil] 1] + :source-table 44}] + :source-table 43 + :fields [[:field 390 nil] + [:field 391 nil] + [:field 388 nil] + [:field 392 nil] + [:field 393 nil] + [:field 389 nil]]}} + {:database 7 + :qp/source-card-id 1 + :info {:card-id 1} + :type :query + :query {:limit 2 + :fields [[:field 350 {:base-type :type/Text :join-alias "Card 2 - Category"}] + [:field "count" {:base-type :type/Integer}] + [:field 350 {:join-alias "Card 2 - Category"}]] + :joins [{:fields [[:field 350 {:join-alias "Card 2 - Category"}]] + :source-metadata [{:semantic_type :type/Category + :table_id 45 + :name "CATEGORY" + :field_ref [:field 350 {:base-type :type/Text}] + :effective_type :type/Text + :id 350 + :display_name "Category" + :fingerprint {:global {:distinct-count 4 + :nil% 0} + :type {:type/Text {:percent-json 0 + :percent-url 0 + :percent-email 0 + :percent-state 0 + :average-length 6.375}}} + :base_type :type/Text}] + :alias "Card 2 - Category" + :strategy :left-join + :source-query/model? false + :qp/stage-had-source-card 2 + :condition [:= + [:field "Products__CATEGORY" {:base-type :type/Text}] + [:field 350 {:base-type :type/Text, :join-alias "Card 2 - Category"}]] + :source-query {:source-table 45 + :breakout [[:field 350 {:base-type :type/Text}]] + :qp/stage-is-from-source-card 2 + :order-by [[:asc [:field 350 {:base-type :type/Text}]]]}}] + :source-query {:qp/stage-had-source-card 1 + :source-query/model? false + :fields [[:field 350 {:base-type :type/Text, :join-alias "Products"}] + [:field "count" {:base-type :type/Integer}]] + :source-query {:source-table 42 + :breakout [[:field 350 {:base-type :type/Text, :join-alias "Products"}]] + :aggregation [[:count]] + :qp/stage-is-from-source-card 1 + :order-by [[:asc [:field 350 {:base-type :type/Text, :join-alias "Products"}]]] + :joins [{:alias "Products" + :strategy :left-join + :condition [:= + [:field 382 {:base-type :type/Integer}] + [:field 351 {:base-type :type/BigInteger + :join-alias "Products"}]] + :source-table 45} + {:alias "People - User" + :strategy :left-join + :condition [:= + [:field 381 {:base-type :type/Integer}] + [:field 370 {:base-type :type/BigInteger + :join-alias "People - User"}]] + :source-table 40}]} + :source-metadata [{:semantic_type :type/Category + :table_id 45 + :name "CATEGORY" + :field_ref [:field 350 {:base-type :type/Text, :join-alias "Products"}] + :effective_type :type/Text + :id 350 + :display_name "Products → Category" + :fingerprint {:global {:distinct-count 4, :nil% 0} + :type {:type/Text {:percent-json 0 + :percent-url 0 + :percent-email 0 + :percent-state 0 + :average-length 6.375}}} + :base_type :type/Text + :source_alias "Products"} + {:name "count" + :display_name "Count" + :base_type :type/Integer + :semantic_type :type/Quantity + :field_ref [:aggregation 0]}]} + :source-metadata [{:semantic_type :type/Category + :table_id 45 + :name "CATEGORY" + :field_ref [:field 350 {:base-type :type/Text + :join-alias "Card 2 - Category"}] + :effective_type :type/Text + :id 350 + :display_name "Products → Category" + :fingerprint {:global {:distinct-count 4, :nil% 0} + :type {:type/Text {:percent-json 0 + :percent-url 0 + :percent-email 0 + :percent-state 0 + :average-length 6.375}}} + :base_type :type/Text + :source_alias "Products"} + {:name "count" + :display_name "Count" + :base_type :type/Integer + :semantic_type :type/Quantity + :field_ref [:field "count" {:base-type :type/Integer}]}]}}]] + (test-round-trip query)))) + (deftest ^:parallel round-trip-options-test (testing "Round-tripping (p)MBQL caluses with options (#30280)" (testing "starting with pMBQL" diff --git a/test/metabase/query_processor/middleware/remove_inactive_field_refs_test.clj b/test/metabase/query_processor/middleware/remove_inactive_field_refs_test.clj new file mode 100644 index 0000000000000000000000000000000000000000..c77f4c548e15003af75248e6af837c1336a1fa1e --- /dev/null +++ b/test/metabase/query_processor/middleware/remove_inactive_field_refs_test.clj @@ -0,0 +1,155 @@ +(ns ^:mb/once metabase.query-processor.middleware.remove-inactive-field-refs-test + (:require + [clojure.test :refer :all] + [metabase.lib.metadata.jvm :as lib.metadata.jvm] + [metabase.query-processor :as qp] + [metabase.query-processor.store :as qp.store] + [metabase.test :as mt] + [metabase.util :as u] + [toucan2.core :as t2])) + +(deftest ^:synchronized deleted-columns-test + ;; It doesn't really matter which DB we test with. The test uses H2 column names. + (qp.store/with-metadata-provider (lib.metadata.jvm/application-database-metadata-provider (mt/id)) + (mt/with-temp [:model/Card card0 {:dataset_query + (mt/mbql-query orders + {:joins [{:source-table $$products + :alias "Product" + :condition + [:= $orders.product_id + [:field %products.id {:join-alias "Product"}]] + :fields :all}]})} + :model/Card card1 {:dataset_query + (mt/mbql-query orders + {:fields [$id $subtotal $tax $total $created_at $quantity] + :joins [{:source-table $$products + :alias "Product" + :condition + [:= $orders.product_id + [:field %products.id {:join-alias "Product"}]] + :fields :all}]})} + :model/Card card2 {:dataset_query + (mt/mbql-query orders + {:fields [$id $subtotal $tax $total $created_at $quantity] + :joins [{:source-table $$products + :alias "Product" + :condition + [:= $orders.product_id + [:field %products.id {:join-alias "Product"}]] + :fields + [[:field %products.id {:join-alias "Product"}] + [:field %products.title {:join-alias "Product"}] + [:field %products.vendor {:join-alias "Product"}] + [:field %products.price {:join-alias "Product"}] + [:field %products.rating {:join-alias "Product"}]]}]})} + :model/Card card3 {:dataset_query + (mt/mbql-query orders + {:source-table (str "card__" (u/the-id card2)) + :fields [[:field "ID" {:base-type :type/BigInteger}] + [:field "TAX" {:base-type :type/Float}] + [:field "TOTAL" {:base-type :type/Float}] + [:field "ID_2" {:base-type :type/BigInteger}] + [:field "RATING" {:base-type :type/Float}]] + :filter [:> [:field "TOTAL" {:base-type :type/Float}] 3]})}] + (let [summary-query (mt/mbql-query orders + {:source-table (str "card__" (u/the-id card3)) + :aggregation [[:sum [:field "TOTAL" {:base-type :type/Float}]]] + :breakout [[:field "RATING" {:base-type :type/Float}]]}) + join-query (mt/mbql-query orders + {:source-table (mt/id :products) + :joins [{:source-table (str "card__" (u/the-id card2)) + :alias "Card" + :condition + [:= $products.id + [:field "ID_2" {:join-alias "Card" + :base-type :type/BigInteger}]] + :fields + [[:field "ID_2" {:join-alias "Card" + :base-type :type/BigInteger}] + [:field "TOTAL" {:join-alias "Card" + :base-type :type/Float}] + [:field "TAX" {:join-alias "Card" + :base-type :type/Float}] + [:field "VENDOR" {:join-alias "Card" + :base-type :type/Text}]]}]})] + ;; running these questions before fields get removed from the database + (testing "Behavior before the deletion (if this changes, the other cases have to change accordingly)" + (doseq [[card fields] {card0 ["ID" "USER_ID" "PRODUCT_ID" "SUBTOTAL" "TAX" "TOTAL" "DISCOUNT" + "CREATED_AT" "QUANTITY" + "ID_2" "EAN" "TITLE" "CATEGORY" "VENDOR" "PRICE" + "RATING" "CREATED_AT_2"] + card1 ["ID" "SUBTOTAL" "TAX" "TOTAL" "CREATED_AT" "QUANTITY" + "ID_2" "EAN" "TITLE" "CATEGORY" "VENDOR" "PRICE" + "RATING" "CREATED_AT_2"] + card2 ["ID" "SUBTOTAL" "TAX" "TOTAL" "CREATED_AT" "QUANTITY" + "ID_2" "TITLE" "VENDOR" "PRICE" "RATING"] + card3 ["ID" "TAX" "TOTAL" "ID_2" "RATING"]}] + (let [query (mt/mbql-query orders + {:source-table (str "card__" (u/the-id card))})] + (let [results (qp/process-query query)] + (is (=? fields + (map :name (mt/cols results))))))) + (is (= ["Product → Rating" "Sum of Total"] + (->> (mt/process-query summary-query) + mt/cols + (map :display_name)))) + (is (= ["ID" "Ean" "Title" "Category" "Vendor" "Price" "Rating" "Created At" + "Card → ID 2" "Card → Total" "Card → Tax" "Card → Vendor"] + (->> (mt/process-query join-query) + mt/cols + (map :display_name))))) + + ;; simulate the deletion of some fields and sync marking them inactive + (let [inactive-ids [(mt/id :orders :tax) (mt/id :products :ean) (mt/id :products :vendor)]] + (t2/update! :model/Field :id [:in inactive-ids] {:active false}) + + ;; running the actual tests + (try + (let [mp (lib.metadata.jvm/application-database-metadata-provider (mt/id))] + (binding [qp.store/*TESTS-ONLY-allow-replacing-metadata-provider* true] + (qp.store/with-metadata-provider mp + ;; running these questions after fields have been removed from the database + ;; and the change has been detected by syncing + (testing "Questions return the same columns except the ones deleted" + (doseq [[card fields] {card0 ["ID" "USER_ID" "PRODUCT_ID" "SUBTOTAL" "TOTAL" "DISCOUNT" + "CREATED_AT" "QUANTITY" + "ID_2" "TITLE" "CATEGORY" "PRICE" + "RATING" "CREATED_AT_2"] + card1 ["ID" "SUBTOTAL" "TOTAL" "CREATED_AT" "QUANTITY" + "ID_2" "TITLE" "CATEGORY" "PRICE" + "RATING" "CREATED_AT_2"] + card2 ["ID" "SUBTOTAL" "TOTAL" "CREATED_AT" "QUANTITY" + "ID_2" "TITLE" "PRICE" "RATING"] + card3 ["ID" "TOTAL" "ID_2" "RATING"]}] + (let [query (mt/mbql-query orders + {:source-table (str "card__" (u/the-id card))})] + (let [results (qp/process-query query)] + (is (=? fields + (map :name (mt/cols results)))))))) + (testing "Active columns can be used" + (is (= ["Product → Rating" "Sum of Total"] + (->> (mt/run-mbql-query orders + {:source-table (str "card__" (u/the-id card2)) + :aggregation [[:sum [:field "TOTAL" {:base-type :type/Float}]]] + :breakout [[:field "RATING" {:base-type :type/Integer}]]}) + mt/cols + (map :display_name))))) + (testing "Using deleted columns results in an error" + (is (thrown? clojure.lang.ExceptionInfo + (mt/run-mbql-query orders + {:source-table (str "card__" (u/the-id card2)) + :aggregation [[:sum [:field "TAX" {:base-type :type/Float}]]] + :breakout [[:field "RATING" {:base-type :type/Integer}]]})))) + (testing "Additional level of nesting is OK" + (is (= ["Product → Rating" "Sum of Total"] + (->> (mt/process-query summary-query) + mt/cols + (map :display_name)))) + (testing "in joins too" + (is (= ["ID" "Title" "Category" "Price" "Rating" "Created At" + "Card → ID 2" "Card → Total"] + (->> (qp/process-query join-query) + mt/cols + (map :display_name))))))))) + (finally + (t2/update! :model/Field :id [:in inactive-ids] {:active true}))))))))