From 1a521c41e422b450a3dad3f517b6246cd5056203 Mon Sep 17 00:00:00 2001 From: Cam Saul <1455846+camsaul@users.noreply.github.com> Date: Sun, 13 Feb 2022 12:30:00 -0800 Subject: [PATCH] Fix source aliases for Fields from joins not being properly escaped (#20474) * Fix source aliases for Fields from joins not being properly escaped * Tweak TODO comment * Disable the flaky tests when running driver tests * Rename the new macro * Dox * Remove unused :require --- .../query_processor/util/add_alias_info.clj | 12 ++- test/metabase/api/session_test.clj | 36 ++++--- .../util/add_alias_info_test.clj | 100 +++++++++++++++++- .../explicit_joins_test.clj | 49 ++++++++- test/metabase/test.clj | 10 ++ 5 files changed, 187 insertions(+), 20 deletions(-) diff --git a/src/metabase/query_processor/util/add_alias_info.clj b/src/metabase/query_processor/util/add_alias_info.clj index c904439aff6..745fdfc6acf 100644 --- a/src/metabase/query_processor/util/add_alias_info.clj +++ b/src/metabase/query_processor/util/add_alias_info.clj @@ -56,7 +56,7 @@ "Generate a field alias by applying `prefix` to `field-alias`. This is used for automatically-generated aliases for columns that are the result of joins." [prefix field-alias] - (str prefix "__" field-alias)) + (driver/escape-alias driver/*driver* (str prefix "__" field-alias))) (defn- make-unique-alias-fn "Creates a function with the signature @@ -262,6 +262,16 @@ [_ _id-or-name {:keys [join-alias]}, :as _field-clause] {:keys [field-name join-is-this-level? alias-from-join alias-from-source-query]}] (cond + ;; TODO -- this just recalculates the info instead of actually finding the Field in the join and getting its desired + ;; alias there... this seems like a clear bug since it doesn't go thru the uniquify logic. Something will + ;; potentially break by doing this. I haven't been able to reproduce it yet however. + ;; + ;; This will only be triggered if the join somehow exposes duplicate columns or columns that have the same escaped + ;; name after going thru [[driver/escape-alias]]. I think the only way this could happen is if we escape them + ;; aggressively but the escape logic produces duplicate columns (i.e., there is overlap between the unique hashes we + ;; suffix to escaped identifiers.) + ;; + ;; We'll have to look into this more in the future. For now, it seems to work for everything we try it with. (and join-alias (not join-is-this-level?)) (prefix-field-alias join-alias field-name) (and join-is-this-level? alias-from-join) alias-from-join alias-from-source-query alias-from-source-query diff --git a/test/metabase/api/session_test.clj b/test/metabase/api/session_test.clj index 1b3f013b9db..102ccd9356a 100644 --- a/test/metabase/api/session_test.clj +++ b/test/metabase/api/session_test.clj @@ -140,23 +140,25 @@ (deftest failure-threshold-throttling-test (testing "Test that source based throttling kicks in after the login failure threshold (50) has been reached" - (with-redefs [session-api/login-throttlers (cleaned-throttlers #'session-api/login-throttlers - [:username :ip-address]) - public-settings/source-address-header (constantly "x-forwarded-for")] - (dotimes [n 50] - (let [response (send-login-request (format "user-%d" n) - {"x-forwarded-for" "10.1.2.3"}) - status-code (:status response)] - (assert (= status-code 401) (str "Unexpected response status code:" status-code)))) - (let [error (fn [] - (-> (send-login-request "last-user" {"x-forwarded-for" "10.1.2.3"}) - :body - json/parse-string - (get-in ["errors" "username"])))] - (is (re= #"^Too many attempts! You must wait \d+ seconds before trying again\.$" - (error))) - (is (re= #"^Too many attempts! You must wait \d+ seconds before trying again\.$" - (error))))))) + ;; disable this when we're testing drivers since it tends to F L A K E. + (mt/disable-flaky-test-when-running-driver-tests-in-ci + (with-redefs [session-api/login-throttlers (cleaned-throttlers #'session-api/login-throttlers + [:username :ip-address]) + public-settings/source-address-header (constantly "x-forwarded-for")] + (dotimes [n 50] + (let [response (send-login-request (format "user-%d" n) + {"x-forwarded-for" "10.1.2.3"}) + status-code (:status response)] + (assert (= status-code 401) (str "Unexpected response status code:" status-code)))) + (let [error (fn [] + (-> (send-login-request "last-user" {"x-forwarded-for" "10.1.2.3"}) + :body + json/parse-string + (get-in ["errors" "username"])))] + (is (re= #"^Too many attempts! You must wait \d+ seconds before trying again\.$" + (error))) + (is (re= #"^Too many attempts! You must wait \d+ seconds before trying again\.$" + (error)))))))) (deftest failure-threshold-per-request-source (testing "The same as above, but ensure that throttling is done on a per request source basis." diff --git a/test/metabase/query_processor/util/add_alias_info_test.clj b/test/metabase/query_processor/util/add_alias_info_test.clj index 09d94fe25de..f4cdb1318bd 100644 --- a/test/metabase/query_processor/util/add_alias_info_test.clj +++ b/test/metabase/query_processor/util/add_alias_info_test.clj @@ -1,5 +1,6 @@ (ns metabase.query-processor.util.add-alias-info-test - (:require [clojure.test :refer :all] + (:require [clojure.string :as str] + [clojure.test :refer :all] [clojure.walk :as walk] [metabase.driver :as driver] [metabase.driver.h2 :as h2] @@ -416,6 +417,103 @@ add-alias-info :query))))))) +(driver/register! ::custom-escape-spaces-to-underscores :parent :h2) + +(defmethod driver/escape-alias ::custom-escape-spaces-to-underscores + [driver field-alias] + (-> ((get-method driver/escape-alias :h2) driver field-alias) + (str/replace #"\s" "_"))) + +(deftest use-correct-alias-for-joined-field-test + (testing "Make sure we call `driver/escape-alias` for the `:source-alias` for Fields coming from joins (#20413)" + (mt/dataset sample-dataset + (let [db (mt/db)] + (driver/with-driver ::custom-escape-spaces-to-underscores + (mt/with-db db + (is (query= (mt/$ids nil + {:source-query {:source-table $$orders + :joins [{:source-table $$products + :alias "Products Renamed" + :condition + [:= + [:field + %orders.product_id + {::add/source-alias "PRODUCT_ID" + ::add/source-table $$orders}] + [:field + %products.id + {::add/desired-alias "Products_Renamed__ID" + ::add/position 0 + ::add/source-alias "ID" + ::add/source-table "Products Renamed" + :join-alias "Products Renamed"}]] + :fields + [[:field + %products.id + {::add/desired-alias "Products_Renamed__ID" + ::add/position 0 + ::add/source-alias "ID" + ::add/source-table "Products Renamed" + :join-alias "Products Renamed"}]] + :strategy :left-join}] + :expressions {"CC" [:+ 1 1]} + :fields + [[:field + %products.id + {::add/desired-alias "Products_Renamed__ID" + ::add/position 0 + ::add/source-alias "ID" + ::add/source-table "Products Renamed" + :join-alias "Products Renamed"}] + [:expression "CC" {::add/desired-alias "CC", ::add/position 1}]] + :filter + [:= + [:field + %products.category + {::add/source-alias "CATEGORY" + ::add/source-table "Products Renamed" + :join-alias "Products Renamed"}] + [:value + "Doohickey" + {:base_type :type/Text + :coercion_strategy nil + :database_type "VARCHAR" + :effective_type :type/Text + :name "CATEGORY" + :semantic_type :type/Category}]]} + :fields [[:field + %products.id + {::add/desired-alias "Products_Renamed__ID" + ::add/position 0 + ::add/source-alias "Products_Renamed__ID" + ::add/source-table ::add/source + :join-alias "Products Renamed"}] + [:field + "CC" + {::add/desired-alias "CC" + ::add/position 1 + ::add/source-alias "CC" + ::add/source-table ::add/source + :base-type :type/Float}]] + :limit 1}) + (-> (mt/mbql-query orders + {:source-query {:source-table $$orders + :joins [{:source-table $$products + :alias "Products Renamed" + :condition [:= + $product_id + [:field %products.id {:join-alias "Products Renamed"}]] + :fields [[:field %products.id {:join-alias "Products Renamed"}]]}] + :expressions {"CC" [:+ 1 1]} + :fields [[:field %products.id {:join-alias "Products Renamed"}] + [:expression "CC"]] + :filter [:= + [:field %products.category {:join-alias "Products Renamed"}] + "Doohickey"]} + :limit 1}) + add-alias-info + :query))))))))) + (deftest use-source-unique-aliases-test (testing "Make sure uniquified aliases in the source query end up getting used for `::add/source-alias`" ;; keep track of the IDs so we don't accidentally fetch the wrong ones after we switch the name of `price` diff --git a/test/metabase/query_processor_test/explicit_joins_test.clj b/test/metabase/query_processor_test/explicit_joins_test.clj index e1c7221b6fb..eeccddf2837 100644 --- a/test/metabase/query_processor_test/explicit_joins_test.clj +++ b/test/metabase/query_processor_test/explicit_joins_test.clj @@ -690,7 +690,7 @@ (qp/process-query query)))))))))) (deftest join-against-multiple-saved-questions-with-same-column-test - (testing "Should be able to join multiple against saved questions on the same column (#15863, #20362, #20413)" + (testing "Should be able to join multiple against saved questions on the same column (#15863, #20362)" (mt/test-drivers (mt/normal-drivers-with-feature :nested-queries :left-join) (mt/dataset sample-dataset (let [q1 (mt/mbql-query products {:breakout [$category], :aggregation [[:count]]}) @@ -729,3 +729,50 @@ ["Gizmo" 51 "Gizmo" 2834.88 "Gizmo" 3.64] ["Widget" 54 "Widget" 3109.31 "Widget" 3.15]] (mt/formatted-rows [str int str 2.0 str 2.0] results)))))))))))) + +(deftest use-correct-source-alias-for-fields-from-joins-test + (testing "Make sure we use the correct escaped alias for a Fields coming from joins (#20413)" + (mt/test-drivers (mt/normal-drivers-with-feature :nested-queries :left-join) + (mt/dataset sample-dataset + (let [query (mt/mbql-query orders + {:joins [{:source-table $$products + :alias "Products Renamed" + :condition [:= + $product_id + [:field %products.id {:join-alias "Products Renamed"}]] + :fields :all}] + :expressions {"CC" [:+ 1 1]} + :filter [:= + [:field %products.category {:join-alias "Products Renamed"}] + "Doohickey"] + :order-by [[:asc $id]] + :limit 2})] + (mt/with-native-query-testing-context query + (let [results (qp/process-query query)] + (when (#{:h2 :postgres} driver/*driver*) + (is (= ["ID" + "User ID" + "Product ID" + "Subtotal" + "Tax" + "Total" + "Discount" + "Created At" + "Quantity" + "CC" + "Products Renamed → ID" + "Products Renamed → Ean" + "Products Renamed → Title" + "Products Renamed → Category" + "Products Renamed → Vendor" + "Products Renamed → Price" + "Products Renamed → Rating" + "Products Renamed → Created At"] + (map :display_name (get-in results [:data :results_metadata :columns]))))) + (is (= [[6 1 60 29.8 1.64 31.44 nil "2019-11-06T16:38:50.134Z" 3 2 + 60 "4819782507258" "Rustic Paper Car" "Doohickey" "Stroman-Carroll" 19.87 4.1 "2017-12-16T11:14:43.264Z"] + [10 1 6 97.44 5.36 102.8 nil "2020-01-17T01:44:37.233Z" 2 2 + 6 "2293343551454" "Small Marble Hat" "Doohickey" "Nolan-Wolff" 64.96 3.8 "2017-03-29T05:43:40.15Z"]] + (mt/formatted-rows [int int int 2.0 2.0 2.0 2.0 str int int + int str str str str 2.0 2.0 str] + results)))))))))) diff --git a/test/metabase/test.clj b/test/metabase/test.clj index bd9ce1a2b14..97cf3256c0b 100644 --- a/test/metabase/test.clj +++ b/test/metabase/test.clj @@ -375,3 +375,13 @@ :let [~argv args#]] (is ~expr (str (are+-message '~expr '~argv args#))))) + +(defmacro disable-flaky-test-when-running-driver-tests-in-ci + "Only run `body` when we're not running driver tests in CI (i.e., `DRIVERS` and `CI` are both not set). Perfect for + disabling those damn flaky tests that cause CI to fail all the time. You should obviously only do this for things + that have nothing to do with drivers but tend to flake anyway." + {:style/indent 0} + [& body] + `(when (and (not (seq (env/env :drivers))) + (not (seq (env/env :ci)))) + ~@body)) -- GitLab