From a27c151078aae2b7d4d98d55c3f7856db9911bf7 Mon Sep 17 00:00:00 2001 From: adam-james <21064735+adam-james-v@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:52:09 -0700 Subject: [PATCH] Sanitize SVG before parsing with Batik to prevent render failures when invalid characters are in the SVG content (#44516) * Add Temporal Units List to Dashboard Parameters Schema Closes: #44361 This adds an entry to the Dashboard Parameter schema so that we can validate the list of temporal_units that might be passed from the frontend when adding or updating the `Unit of Time` type parameters. * Sanitize the SVG before parsing it with Batik WIP Fixes: #43677 There are characters that are invalid according to the Batik XML Parser. This sanitizes the svg string to strip out some of those characters so that the render can continue. * Use the xml 1.0 spec allowed unicode chars list for the regex * Remove accidentally merged code * still miseed a line --- src/metabase/pulse/render/js_svg.clj | 19 +++++++++++++++++-- test/metabase/pulse/render/js_svg_test.clj | 5 +++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/metabase/pulse/render/js_svg.clj b/src/metabase/pulse/render/js_svg.clj index 94ea37ac7ce..fcbae475efb 100644 --- a/src/metabase/pulse/render/js_svg.clj +++ b/src/metabase/pulse/render/js_svg.clj @@ -5,6 +5,7 @@ `toJSMap` functions to turn Clojure's normal datastructures into js native structures." (:require [cheshire.core :as json] + [clojure.string :as str] [metabase.config :as config] [metabase.public-settings :as public-settings] [metabase.pulse.render.js-engine :as js] @@ -88,9 +89,23 @@ (.setTextContent "")) node))) +(defn- sanitize-svg + "Using a regex of negated allowed characters according to the XML 1.0 spec, replace disallowed characters with an empty string." + [svg-string] + (let [allowed-chars (re-pattern (str "[^" + "\u0009" + "\u000A" + "\u000D" + "\u0020-\uD7FF" + "\uE000-\uFFFD" + "\u10000-\u10FFFF" + "]"))] + (str/replace svg-string allowed-chars ""))) + (defn- parse-svg-string [^String s] - (let [factory (SAXSVGDocumentFactory. "org.apache.xerces.parsers.SAXParser")] - (with-open [is (ByteArrayInputStream. (.getBytes s StandardCharsets/UTF_8))] + (let [s (sanitize-svg s) + factory (SAXSVGDocumentFactory. "org.apache.xerces.parsers.SAXParser")] + (with-open [is (ByteArrayInputStream. (.getBytes ^String s StandardCharsets/UTF_8))] (.createDocument factory "file:///fake.svg" is)))) (def ^:dynamic ^:private *svg-render-width* diff --git a/test/metabase/pulse/render/js_svg_test.clj b/test/metabase/pulse/render/js_svg_test.clj index 71d23ced318..44a34f8d8b5 100644 --- a/test/metabase/pulse/render/js_svg_test.clj +++ b/test/metabase/pulse/render/js_svg_test.clj @@ -104,3 +104,8 @@ (json/generate-string settings) (json/generate-string {})))] (validate-svg-string :progress svg-string)))) + +(deftest parse-svg-sanitizes-characters-test + (testing "Characters discouraged or not permitted by the xml 1.0 specification are removed. (#" + (#'js-svg/parse-svg-string + "<svg xmlns=\"http://www.w3.org/2000/svg\">\u001F</svg>"))) -- GitLab