From a27c151078aae2b7d4d98d55c3f7856db9911bf7 Mon Sep 17 00:00:00 2001
From: adam-james <21064735+adam-james-v@users.noreply.github.com>
Date: Mon, 24 Jun 2024 14:52:09 -0700
Subject: [PATCH] Sanitize SVG before parsing with Batik to prevent render
 failures when invalid characters are in the SVG content (#44516)

* Add Temporal Units List to Dashboard Parameters Schema

Closes: #44361

This adds an entry to the Dashboard Parameter schema so that we can validate
the list of temporal_units that might be passed from the frontend when adding
or updating the `Unit of Time` type parameters.

* Sanitize the SVG before parsing it with Batik

WIP

Fixes: #43677

There are characters that are invalid according to the Batik XML Parser. This sanitizes the svg string to strip out
some of those characters so that the render can continue.

* Use the xml 1.0 spec allowed unicode chars list for the regex

* Remove accidentally merged code

* still miseed a line
---
 src/metabase/pulse/render/js_svg.clj       | 19 +++++++++++++++++--
 test/metabase/pulse/render/js_svg_test.clj |  5 +++++
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/metabase/pulse/render/js_svg.clj b/src/metabase/pulse/render/js_svg.clj
index 94ea37ac7ce..fcbae475efb 100644
--- a/src/metabase/pulse/render/js_svg.clj
+++ b/src/metabase/pulse/render/js_svg.clj
@@ -5,6 +5,7 @@
   `toJSMap` functions to turn Clojure's normal datastructures into js native structures."
   (:require
    [cheshire.core :as json]
+   [clojure.string :as str]
    [metabase.config :as config]
    [metabase.public-settings :as public-settings]
    [metabase.pulse.render.js-engine :as js]
@@ -88,9 +89,23 @@
         (.setTextContent ""))
       node)))
 
+(defn- sanitize-svg
+  "Using a regex of negated allowed characters according to the XML 1.0 spec, replace disallowed characters with an empty string."
+  [svg-string]
+  (let [allowed-chars (re-pattern (str "[^"
+                                       "\u0009"
+                                       "\u000A"
+                                       "\u000D"
+                                       "\u0020-\uD7FF"
+                                       "\uE000-\uFFFD"
+                                       "\u10000-\u10FFFF"
+                                       "]"))]
+    (str/replace svg-string allowed-chars "")))
+
 (defn- parse-svg-string [^String s]
-  (let [factory (SAXSVGDocumentFactory. "org.apache.xerces.parsers.SAXParser")]
-    (with-open [is (ByteArrayInputStream. (.getBytes s StandardCharsets/UTF_8))]
+  (let [s (sanitize-svg s)
+        factory (SAXSVGDocumentFactory. "org.apache.xerces.parsers.SAXParser")]
+    (with-open [is (ByteArrayInputStream. (.getBytes ^String s StandardCharsets/UTF_8))]
       (.createDocument factory "file:///fake.svg" is))))
 
 (def ^:dynamic ^:private *svg-render-width*
diff --git a/test/metabase/pulse/render/js_svg_test.clj b/test/metabase/pulse/render/js_svg_test.clj
index 71d23ced318..44a34f8d8b5 100644
--- a/test/metabase/pulse/render/js_svg_test.clj
+++ b/test/metabase/pulse/render/js_svg_test.clj
@@ -104,3 +104,8 @@
                                   (json/generate-string settings)
                                   (json/generate-string {})))]
       (validate-svg-string :progress svg-string))))
+
+(deftest parse-svg-sanitizes-characters-test
+  (testing "Characters discouraged or not permitted by the xml 1.0 specification are removed. (#"
+    (#'js-svg/parse-svg-string
+     "<svg xmlns=\"http://www.w3.org/2000/svg\">\u001F</svg>")))
-- 
GitLab