From 35cebc56d3e1a8e3c46f37e5fa770e90d199771b Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Fri, 30 Oct 2015 18:19:20 +0000
Subject: [PATCH] rewrite the HTML message stuff to fix XSS and improve clarity

---
 .../vector/views/molecules/MNoticeTile.js     | 43 +++++++++---------
 src/skins/vector/views/molecules/MTextTile.js | 44 ++++++++++---------
 2 files changed, 47 insertions(+), 40 deletions(-)

diff --git a/src/skins/vector/views/molecules/MNoticeTile.js b/src/skins/vector/views/molecules/MNoticeTile.js
index e9ca85ea..d173c728 100644
--- a/src/skins/vector/views/molecules/MNoticeTile.js
+++ b/src/skins/vector/views/molecules/MNoticeTile.js
@@ -35,43 +35,46 @@ module.exports = React.createClass({
     // FIXME: this entire class is copy-pasted from MTextTile :(        
     render: function() {
         var content = this.props.mxEvent.getContent();
-        var body = content.body;
-
-        if (content.format === "org.matrix.custom.html") {
-            body = sanitizeHtml(content.formatted_body, sanitizeHtmlParams);
-        }
+        var originalBody = content.body;
 
         if (this.props.searchTerm) {
             var lastOffset = 0;
             var bodyList = [];
             var k = 0;
             var offset;
-            // XXX: this probably doesn't handle stemming very well.
-            while ((offset = body.indexOf(this.props.searchTerm, lastOffset)) >= 0) {
-                if (content.format === "org.matrix.custom.html") {
+
+            // XXX: rather than searching for the search term in the body,
+            // we should be looking at the match delimiters returned by the FTS engine
+            if (content.format === "org.matrix.custom.html") {
+                var safeBody = sanitizeHtml(content.formatted_body, sanitizeHtmlParams);
+                var safeSearchTerm = sanitizeHtml(this.props.searchTerm, sanitizeHtmlParams);
+                while ((offset = safeBody.indexOf(safeSearchTerm, lastOffset)) >= 0) {
                     // FIXME: we need to apply the search highlighting to only the text elements of HTML, which means
                     // hooking into the sanitizer parser rather than treating it as a string.  Otherwise
                     // the act of highlighting a <b/> or whatever will break the HTML badly.
-                    bodyList.push(<span key={ k++ } dangerouslySetInnerHTML={{ __html: body.substring(lastOffset, offset) }} />);
-                    bodyList.push(<span key={ k++ } dangerouslySetInnerHTML={{ __html: this.props.searchTerm }} className="mx_MessageTile_searchHighlight" />);
+                    bodyList.push(<span key={ k++ } dangerouslySetInnerHTML={{ __html: safeBody.substring(lastOffset, offset) }} />);
+                    bodyList.push(<span key={ k++ } dangerouslySetInnerHTML={{ __html: safeSearchTerm }} className="mx_MessageTile_searchHighlight" />);
+                    lastOffset = offset + safeSearchTerm.length;
                 }
-                else {
-                    bodyList.push(<span key={ k++ } >{ body.substring(lastOffset, offset) }</span>);
-                    bodyList.push(<span key={ k++ } className="mx_MessageTile_searchHighlight">{ this.props.searchTerm }</span>);
-                }
-                lastOffset = offset + this.props.searchTerm.length;
-            }
-            if (content.format === "org.matrix.custom.html") {
-                bodyList.push(<span key={ k++ } dangerouslySetInnerHTML={{ __html: body.substring(lastOffset) }} />);
+                bodyList.push(<span key={ k++ } dangerouslySetInnerHTML={{ __html: safeBody.substring(lastOffset) }} />);
             }
             else {
-                bodyList.push(<span key={ k++ }>{ body.substring(lastOffset) }</span>);
+                while ((offset = originalBody.indexOf(this.props.searchTerm, lastOffset)) >= 0) {
+                    bodyList.push(<span key={ k++ } >{ originalBody.substring(lastOffset, offset) }</span>);
+                    bodyList.push(<span key={ k++ } className="mx_MessageTile_searchHighlight">{ this.props.searchTerm }</span>);
+                    lastOffset = offset + this.props.searchTerm.length;
+                }
+                bodyList.push(<span key={ k++ }>{ originalBody.substring(lastOffset) }</span>);
             }
             body = bodyList;
         }
         else {
             if (content.format === "org.matrix.custom.html") {
-                body = <span dangerouslySetInnerHTML={{ __html: body }} />;
+                var safeBody = sanitizeHtml(content.formatted_body, sanitizeHtmlParams);
+                body = <span dangerouslySetInnerHTML={{ __html: safeBody }} />;
+            }
+            else {
+                body = originalBody;
             }
         }
 
diff --git a/src/skins/vector/views/molecules/MTextTile.js b/src/skins/vector/views/molecules/MTextTile.js
index 6e04088e..626ee5b9 100644
--- a/src/skins/vector/views/molecules/MTextTile.js
+++ b/src/skins/vector/views/molecules/MTextTile.js
@@ -32,45 +32,49 @@ module.exports = React.createClass({
     displayName: 'MTextTile',
     mixins: [MTextTileController],
 
+    // FIXME: this entire class is copy-pasted from MTextTile :(        
     render: function() {
         var content = this.props.mxEvent.getContent();
-        var body = content.body;
-
-        if (content.format === "org.matrix.custom.html") {
-            body = sanitizeHtml(content.formatted_body, sanitizeHtmlParams);
-        }
+        var originalBody = content.body;
 
         if (this.props.searchTerm) {
             var lastOffset = 0;
             var bodyList = [];
             var k = 0;
             var offset;
-            // XXX: this probably doesn't handle stemming very well.
-            while ((offset = body.indexOf(this.props.searchTerm, lastOffset)) >= 0) {
-                if (content.format === "org.matrix.custom.html") {
+
+            // XXX: rather than searching for the search term in the body,
+            // we should be looking at the match delimiters returned by the FTS engine
+            if (content.format === "org.matrix.custom.html") {
+                var safeBody = sanitizeHtml(content.formatted_body, sanitizeHtmlParams);
+                var safeSearchTerm = sanitizeHtml(this.props.searchTerm, sanitizeHtmlParams);
+                while ((offset = safeBody.indexOf(safeSearchTerm, lastOffset)) >= 0) {
                     // FIXME: we need to apply the search highlighting to only the text elements of HTML, which means
                     // hooking into the sanitizer parser rather than treating it as a string.  Otherwise
                     // the act of highlighting a <b/> or whatever will break the HTML badly.
-                    bodyList.push(<span key={ k++ } dangerouslySetInnerHTML={{ __html: body.substring(lastOffset, offset) }} />);
-                    bodyList.push(<span key={ k++ } dangerouslySetInnerHTML={{ __html: this.props.searchTerm }} className="mx_MessageTile_searchHighlight" />);
+                    bodyList.push(<span key={ k++ } dangerouslySetInnerHTML={{ __html: safeBody.substring(lastOffset, offset) }} />);
+                    bodyList.push(<span key={ k++ } dangerouslySetInnerHTML={{ __html: safeSearchTerm }} className="mx_MessageTile_searchHighlight" />);
+                    lastOffset = offset + safeSearchTerm.length;
                 }
-                else {
-                    bodyList.push(<span key={ k++ } >{ body.substring(lastOffset, offset) }</span>);
-                    bodyList.push(<span key={ k++ } className="mx_MessageTile_searchHighlight">{ this.props.searchTerm }</span>);
-                }
-                lastOffset = offset + this.props.searchTerm.length;
-            }
-            if (content.format === "org.matrix.custom.html") {
-                bodyList.push(<span key={ k++ } dangerouslySetInnerHTML={{ __html: body.substring(lastOffset) }} />);
+                bodyList.push(<span key={ k++ } dangerouslySetInnerHTML={{ __html: safeBody.substring(lastOffset) }} />);
             }
             else {
-                bodyList.push(<span key={ k++ }>{ body.substring(lastOffset) }</span>);
+                while ((offset = originalBody.indexOf(this.props.searchTerm, lastOffset)) >= 0) {
+                    bodyList.push(<span key={ k++ } >{ originalBody.substring(lastOffset, offset) }</span>);
+                    bodyList.push(<span key={ k++ } className="mx_MessageTile_searchHighlight">{ this.props.searchTerm }</span>);
+                    lastOffset = offset + this.props.searchTerm.length;
+                }
+                bodyList.push(<span key={ k++ }>{ originalBody.substring(lastOffset) }</span>);
             }
             body = bodyList;
         }
         else {
             if (content.format === "org.matrix.custom.html") {
-                body = <span dangerouslySetInnerHTML={{ __html: body }} />;
+                var safeBody = sanitizeHtml(content.formatted_body, sanitizeHtmlParams);
+                body = <span dangerouslySetInnerHTML={{ __html: safeBody }} />;
+            }
+            else {
+                body = originalBody;
             }
         }