From 3f02b0bf56f887fb4cca631bde644d6106335169 Mon Sep 17 00:00:00 2001 From: Nicolas Werner Date: Tue, 14 Jan 2020 17:47:30 +0100 Subject: [PATCH] Escape blacklisted html tags --- src/Utils.cpp | 56 ++++++++++++++++++++++++++-------- src/Utils.h | 4 +++ src/timeline/TimelineModel.cpp | 4 +-- 3 files changed, 49 insertions(+), 15 deletions(-) diff --git a/src/Utils.cpp b/src/Utils.cpp index 1c94761d..ddd066d7 100644 --- a/src/Utils.cpp +++ b/src/Utils.cpp @@ -314,37 +314,67 @@ utils::linkifyMessage(const QString &body) return doc; } -QByteArray -escapeRawHtml(const QByteArray &data) +QString +utils::escapeBlacklistedHtml(const QString &rawStr) { + static const std::vector allowedTags = { + "font", "/font", "del", "/del", "h1", "/h1", "h2", "/h2", + "h3", "/h3", "h4", "/h4", "h5", "/h5", "h6", "/h6", + "blockquote", "/blockquote", "p", "/p", "a", "/a", "ul", "/ul", + "ol", "/ol", "sup", "/sup", "sub", "/sub", "li", "/li", + "b", "/b", "i", "/i", "u", "/u", "strong", "/strong", + "em", "/em", "strike", "/strike", "code", "/code", "hr", "/hr", + "br", "br/", "div", "/div", "table", "/table", "thead", "/thead", + "tbody", "/tbody", "tr", "/tr", "th", "/th", "td", "/td", + "caption", "/caption", "pre", "/pre", "span", "/span", "img", "/img"}; + QByteArray data = rawStr.toUtf8(); QByteArray buffer; const size_t length = data.size(); buffer.reserve(length); + bool escapingTag = false; for (size_t pos = 0; pos != length; ++pos) { switch (data.at(pos)) { - case '&': - buffer.append("&"); - break; - case '<': - buffer.append("<"); + case '<': { + bool oneTagMatched = false; + size_t endPos = std::min(static_cast(data.indexOf('>', pos)), + static_cast(data.indexOf(' ', pos))); + + auto mid = data.mid(pos + 1, endPos - pos - 1); + for (const auto &tag : allowedTags) { + // TODO: Check src and href attribute + if (mid.compare(tag.data(), Qt::CaseInsensitive) == 0) { + oneTagMatched = true; + } + } + if (oneTagMatched) + buffer.append('<'); + else { + escapingTag = true; + buffer.append("<"); + } break; + } case '>': - buffer.append(">"); + if (escapingTag) + buffer.append(">"); + else { + escapingTag = false; + buffer.append('>'); + } break; default: buffer.append(data.at(pos)); break; } } - return buffer; + return QString::fromUtf8(buffer); } QString utils::markdownToHtml(const QString &text) { - const auto str = escapeRawHtml(text.toUtf8()); - const char *tmp_buf = - cmark_markdown_to_html(str.constData(), str.size(), CMARK_OPT_DEFAULT); + const auto str = text.toUtf8(); + const char *tmp_buf = cmark_markdown_to_html(str.constData(), str.size(), CMARK_OPT_UNSAFE); // Copy the null terminated output buffer. std::string html(tmp_buf); @@ -352,7 +382,7 @@ utils::markdownToHtml(const QString &text) // The buffer is no longer needed. free((char *)tmp_buf); - auto result = QString::fromStdString(html).trimmed(); + auto result = escapeBlacklistedHtml(QString::fromStdString(html)).trimmed(); return result; } diff --git a/src/Utils.h b/src/Utils.h index 6b809d63..119d660a 100644 --- a/src/Utils.h +++ b/src/Utils.h @@ -286,6 +286,10 @@ linkifyMessage(const QString &body); QString markdownToHtml(const QString &text); +//! Escape every html tag, that was not whitelisted +QString +escapeBlacklistedHtml(const QString &data); + //! Generate a Rich Reply quote message QString getFormattedQuoteBody(const RelatedInfo &related, const QString &html); diff --git a/src/timeline/TimelineModel.cpp b/src/timeline/TimelineModel.cpp index 3064ab4a..eef4ec31 100644 --- a/src/timeline/TimelineModel.cpp +++ b/src/timeline/TimelineModel.cpp @@ -260,8 +260,8 @@ TimelineModel::data(const QString &id, int role) const const static QRegularExpression replyFallback( ".*", QRegularExpression::DotMatchesEverythingOption); return QVariant( - utils::replaceEmoji(utils::linkifyMessage(formattedBodyWithFallback(event))) - .remove(replyFallback)); + utils::replaceEmoji(utils::linkifyMessage(utils::escapeBlacklistedHtml( + formattedBodyWithFallback(event).remove(replyFallback))))); } case Url: return QVariant(QString::fromStdString(url(event)));