Skip to content

Commit e3026f5

Browse files
ohader and bnf authored
Merge pull request from GHSA-59jf-3q9v-rh6g
* [SECURITY] Properly encode noscript child nodes The `<noscript>` element has a special behavior when being evaluated in browsers, which depends on whether script-parsing is enabled or disabled. As a consequence this change will * encode comment inner data, e.g. `<!-- <"comment"> -->` as `<!-- &lt;&quot;comment&quot;&gt; -->` * always encode tag attributes, e.g. `<p id="<value>">` as `<p id="&lt;value&gt;">` * extend `Comment` and `CdataSection` to have a constructor (which triggers encoding by default) and to implement the `Behavior\HandlerInterface` * add a new serializer option `encode_attributes`, which might basically be extracted to `Masterminds\HTML5` * [TASK] Ensure attribute serialization preserves values as is We don't want so-called double-encoding – which is a valid use case when HTML describes how HTML is to be written – to be automagically transformed to single encoded values, as otherwise a valid input like <a title="Insert &amp;amp; to write an &amp;"></a> (Browser would show "Insert &amp; to write an &") …would be changed to: <a title="Insert &amp; to write an &amp;"></a> (Browser would show "Insert & to write an &") Also add tests for the attribute encoding we want: * Encode quotes, tags and stuff that might cause security issues * do not unnecessarily encode slashes or colons (like htmlentities would do) --------- Co-authored-by: Benjamin Franzke <[email protected]>
1 parent 476383a commit e3026f5

File tree

10 files changed

+302
-70
lines changed

10 files changed

+302
-70
lines changed

src/Behavior.php

Lines changed: 13 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
namespace TYPO3\HtmlSanitizer;
1616

1717
use LogicException;
18+
use TYPO3\HtmlSanitizer\Behavior\CdataSection;
19+
use TYPO3\HtmlSanitizer\Behavior\Comment;
1820
use TYPO3\HtmlSanitizer\Behavior\NodeInterface;
1921
use TYPO3\HtmlSanitizer\Behavior\Tag;
2022

@@ -74,11 +76,16 @@ class Behavior
7476
* Node names as array index, e.g. `['strong' => new Tag('strong'), '#comment' => new Comment()]`
7577
* @var array<string, ?NodeInterface>
7678
*/
77-
protected $nodes = [
79+
protected $nodes = [];
80+
81+
public function __construct()
82+
{
7883
// v2.1.0: adding `#comment` and `#cdata-section` hints for backward compatibility, will be removed with v3.0.0
79-
'#comment' => null,
80-
'#cdata-section' => null,
81-
];
84+
$this->nodes = array_merge($this->nodes, [
85+
'#comment' => new Comment(),
86+
'#cdata-section' => new CdataSection(),
87+
]);
88+
}
8289

8390
public function withFlags(int $flags): self
8491
{
@@ -125,7 +132,6 @@ public function withNodes(NodeInterface ...$nodes): self
125132
if (!is_array($indexedNodes)) {
126133
return $this;
127134
}
128-
$this->assertNodeUniqueness($indexedNodes);
129135
$target = clone $this;
130136
$target->nodes = array_merge($target->nodes, $indexedNodes);
131137
return $target;
@@ -136,9 +142,8 @@ public function withoutNodes(NodeInterface ...$nodes): self
136142
$names = array_map([$this, 'getNodeName'], $nodes);
137143
$filteredNodes = array_filter(
138144
$this->nodes,
139-
static function (?NodeInterface $node, string $name) use ($nodes, $names) {
140-
return $node === null && !in_array($name, $names, true)
141-
|| $node !== null && !in_array($node, $nodes, true);
145+
static function (NodeInterface $node, string $name) use ($nodes, $names) {
146+
return !in_array($name, $names, true) && !in_array($node, $nodes, true);
142147
},
143148
ARRAY_FILTER_USE_BOTH
144149
);
@@ -247,23 +252,6 @@ protected function assertScalarUniqueness(array $names): void
247252
}
248253
}
249254

250-
/**
251-
* @param array<string, NodeInterface> $nodes
252-
*/
253-
protected function assertNodeUniqueness(array $nodes): void
254-
{
255-
$existingNodeNames = array_intersect_key(array_filter($this->nodes), $nodes);
256-
if ($existingNodeNames !== []) {
257-
throw new LogicException(
258-
sprintf(
259-
'Cannot redeclare node names %s. Remove duplicates first',
260-
implode(', ', array_keys($existingNodeNames))
261-
),
262-
1625391217
263-
);
264-
}
265-
}
266-
267255
protected function getNodeName(NodeInterface $node): string
268256
{
269257
return strtolower($node->getName());

src/Behavior/CdataSection.php

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,36 @@
1414

1515
namespace TYPO3\HtmlSanitizer\Behavior;
1616

17+
use DOMNode;
18+
use DOMText;
19+
use TYPO3\HtmlSanitizer\Behavior;
20+
use TYPO3\HtmlSanitizer\Context;
21+
1722
/**
1823
* Model of CDATA node.
1924
*/
20-
class CdataSection implements NodeInterface
25+
class CdataSection implements NodeInterface, HandlerInterface
2126
{
27+
/**
28+
* @var bool
29+
*/
30+
protected $secure = true;
31+
32+
public function __construct(bool $secure = true)
33+
{
34+
$this->secure = $secure;
35+
}
36+
2237
public function getName(): string
2338
{
2439
return '#cdata-section';
2540
}
41+
42+
public function handle(NodeInterface $node, ?DOMNode $domNode, Context $context, Behavior $behavior = null): ?DOMNode
43+
{
44+
if (!$this->secure || $domNode === null) {
45+
return $domNode;
46+
}
47+
return new DOMText(trim($domNode->nodeValue));
48+
}
2649
}

src/Behavior/Comment.php

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,36 @@
1414

1515
namespace TYPO3\HtmlSanitizer\Behavior;
1616

17+
use DOMComment;
18+
use DOMNode;
19+
use TYPO3\HtmlSanitizer\Behavior;
20+
use TYPO3\HtmlSanitizer\Context;
21+
1722
/**
1823
* Model of comment node.
1924
*/
20-
class Comment implements NodeInterface
25+
class Comment implements NodeInterface, HandlerInterface
2126
{
27+
/**
28+
* @var bool
29+
*/
30+
protected $secure = true;
31+
32+
public function __construct(bool $secure = true)
33+
{
34+
$this->secure = $secure;
35+
}
36+
2237
public function getName(): string
2338
{
2439
return '#comment';
2540
}
41+
42+
public function handle(NodeInterface $node, ?DOMNode $domNode, Context $context, Behavior $behavior = null): ?DOMNode
43+
{
44+
if (!$this->secure || $domNode === null) {
45+
return $domNode;
46+
}
47+
return new DOMComment(htmlspecialchars($domNode->textContent, ENT_QUOTES, 'UTF-8', false));
48+
}
2649
}

src/Builder/CommonBuilder.php

Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,8 @@
1414

1515
namespace TYPO3\HtmlSanitizer\Builder;
1616

17-
use DOMNode;
18-
use DOMText;
1917
use TYPO3\HtmlSanitizer\Behavior;
2018
use TYPO3\HtmlSanitizer\Behavior\Attr\UriAttrValueBuilder;
21-
use TYPO3\HtmlSanitizer\Behavior\NodeInterface;
2219
use TYPO3\HtmlSanitizer\Sanitizer;
2320
use TYPO3\HtmlSanitizer\Visitor\CommonVisitor;
2421

@@ -83,8 +80,7 @@ protected function createBehavior(): Behavior
8380
->withName('common')
8481
->withTags(...array_values($this->createBasicTags()))
8582
->withTags(...array_values($this->createMediaTags()))
86-
->withTags(...array_values($this->createTableTags()))
87-
->withNodes(...array_values($this->createSpecialNodes()));
83+
->withTags(...array_values($this->createTableTags()));
8884
}
8985

9086
protected function createBasicTags(): array
@@ -211,23 +207,6 @@ protected function createTableTags(): array
211207
return $tags;
212208
}
213209

214-
/**
215-
* @return array<string, Behavior\NodeInterface>
216-
*/
217-
protected function createSpecialNodes(): array
218-
{
219-
$nodes = [];
220-
$nodes['#cdata-section'] = (new Behavior\NodeHandler(
221-
new Behavior\CdataSection(),
222-
new Behavior\Handler\ClosureHandler(
223-
static function (NodeInterface $node, ?DOMNode $domNode) {
224-
return $domNode !== null ? new DOMText(trim($domNode->nodeValue)) : null;
225-
}
226-
)
227-
));
228-
return $nodes;
229-
}
230-
231210
/**
232211
* @return Behavior\Attr[]
233212
*/

src/Sanitizer.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ class Sanitizer
4141
protected const mastermindsDefaultOptions = [
4242
// Whether the serializer should aggressively encode all characters as entities.
4343
'encode_entities' => false,
44+
'encode_attributes' => true,
4445
// Prevents the parser from automatically assigning the HTML5 namespace to the DOM document.
4546
// (adjusted due to https://github.com/Masterminds/html5-php/issues/181#issuecomment-643767471)
4647
'disable_html_ns' => true,

src/Serializer/Rules.php

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ class Rules extends OutputRules implements RulesInterface
4545
*/
4646
protected $initiator;
4747

48+
/**
49+
* @var bool
50+
*/
51+
protected $encodeAttributes;
52+
4853
/**
4954
* @param Behavior $behavior
5055
* @param resource $output
@@ -66,6 +71,7 @@ public static function create(Behavior $behavior, $output, array $options = []):
6671
public function __construct($output, $options = [])
6772
{
6873
$this->options = (array)$options;
74+
$this->encodeAttributes = !empty($options['encode_attributes']);
6975
parent::__construct($output, $this->options);
7076
}
7177

@@ -158,6 +164,32 @@ public function text($domNode): void
158164
$this->wr($domNode->data);
159165
}
160166

167+
protected function enc($text, $attribute = false): string
168+
{
169+
if ($attribute && $this->encodeAttributes && !$this->encode) {
170+
// In contrast to parent::enc() (when $this->encode is true),
171+
// we are using htmlspecialchars() instead of htmlentities() as
172+
// colons and slashes do not need to be aggressively escaped.
173+
$value = htmlspecialchars(
174+
$text,
175+
ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES,
176+
'UTF-8',
177+
// $double_encode: true
178+
// (name is misleading, it actually means: disable-automagic/always-encode)
179+
// Our input is always entity decoded by the parser and we do not
180+
// want to consider our input to possibly contain valid entities
181+
// we rather want to treat it as pure text, that is *always* to be encoded.
182+
true
183+
);
184+
// htmlspecialchars does escaping, but it doesn't match the requirements of
185+
// HTML5 section 8.3 to escape non-breaking spaces
186+
// https://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
187+
$value = implode('&nbsp;', mb_split("\xc2\xa0", $value));
188+
return $value;
189+
}
190+
return parent::enc($text, $attribute);
191+
}
192+
161193
/**
162194
* If the element has a declared namespace in the HTML, MathML or
163195
* SVG namespaces, we use the localName instead of the tagName.

src/Visitor/CommonVisitor.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@ public function enterNode(DOMNode $domNode): ?DOMNode
8686
if (!$node->shallHandleFirst()) {
8787
$domNode = $node->getHandler()->handle($node->getNode(), $domNode, $this->context, $this->behavior);
8888
}
89+
} elseif ($node instanceof Behavior\HandlerInterface) {
90+
$domNode = $node->handle($node, $domNode, $this->context, $this->behavior);
91+
$domNode = $domNode instanceof DOMElement ? $this->enterDomElement($domNode, $node) : $domNode;
8992
} elseif ($domNode instanceof DOMElement) {
9093
$domNode = $this->enterDomElement($domNode, $node);
9194
}

tests/BehaviorTest.php

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ class BehaviorTest extends TestCase
2424
public function ambiguityIsDetectedDataProvider(): array
2525
{
2626
return [
27-
[ ['same'], ['same'], 1625391217 ],
2827
[ ['same', 'same'], [], 1625591503 ],
2928
[ ['same', 'same'], ['same'], 1625591503 ],
3029
[ [], ['same', 'same'], 1625591503 ],

tests/CommonBuilderTest.php

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,13 +256,21 @@ public function isSanitizedDataProvider(): array
256256
'<!-- #comment -->',
257257
],
258258
'#910' => [
259-
'<![CDATA[ #cdata ]]>',
260-
'#cdata',
259+
'<!-- <"comment"> -->',
260+
'<!-- &lt;&quot;comment&quot;&gt; -->',
261261
],
262262
'#911' => [
263+
'<!-- &lt;&quot;comment&quot;&gt; -->',
264+
'<!-- &lt;&quot;comment&quot;&gt; -->',
265+
],
266+
'#915' => [
263267
'#text',
264268
'#text',
265269
],
270+
'#920' => [
271+
'<![CDATA[ #cdata ]]>',
272+
'#cdata',
273+
],
266274
'#921' => [
267275
'<![CDATA[<any><span data-value="value"></any>*/]]>',
268276
'&lt;any&gt;&lt;span data-value="value"&gt;&lt;/any&gt;*/',
@@ -287,6 +295,14 @@ public function isSanitizedDataProvider(): array
287295
'<img src="/typo3.org/logo.svg"><any>value</any></img>',
288296
'<img src="/typo3.org/logo.svg">&lt;any&gt;value&lt;/any&gt;',
289297
],
298+
'#935' => [
299+
'<p class="</p><script>alert(1)">value</p>',
300+
'<p class="&lt;/p&gt;&lt;script&gt;alert(1)">value</p>',
301+
],
302+
'#936' => [
303+
'<p class="{&quot;json&quot;:true}">value</p>',
304+
'<p class="{&quot;json&quot;:true}">value</p>',
305+
],
290306
];
291307
}
292308

0 commit comments

Comments
 (0)