Add first tests for DOMParser
This has exposed an error in the assumptions about BOM handling in the XML parser. Proper handling of external encoding information will take more effort than anticipated.
This commit is contained in:
parent
2b39319894
commit
bcbe74d6b9
3 changed files with 41 additions and 1 deletions
|
@ -331,7 +331,7 @@ abstract class Charset {
|
|||
$pos++;
|
||||
}
|
||||
# If the byte at encodingPosition is not 0x3D (=), then return failure.
|
||||
// NOTE: This is also buggy: see https://github.com/whatwg/html/issues/7193
|
||||
# Advance encodingPosition to the next byte.
|
||||
if ($s[$pos++] !== "=") {
|
||||
return null;
|
||||
}
|
||||
|
|
39
tests/cases/TestDOMParser.php
Normal file
39
tests/cases/TestDOMParser.php
Normal file
|
@ -0,0 +1,39 @@
|
|||
<?php
|
||||
/** @license MIT
|
||||
* Copyright 2017 , Dustin Wilson, J. King et al.
|
||||
* See LICENSE and AUTHORS files for details */
|
||||
|
||||
declare(strict_types=1);
|
||||
namespace MensBeam\HTML\TestCase;
|
||||
|
||||
use MensBeam\HTML\DOMParser;
|
||||
|
||||
/**
|
||||
* @covers \MensBeam\HTML\DOMParser
|
||||
*/
|
||||
class TestDOMParser extends \PHPUnit\Framework\TestCase {
    /**
     * Parses a string with DOMParser and verifies the tag name and text
     * content of the resulting document element.
     *
     * @dataProvider provideDocuments
     *
     * @param string $input      The raw string handed to the parser
     * @param string $type       The MIME type, optionally carrying a charset parameter
     * @param bool   $parseError Whether the result is expected to be a parser-error document
     * @param string $exp        The expected text content of the document element
     */
    public function testParseADocument(string $input, string $type, bool $parseError, string $exp): void {
        $p = new DOMParser;
        $document = $p->parseFromString($input, $type);
        // A failed parse is reported as a document rooted at <parsererror>
        // (previously misspelled "parserror", which could never match).
        $root = $parseError ? "parsererror" : "html";
        $this->assertSame($root, $document->documentElement->tagName);
        $this->assertSame($exp, $document->documentElement->textContent);
    }

    /**
     * Supplies the data sets for testParseADocument().
     *
     * Each set is [input, MIME type, parse error expected, expected text].
     * The provider is static so that it works with both older and newer
     * PHPUnit releases.
     */
    public static function provideDocuments(): iterable {
        return [
            // HTML inputs
            ["Test", "text/html", false, "Test"],
            ["Ol\xE9", "text/html", false, "Ol\u{E9}"],
            ["Ol\u{E9}", "text/html;charset=utf8", false, "Ol\u{E9}"],
            ["<meta charset=utf8>Ol\u{E9}", "text/html", false, "Ol\u{E9}"],
            // XML inputs
            ["<html>Test</html>", "text/xml", false, "Test"],
            ["<html>Ol\u{E9}</html>", "text/xml", false, "Ol\u{E9}"],
            ["<html>Ol\xE9</html>", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
            // Input begins with a UTF-8 byte order mark despite the declared charset
            ["\u{FEFF}<html>Ol\u{E9}</html>", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
            ["<?xml version='1.0' encoding='windows-1252'?><html>Ol\xE9</html>", "text/xml", false, "Ol\u{E9}"],
            // NOTE(review): identical to an earlier data set -- confirm whether a different case was intended
            ["<html>Ol\xE9</html>", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
            ["<?xml version='1.2' encoding='windows-1252'?><html>Ol\u{E9}</html>", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
        ];
    }
}
|
|
@ -27,6 +27,7 @@
|
|||
</testsuite>
|
||||
<testsuite name="Parser">
|
||||
<file>cases/TestParser.php</file>
|
||||
<file>cases/TestDOMParser.php</file>
|
||||
</testsuite>
|
||||
<testsuite name="Serializer">
|
||||
<file>cases/TestSerializer.php</file>
|
||||
|
|
Loading…
Reference in a new issue