Browse Source

Add first tests for DOMParser

This has exposed an error in the assumptions about BOM handling in the
XML parser. Proper handling of external encoding information will take
more effort than anticipated.
domparser
J. King 1 year ago
parent
commit
bcbe74d6b9
  1. 2
      lib/Parser/Charset.php
  2. 39
      tests/cases/TestDOMParser.php
  3. 1
      tests/phpunit.dist.xml

2
lib/Parser/Charset.php

@ -331,7 +331,7 @@ abstract class Charset {
$pos++;
}
# If the byte at encodingPosition is not 0x3D (=), then return failure.
// NOTE: This is also buggy: see https://github.com/whatwg/html/issues/7193
# Advance encodingPosition to the next byte.
if ($s[$pos++] !== "=") {
return null;
}

39
tests/cases/TestDOMParser.php

@ -0,0 +1,39 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML\TestCase;
use MensBeam\HTML\DOMParser;
/**
 * Exercises DOMParser::parseFromString() with HTML and XML input whose
 * encoding is conveyed in different ways (MIME type parameter, <meta>
 * element, XML declaration, byte order mark, or not at all).
 *
 * @covers \MensBeam\HTML\DOMParser
 */
class TestDOMParser extends \PHPUnit\Framework\TestCase {
    /**
     * Parses $input as $type, then checks the tag name and the text content
     * of the resulting document element.
     *
     * @param string $input      The raw string handed to the parser
     * @param string $type       The MIME type, optionally with a charset parameter
     * @param bool   $parseError Whether the root is expected to be the parse-error element rather than "html"
     * @param string $exp        The expected text content of the document element
     *
     * @dataProvider provideDocuments
     */
    public function testParseADocument(string $input, string $type, bool $parseError, string $exp): void {
        $parser = new DOMParser;
        $document = $parser->parseFromString($input, $type);
        // The DOMParser algorithm in the HTML Standard names the root element
        //   of an error document "parsererror" (previously misspelled here)
        $expectedRoot = $parseError ? "parsererror" : "html";
        $documentElement = $document->documentElement;
        $this->assertSame($expectedRoot, $documentElement->tagName);
        $this->assertSame($exp, $documentElement->textContent);
    }

    /**
     * Supplies the data sets for testParseADocument().
     *
     * Each set is [input, MIME type, parse error expected, expected text].
     * "\xE9" is the single byte 0xE9, whereas "\u{E9}" is U+00E9 encoded as
     * UTF-8. The keys only serve to identify a failing set in test output.
     *
     * The method is static so it can be called without a test-case instance.
     *
     * @return iterable<string, array{0: string, 1: string, 2: bool, 3: string}>
     */
    public static function provideDocuments(): iterable {
        return [
            "HTML, ASCII only"                                        => ["Test", "text/html", false, "Test"],
            "HTML, byte 0xE9, no declared encoding"                   => ["Ol\xE9", "text/html", false, "Ol\u{E9}"],
            "HTML, UTF-8 input, charset in MIME type"                 => ["Ol\u{E9}", "text/html;charset=utf8", false, "Ol\u{E9}"],
            "HTML, UTF-8 input, charset in meta element"              => ["<meta charset=utf8>Ol\u{E9}", "text/html", false, "Ol\u{E9}"],
            "XML, ASCII only"                                         => ["<html>Test</html>", "text/xml", false, "Test"],
            "XML, UTF-8 input, no declared encoding"                  => ["<html>Ol\u{E9}</html>", "text/xml", false, "Ol\u{E9}"],
            "XML, byte 0xE9, windows-1252 in MIME type"               => ["<html>Ol\xE9</html>", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
            "XML, UTF-8 input with BOM, windows-1252 in MIME type"    => ["\u{FEFF}<html>Ol\u{E9}</html>", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
            "XML, byte 0xE9, windows-1252 in XML declaration"         => ["<?xml version='1.0' encoding='windows-1252'?><html>Ol\xE9</html>", "text/xml", false, "Ol\u{E9}"],
            "XML, byte 0xE9, windows-1252 in MIME type (repeated)"    => ["<html>Ol\xE9</html>", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
            "XML, UTF-8 input, UTF-8 in MIME type, 1252 in XML decl." => ["<?xml version='1.2' encoding='windows-1252'?><html>Ol\u{E9}</html>", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
        ];
    }
}

1
tests/phpunit.dist.xml

@ -27,6 +27,7 @@
</testsuite>
<testsuite name="Parser">
<file>cases/TestParser.php</file>
<file>cases/TestDOMParser.php</file>
</testsuite>
<testsuite name="Serializer">
<file>cases/TestSerializer.php</file>

Loading…
Cancel
Save