Browse Source

Polish off DOMParser tests

domparser
J. King 1 year ago
parent
commit
6b863a1a85
  1. 3
      lib/DOMParser.php
  2. 71
      tests/cases/TestDOMParser.php

3
lib/DOMParser.php

@ -124,7 +124,8 @@ XMLDECL;
/**
 * Parses a string as XML and returns the resulting document.
 *
 * The string is loaded with network access disabled (LIBXML_NONET) and with
 * libxml warning and error reports suppressed (LIBXML_NOWARNING,
 * LIBXML_NOERROR); a failed load is signalled by throwing instead.
 *
 * NOTE(review): libxml_get_last_error() is documented as returning false when
 * no error is buffered, and neither throw below guards against that; confirm
 * it cannot happen after a failed loadXML() with these flags.
 *
 * @param string $string The XML text to parse
 * @return \DOMDocument The parsed document
 * @throws \Exception If loadXML() reports failure
 */
protected function createDocumentXml(string $string): \DOMDocument {
$document = new \DOMDocument;
if (!$document->loadXML($string, \LIBXML_NONET | \LIBXML_BIGLINES | \LIBXML_COMPACT |\LIBXML_NOWARNING | \LIBXML_NOERROR)) {
// NOTE(review): this listing is a rendered diff hunk without +/- markers.
// The throw directly below appears to be the line the commit removed, and
// the two statements after it appear to be its replacement. Read literally,
// the first throw would leave them unreachable.
throw new \Exception(libxml_get_last_error()->message);
// Replacement: the exception text carries the libxml error code, the trimmed
// message, and the line and column where the error was reported
$err = libxml_get_last_error();
throw new \Exception($err->code.": \"".trim($err->message)."\" on line ".$err->line.", column ".$err->column);
}
return $document;
}

71
tests/cases/TestDOMParser.php

@ -13,12 +13,11 @@ use MensBeam\HTML\DOMParser;
*/
class TestDOMParser extends \PHPUnit\Framework\TestCase {
/**
 * Parses $input as the media type $type and checks the resulting document:
 * the root element's text content must equal $exp and its tag name must be
 * "html".
 *
 * NOTE(review): this listing is a rendered diff hunk without +/- markers, so
 * superseded lines and their replacements both appear in the body. The
 * four-parameter signature and the two statements involving $root look like
 * the superseded version; confirm against the committed file.
 *
 * @dataProvider provideDocuments
 */
public function testParseADocument(string $input, string $type, bool $parseError, string $exp): void {
// Three-parameter signature: the $parseError flag is no longer passed in
public function testParseADocument(string $input, string $type, string $exp): void {
$p = new DOMParser;
$document = $p->parseFromString($input, $type);
// Looks superseded: chose the expected root tag from the $parseError flag
$root = $parseError ? "parsererror" : "html";
$this->assertSame($exp, $document->documentElement->textContent);
// Looks superseded: compared the root tag against the flag-dependent $root
$this->assertSame($root, $document->documentElement->tagName);
// Replacement: the root tag is always expected to be "html"
$this->assertSame("html", $document->documentElement->tagName);
}
public function provideDocuments(): iterable {
@ -27,31 +26,47 @@ class TestDOMParser extends \PHPUnit\Framework\TestCase {
return preg_replace("/[\x{01}-\x{7F}]/s", $replacement, $s);
};
return [
["Test", "text/html", false, "Test"],
["Ol\xE9", "text/html", false, "Ol\u{E9}"],
["Ol\u{E9}", "text/html;charset=utf8", false, "Ol\u{E9}"],
["<meta charset=utf8>Ol\u{E9}", "text/html", false, "Ol\u{E9}"],
["<html>Test</html>", "text/xml", false, "Test"],
["<html>Ol\u{E9}</html>", "text/xml", false, "Ol\u{E9}"],
["<html>Ol\xE9</html>", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
["\u{FEFF}<html>Ol\u{E9}</html>", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
["<?xml version='1.0' encoding='windows-1252'?><html>Ol\xE9</html>", "text/xml", false, "Ol\u{E9}"],
["<html>Ol\xE9</html>", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
["<html>Ol\u{E9}</html>", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
["<?xml version='1.1' encoding='windows-1252'?><html>Ol\u{E9}</html>", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
["<?xml version='1.1' encoding='utf8'?><html>Ol\u{E9}</html>", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
["<?xml version='1.1'?><html>Ol\u{E9}</html>", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
["<?xml version='1.1' ?><html>Ol\u{E9}</html>", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
["<?xml version='1.0' standalone='yes'?><html>Ol\u{E9}</html>", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
["<?xml version='1.0' standalone='yes'?><html>Ol\xE9</html>", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
[$mkUtf16("\xFE\xFF<html>Ol\x00\xE9</html>", false), "text/xml", false, "Ol\u{E9}"],
[$mkUtf16("\xFF\xFE<html>Ol\xE9\x00</html>", true), "text/xml", false, "Ol\u{E9}"],
[$mkUtf16("<?xml version='1.0' encoding='UTF-16'?><html>Ol\x00\xE9</html>", false), "text/xml", false, "Ol\u{E9}"],
[$mkUtf16("<?xml version='1.0' encoding='UTF-16'?><html>Ol\xE9\x00</html>", true), "text/xml", false, "Ol\u{E9}"],
[$mkUtf16("\xFE\xFF<?xml version='1.0' encoding='UTF-8'?><html>Ol\x00\xE9</html>", false), "text/xml", false, "Ol\u{E9}"],
[$mkUtf16("\xFF\xFE<?xml version='1.0' encoding='UTF-8'?><html>Ol\xE9\x00</html>", true), "text/xml", false, "Ol\u{E9}"],
[$mkUtf16("<?xml version='1.0' encoding='UTF-8'?><html>Ol\x00\xE9</html>", false), "text/xml;charset=utf-16be", false, "Ol\u{E9}"],
[$mkUtf16("<?xml version='1.0' encoding='UTF-8'?><html>Ol\xE9\x00</html>", true), "text/xml;charset=utf-16le", false, "Ol\u{E9}"],
["Test", "text/html", "Test"],
["Ol\xE9", "text/html", "Ol\u{E9}"],
["Ol\u{E9}", "text/html;charset=utf8", "Ol\u{E9}"],
["<meta charset=utf8>Ol\u{E9}", "text/html", "Ol\u{E9}"],
["<html>Test</html>", "text/xml", "Test"],
["<html>Ol\u{E9}</html>", "text/xml", "Ol\u{E9}"],
["<html>Ol\xE9</html>", "text/xml;charset=windows-1252", "Ol\u{E9}"],
["\u{FEFF}<html>Ol\u{E9}</html>", "text/xml;charset=windows-1252", "Ol\u{E9}"],
["<?xml version='1.0' encoding='windows-1252'?><html>Ol\xE9</html>", "text/xml", "Ol\u{E9}"],
["<html>Ol\xE9</html>", "text/xml;charset=windows-1252", "Ol\u{E9}"],
["<html>Ol\u{E9}</html>", "text/xml;charset=UTF-8", "Ol\u{E9}"],
["<?xml version='1.1' encoding='windows-1252'?><html>Ol\u{E9}</html>", "text/xml;charset=UTF-8", "Ol\u{E9}"],
["<?xml version='1.1' encoding='utf8'?><html>Ol\u{E9}</html>", "text/xml;charset=UTF-8", "Ol\u{E9}"],
["<?xml version='1.1'?><html>Ol\u{E9}</html>", "text/xml;charset=UTF-8", "Ol\u{E9}"],
["<?xml version='1.1' ?><html>Ol\u{E9}</html>", "text/xml;charset=UTF-8", "Ol\u{E9}"],
["<?xml version='1.0' standalone='yes'?><html>Ol\u{E9}</html>", "text/xml;charset=UTF-8", "Ol\u{E9}"],
["<?xml version='1.0' standalone='yes'?><html>Ol\xE9</html>", "text/xml;charset=windows-1252", "Ol\u{E9}"],
[$mkUtf16("\xFE\xFF<html>Ol\x00\xE9</html>", false), "text/xml", "Ol\u{E9}"],
[$mkUtf16("\xFF\xFE<html>Ol\xE9\x00</html>", true), "text/xml", "Ol\u{E9}"],
[$mkUtf16("<?xml version='1.0' encoding='UTF-16'?><html>Ol\x00\xE9</html>", false), "text/xml", "Ol\u{E9}"],
[$mkUtf16("<?xml version='1.0' encoding='UTF-16'?><html>Ol\xE9\x00</html>", true), "text/xml", "Ol\u{E9}"],
[$mkUtf16("\xFE\xFF<?xml version='1.0' encoding='UTF-8'?><html>Ol\x00\xE9</html>", false), "text/xml", "Ol\u{E9}"],
[$mkUtf16("\xFF\xFE<?xml version='1.0' encoding='UTF-8'?><html>Ol\xE9\x00</html>", true), "text/xml", "Ol\u{E9}"],
[$mkUtf16("<?xml version='1.0' encoding='UTF-8'?><html>Ol\x00\xE9</html>", false), "text/xml;charset=utf-16be", "Ol\u{E9}"],
[$mkUtf16("<?xml version='1.0' encoding='UTF-8'?><html>Ol\xE9\x00</html>", true), "text/xml;charset=utf-16le", "Ol\u{E9}"],
];
}
/**
 * Feeding ill-formed XML to the parser must yield an error document.
 *
 * The input has character data after the root element closes. The test
 * expects the returned document's root to be a "parsererror" element in the
 * namespace asserted below, with some non-whitespace text inside it.
 */
public function testFailToParseADocument(): void {
    $parser = new DOMParser;
    $result = $parser->parseFromString("<html>Test</html><!--Test-->Test", "text/xml");
    $rootElement = $result->documentElement;

    $this->assertSame("parsererror", $rootElement->tagName);
    $this->assertSame("http://www.mozilla.org/newlayout/xml/parsererror.xml", $rootElement->namespaceURI);
    // The error description itself is not pinned; it only has to be non-blank
    $this->assertNotSame("", trim($rootElement->textContent));
}
/**
 * Asking the parser to handle the media type "text/plain" must be rejected
 * with an \InvalidArgumentException.
 */
public function testParseWithIncorrectType(): void {
    $parser = new DOMParser;
    $markup = "<html>Ol\u{E9}</html>";

    // The expectation has to be registered before the call that should throw
    $this->expectException(\InvalidArgumentException::class);
    $parser->parseFromString($markup, "text/plain");
}
}

Loading…
Cancel
Save