diff --git a/lib/DOMParser.php b/lib/DOMParser.php
index 09ce276..ba65bf7 100644
--- a/lib/DOMParser.php
+++ b/lib/DOMParser.php
@@ -124,7 +124,18 @@ XMLDECL;
+    /**
+     * Loads the string into a new DOMDocument with DOMDocument::loadXML().
+     *
+     * @throws \Exception if loadXML() reports failure
+     */
     protected function createDocumentXml(string $string): \DOMDocument {
         $document = new \DOMDocument;
         if (!$document->loadXML($string, \LIBXML_NONET | \LIBXML_BIGLINES | \LIBXML_COMPACT |\LIBXML_NOWARNING | \LIBXML_NOERROR)) {
-            throw new \Exception(libxml_get_last_error()->message);
+            $err = libxml_get_last_error();
+            // libxml_get_last_error() returns false when no error was recorded;
+            // fall back to a generic message rather than reading properties of false
+            if ($err === false) {
+                throw new \Exception("XML document could not be parsed");
+            }
+            throw new \Exception($err->code.": \"".trim($err->message)."\" on line ".$err->line.", column ".$err->column);
         }
         return $document;
     }
diff --git a/tests/cases/TestDOMParser.php b/tests/cases/TestDOMParser.php
index fd8542c..edb36b2 100644
--- a/tests/cases/TestDOMParser.php
+++ b/tests/cases/TestDOMParser.php
@@ -13,12 +13,11 @@ use MensBeam\HTML\DOMParser;
  */
 class TestDOMParser extends \PHPUnit\Framework\TestCase {
     /** @dataProvider provideDocuments */
-    public function testParseADocument(string $input, string $type, bool $parseError, string $exp): void {
+    public function testParseADocument(string $input, string $type, string $exp): void {
         $p = new DOMParser;
         $document = $p->parseFromString($input, $type);
-        $root = $parseError ? "parsererror" : "html";
         $this->assertSame($exp, $document->documentElement->textContent);
-        $this->assertSame($root, $document->documentElement->tagName);
+        $this->assertSame("html", $document->documentElement->tagName);
     }
 
     public function provideDocuments(): iterable {
@@ -27,31 +26,49 @@ class TestDOMParser extends \PHPUnit\Framework\TestCase {
             return preg_replace("/[\x{01}-\x{7F}]/s", $replacement, $s);
         };
         return [
-            ["Test", "text/html", false, "Test"],
-            ["Ol\xE9", "text/html", false, "Ol\u{E9}"],
-            ["Ol\u{E9}", "text/html;charset=utf8", false, "Ol\u{E9}"],
-            ["Ol\u{E9}", "text/html", false, "Ol\u{E9}"],
-            ["Test", "text/xml", false, "Test"],
-            ["Ol\u{E9}", "text/xml", false, "Ol\u{E9}"],
-            ["Ol\xE9", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
-            ["\u{FEFF}Ol\u{E9}", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
-            ["Ol\xE9", "text/xml", false, "Ol\u{E9}"],
-            ["Ol\xE9", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
-            ["Ol\u{E9}", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
-            ["Ol\u{E9}", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
-            ["Ol\u{E9}", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
-            ["Ol\u{E9}", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
-            ["Ol\u{E9}", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
-            ["Ol\u{E9}", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
-            ["Ol\xE9", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
-            [$mkUtf16("\xFE\xFFOl\x00\xE9", false), "text/xml", false, "Ol\u{E9}"],
-            [$mkUtf16("\xFF\xFEOl\xE9\x00", true), "text/xml", false, "Ol\u{E9}"],
-            [$mkUtf16("Ol\x00\xE9", false), "text/xml", false, "Ol\u{E9}"],
-            [$mkUtf16("Ol\xE9\x00", true), "text/xml", false, "Ol\u{E9}"],
-            [$mkUtf16("\xFE\xFFOl\x00\xE9", false), "text/xml", false, "Ol\u{E9}"],
-            [$mkUtf16("\xFF\xFEOl\xE9\x00", true), "text/xml", false, "Ol\u{E9}"],
-            [$mkUtf16("Ol\x00\xE9", false), "text/xml;charset=utf-16be", false, "Ol\u{E9}"],
-            [$mkUtf16("Ol\xE9\x00", true), "text/xml;charset=utf-16le", false, "Ol\u{E9}"],
+            ["Test", "text/html", "Test"],
+            ["Ol\xE9", "text/html", "Ol\u{E9}"],
+            ["Ol\u{E9}", "text/html;charset=utf8", "Ol\u{E9}"],
+            ["Ol\u{E9}", "text/html", "Ol\u{E9}"],
+            ["Test", "text/xml", "Test"],
+            ["Ol\u{E9}", "text/xml", "Ol\u{E9}"],
+            ["Ol\xE9", "text/xml;charset=windows-1252", "Ol\u{E9}"],
+            ["\u{FEFF}Ol\u{E9}", "text/xml;charset=windows-1252", "Ol\u{E9}"],
+            ["Ol\xE9", "text/xml", "Ol\u{E9}"],
+            ["Ol\xE9", "text/xml;charset=windows-1252", "Ol\u{E9}"],
+            ["Ol\u{E9}", "text/xml;charset=UTF-8", "Ol\u{E9}"],
+            ["Ol\u{E9}", "text/xml;charset=UTF-8", "Ol\u{E9}"],
+            ["Ol\u{E9}", "text/xml;charset=UTF-8", "Ol\u{E9}"],
+            ["Ol\u{E9}", "text/xml;charset=UTF-8", "Ol\u{E9}"],
+            ["Ol\u{E9}", "text/xml;charset=UTF-8", "Ol\u{E9}"],
+            ["Ol\u{E9}", "text/xml;charset=UTF-8", "Ol\u{E9}"],
+            ["Ol\xE9", "text/xml;charset=windows-1252", "Ol\u{E9}"],
+            [$mkUtf16("\xFE\xFFOl\x00\xE9", false), "text/xml", "Ol\u{E9}"],
+            [$mkUtf16("\xFF\xFEOl\xE9\x00", true), "text/xml", "Ol\u{E9}"],
+            [$mkUtf16("Ol\x00\xE9", false), "text/xml", "Ol\u{E9}"],
+            [$mkUtf16("Ol\xE9\x00", true), "text/xml", "Ol\u{E9}"],
+            [$mkUtf16("\xFE\xFFOl\x00\xE9", false), "text/xml", "Ol\u{E9}"],
+            [$mkUtf16("\xFF\xFEOl\xE9\x00", true), "text/xml", "Ol\u{E9}"],
+            [$mkUtf16("Ol\x00\xE9", false), "text/xml;charset=utf-16be", "Ol\u{E9}"],
+            [$mkUtf16("Ol\xE9\x00", true), "text/xml;charset=utf-16le", "Ol\u{E9}"],
         ];
     }
+
+    /** Checks that the result for this input has a "parsererror" root element in the Mozilla parsererror namespace with non-empty text */
+    public function testFailToParseADocument(): void {
+        $in = "TestTest";
+        $p = new DOMParser;
+        $d = $p->parseFromString($in, "text/xml");
+        $this->assertSame("parsererror", $d->documentElement->tagName);
+        $this->assertSame("http://www.mozilla.org/newlayout/xml/parsererror.xml", $d->documentElement->namespaceURI);
+        $this->assertNotSame("", trim($d->documentElement->textContent));
+    }
+
+    /** Checks that the "text/plain" type is rejected with an InvalidArgumentException */
+    public function testParseWithIncorrectType(): void {
+        $in = "Ol\u{E9}";
+        $p = new DOMParser;
+        $this->expectException(\InvalidArgumentException::class);
+        $p->parseFromString($in, "text/plain");
+    }
 }