diff --git a/lib/DOMParser.php b/lib/DOMParser.php
index 09ce276..ba65bf7 100644
--- a/lib/DOMParser.php
+++ b/lib/DOMParser.php
@@ -124,7 +124,15 @@ XMLDECL;
protected function createDocumentXml(string $string): \DOMDocument {
+ // Parse $string as XML into a new DOMDocument. Network access is disabled (LIBXML_NONET)
+ // and libxml warning/error reports are suppressed; failure is signalled by \Exception instead.
$document = new \DOMDocument;
if (!$document->loadXML($string, \LIBXML_NONET | \LIBXML_BIGLINES | \LIBXML_COMPACT |\LIBXML_NOWARNING | \LIBXML_NOERROR)) {
- throw new \Exception(libxml_get_last_error()->message);
+ $err = libxml_get_last_error();
+ // libxml_get_last_error() returns false when nothing was recorded; dereferencing
+ // that would yield a meaningless message, so report a generic failure instead.
+ if ($err === false) {
+ throw new \Exception("Unknown XML parsing error");
+ }
+ throw new \Exception($err->code.": \"".trim($err->message)."\" on line ".$err->line.", column ".$err->column);
}
return $document;
}
diff --git a/tests/cases/TestDOMParser.php b/tests/cases/TestDOMParser.php
index fd8542c..edb36b2 100644
--- a/tests/cases/TestDOMParser.php
+++ b/tests/cases/TestDOMParser.php
@@ -13,12 +13,11 @@ use MensBeam\HTML\DOMParser;
*/
class TestDOMParser extends \PHPUnit\Framework\TestCase {
/** @dataProvider provideDocuments */
- public function testParseADocument(string $input, string $type, bool $parseError, string $exp): void {
- $p = new DOMParser;
- $document = $p->parseFromString($input, $type);
- $root = $parseError ? "parsererror" : "html";
- $this->assertSame($exp, $document->documentElement->textContent);
- $this->assertSame($root, $document->documentElement->tagName);
+ public function testParseADocument(string $input, string $type, string $exp): void {
+ // Parse $input as $type, then check the root element's text content and that its tag name is "html".
+ $root = (new DOMParser)->parseFromString($input, $type)->documentElement;
+ $this->assertSame($exp, $root->textContent);
+ $this->assertSame("html", $root->tagName);
}
public function provideDocuments(): iterable {
@@ -27,31 +26,47 @@ class TestDOMParser extends \PHPUnit\Framework\TestCase {
return preg_replace("/[\x{01}-\x{7F}]/s", $replacement, $s);
};
return [
- ["Test", "text/html", false, "Test"],
- ["Ol\xE9", "text/html", false, "Ol\u{E9}"],
- ["Ol\u{E9}", "text/html;charset=utf8", false, "Ol\u{E9}"],
- ["Ol\u{E9}", "text/html", false, "Ol\u{E9}"],
- ["Test", "text/xml", false, "Test"],
- ["Ol\u{E9}", "text/xml", false, "Ol\u{E9}"],
- ["Ol\xE9", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
- ["\u{FEFF}Ol\u{E9}", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
- ["Ol\xE9", "text/xml", false, "Ol\u{E9}"],
- ["Ol\xE9", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
- ["Ol\u{E9}", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
- ["Ol\u{E9}", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
- ["Ol\u{E9}", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
- ["Ol\u{E9}", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
- ["Ol\u{E9}", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
- ["Ol\u{E9}", "text/xml;charset=UTF-8", false, "Ol\u{E9}"],
- ["Ol\xE9", "text/xml;charset=windows-1252", false, "Ol\u{E9}"],
- [$mkUtf16("\xFE\xFFOl\x00\xE9", false), "text/xml", false, "Ol\u{E9}"],
- [$mkUtf16("\xFF\xFEOl\xE9\x00", true), "text/xml", false, "Ol\u{E9}"],
- [$mkUtf16("Ol\x00\xE9", false), "text/xml", false, "Ol\u{E9}"],
- [$mkUtf16("Ol\xE9\x00", true), "text/xml", false, "Ol\u{E9}"],
- [$mkUtf16("\xFE\xFFOl\x00\xE9", false), "text/xml", false, "Ol\u{E9}"],
- [$mkUtf16("\xFF\xFEOl\xE9\x00", true), "text/xml", false, "Ol\u{E9}"],
- [$mkUtf16("Ol\x00\xE9", false), "text/xml;charset=utf-16be", false, "Ol\u{E9}"],
- [$mkUtf16("Ol\xE9\x00", true), "text/xml;charset=utf-16le", false, "Ol\u{E9}"],
+ ["Test", "text/html", "Test"],
+ ["Ol\xE9", "text/html", "Ol\u{E9}"],
+ ["Ol\u{E9}", "text/html;charset=utf8", "Ol\u{E9}"],
+ ["Ol\u{E9}", "text/html", "Ol\u{E9}"],
+ ["Test", "text/xml", "Test"],
+ ["Ol\u{E9}", "text/xml", "Ol\u{E9}"],
+ ["Ol\xE9", "text/xml;charset=windows-1252", "Ol\u{E9}"],
+ ["\u{FEFF}Ol\u{E9}", "text/xml;charset=windows-1252", "Ol\u{E9}"],
+ ["Ol\xE9", "text/xml", "Ol\u{E9}"],
+ ["Ol\xE9", "text/xml;charset=windows-1252", "Ol\u{E9}"],
+ ["Ol\u{E9}", "text/xml;charset=UTF-8", "Ol\u{E9}"],
+ ["Ol\u{E9}", "text/xml;charset=UTF-8", "Ol\u{E9}"],
+ ["Ol\u{E9}", "text/xml;charset=UTF-8", "Ol\u{E9}"],
+ ["Ol\u{E9}", "text/xml;charset=UTF-8", "Ol\u{E9}"],
+ ["Ol\u{E9}", "text/xml;charset=UTF-8", "Ol\u{E9}"],
+ ["Ol\u{E9}", "text/xml;charset=UTF-8", "Ol\u{E9}"],
+ ["Ol\xE9", "text/xml;charset=windows-1252", "Ol\u{E9}"],
+ [$mkUtf16("\xFE\xFFOl\x00\xE9", false), "text/xml", "Ol\u{E9}"],
+ [$mkUtf16("\xFF\xFEOl\xE9\x00", true), "text/xml", "Ol\u{E9}"],
+ [$mkUtf16("Ol\x00\xE9", false), "text/xml", "Ol\u{E9}"],
+ [$mkUtf16("Ol\xE9\x00", true), "text/xml", "Ol\u{E9}"],
+ [$mkUtf16("\xFE\xFFOl\x00\xE9", false), "text/xml", "Ol\u{E9}"],
+ [$mkUtf16("\xFF\xFEOl\xE9\x00", true), "text/xml", "Ol\u{E9}"],
+ [$mkUtf16("Ol\x00\xE9", false), "text/xml;charset=utf-16be", "Ol\u{E9}"],
+ [$mkUtf16("Ol\xE9\x00", true), "text/xml;charset=utf-16le", "Ol\u{E9}"],
];
}
+
+ /** Input that fails to parse as text/xml must come back as a "parsererror" document in the expected namespace. */
+ public function testFailToParseADocument(): void {
+ $root = (new DOMParser)->parseFromString("TestTest", "text/xml")->documentElement;
+ $this->assertSame("parsererror", $root->tagName);
+ $this->assertSame("http://www.mozilla.org/newlayout/xml/parsererror.xml", $root->namespaceURI);
+ // The wording of the error description is not pinned, only that some text is present.
+ $this->assertNotSame("", trim($root->textContent));
+ }
+
+ /** Passing "text/plain" as the type must raise \InvalidArgumentException. */
+ public function testParseWithIncorrectType(): void {
+ $parser = new DOMParser;
+ $this->expectException(\InvalidArgumentException::class);
+ $parser->parseFromString("Ol\u{E9}", "text/plain");
+ }
}