From 7940aeac97c74990dab69dc7cc998caf1352c216 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Sat, 2 Oct 2021 08:19:07 -0500 Subject: [PATCH] More tests, updated name coercion --- composer.json | 9 ++++- composer.lock | 57 ++++++++++++++++++++++++-- lib/AbstractDocument.php | 2 +- lib/Document.php | 6 +-- lib/Element.php | 2 +- lib/traits/DocumentOrElement.php | 21 ++++++++++ lib/traits/EscapeString.php | 58 --------------------------- lib/traits/Moonwalk.php | 1 + lib/traits/Node.php | 1 + lib/traits/ParentNode.php | 1 + lib/traits/ToString.php | 1 + lib/traits/Walk.php | 1 + tests/cases/Document/TestDocument.php | 47 +++++++++++++++------- 13 files changed, 124 insertions(+), 83 deletions(-) delete mode 100644 lib/traits/EscapeString.php diff --git a/composer.json b/composer.json index 4c28921..10b7d86 100644 --- a/composer.json +++ b/composer.json @@ -35,7 +35,11 @@ "autoload-dev": { "psr-4": { "MensBeam\\HTML\\DOM\\Test\\": "tests/lib/", - "MensBeam\\HTML\\DOM\\TestCase\\": "tests/cases/" + "MensBeam\\HTML\\DOM\\TestCase\\": [ + "tests/cases/", + "tests/cases/Document", + "tests/cases/Serializer" + ] } }, "repositories": [ @@ -46,6 +50,7 @@ ], "require-dev": { "bamarni/composer-bin-plugin": "^1.3", - "daux/daux.io": "^0.16.0" + "daux/daux.io": "^0.16.0", + "mikey179/vfsstream": "^1.6" } } diff --git a/composer.lock b/composer.lock index 04d5643..6b28453 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "fa97f8577cdf49f5f1dc348f28cf141b", + "content-hash": "a7994e69dd088a79ef9b73cb8f581abd", "packages": [ { "name": "mensbeam/html-parser", @@ -12,7 +12,7 @@ "source": { "type": "git", "url": "mensbeam-gitea:MensBeam/HTML-Parser.git", - "reference": "a88a3ae1074c368592d2c6c1176eac5a4ad2ac02" + "reference": "6ee9f0ccc22f0f1094eda44a89069c5f2ef2280f" }, "require": { "ext-dom": "*", @@ -72,7 +72,7 
@@ } ], "description": "Parses modern HTML text into a PHP DOMDocument", - "time": "2021-10-01T02:59:03+00:00" + "time": "2021-10-02T02:46:52+00:00" }, { "name": "mensbeam/intl", @@ -762,6 +762,57 @@ }, "time": "2020-12-25T05:00:37+00:00" }, + { + "name": "mikey179/vfsstream", + "version": "v1.6.10", + "source": { + "type": "git", + "url": "https://github.com/bovigo/vfsStream.git", + "reference": "250c0825537d501e327df879fb3d4cd751933b85" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/bovigo/vfsStream/zipball/250c0825537d501e327df879fb3d4cd751933b85", + "reference": "250c0825537d501e327df879fb3d4cd751933b85", + "shasum": "" + }, + "require": { + "php": ">=5.3.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.5|^5.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.6.x-dev" + } + }, + "autoload": { + "psr-0": { + "org\\bovigo\\vfs\\": "src/main/php" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Frank Kleine", + "homepage": "http://frankkleine.de/", + "role": "Developer" + } + ], + "description": "Virtual file system to mock the real file system in unit tests.", + "homepage": "http://vfs.bovigo.org/", + "support": { + "issues": "https://github.com/bovigo/vfsStream/issues", + "source": "https://github.com/bovigo/vfsStream/tree/master", + "wiki": "https://github.com/bovigo/vfsStream/wiki" + }, + "time": "2021-09-25T08:05:01+00:00" + }, { "name": "psr/container", "version": "1.1.1", diff --git a/lib/AbstractDocument.php b/lib/AbstractDocument.php index 213abec..3a6d1f1 100644 --- a/lib/AbstractDocument.php +++ b/lib/AbstractDocument.php @@ -10,5 +10,5 @@ namespace MensBeam\HTML\DOM; // Exists so Document can extend methods from its traits. 
abstract class AbstractDocument extends \DOMDocument { - use DocumentOrElement, EscapeString, MagicProperties, ParentNode, Walk; + use DocumentOrElement, MagicProperties, ParentNode, Walk; } diff --git a/lib/Document.php b/lib/Document.php index 354d7e2..8687f21 100644 --- a/lib/Document.php +++ b/lib/Document.php @@ -386,7 +386,7 @@ class Document extends AbstractDocument { } public function save($filename, $options = null) { - return file_put_contents($filename, $this->serialize()); + return file_put_contents($filename, $this->saveHTML()); } public function saveHTML(\DOMNode $node = null): string { @@ -432,11 +432,11 @@ class Document extends AbstractDocument { } public function validate(): bool { - return true; + throw new DOMException(DOMException::NOT_SUPPORTED, __CLASS__ . ' is only meant for HTML'); } public function xinclude($options = null): bool { - return false; + throw new DOMException(DOMException::NOT_SUPPORTED, __CLASS__ . ' is only meant for HTML'); } diff --git a/lib/Element.php b/lib/Element.php index 4564532..7bc3678 100644 --- a/lib/Element.php +++ b/lib/Element.php @@ -11,7 +11,7 @@ use MensBeam\HTML\Parser; class Element extends \DOMElement { - use DocumentOrElement, EscapeString, MagicProperties, Moonwalk, ParentNode, ToString, Walk; + use DocumentOrElement, MagicProperties, Moonwalk, ParentNode, ToString, Walk; protected $_classList; diff --git a/lib/traits/DocumentOrElement.php b/lib/traits/DocumentOrElement.php index cafb683..f82b967 100644 --- a/lib/traits/DocumentOrElement.php +++ b/lib/traits/DocumentOrElement.php @@ -7,10 +7,14 @@ declare(strict_types=1); namespace MensBeam\HTML\DOM; +use MensBeam\HTML\Parser\NameCoercion; + // This exists because the DOM spec for some stupid reason doesn't give // DocumentFragment some methods. 
trait DocumentOrElement { + use NameCoercion; + public function getElementsByClassName(string $classNames): \DOMNodeList { # The list of elements with class names classNames for a node root is the # HTMLCollection returned by the following algorithm: @@ -57,4 +61,21 @@ trait DocumentOrElement { return ($isDocument) ? $document->xpath->query($query) : $document->xpath->query($query, $this); } + + + protected function escapeString(string $string, bool $attribute = false): string { + # Escaping a string (for the purposes of the algorithm above) consists of + # running the following steps: + + # 1. Replace any occurrence of the "&" character by the string "&amp;". + # 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the + # string "&nbsp;". + $string = str_replace(['&', "\u{A0}"], ['&amp;', '&nbsp;'], $string); + # 3. If the algorithm was invoked in the attribute mode, replace any + # occurrences of the """ character by the string "&quot;". + # 4. If the algorithm was not invoked in the attribute mode, replace any + # occurrences of the "<" character by the string "&lt;", and any + # occurrences of the ">" character by the string "&gt;". + return ($attribute) ? str_replace('"', '&quot;', $string) : str_replace(['<', '>'], ['&lt;', '&gt;'], $string); + } } \ No newline at end of file diff --git a/lib/traits/EscapeString.php b/lib/traits/EscapeString.php deleted file mode 100644 index c70fd42..0000000 --- a/lib/traits/EscapeString.php +++ /dev/null @@ -1,58 +0,0 @@ -" character by the string "&gt;". - return ($attribute) ? 
str_replace('"', '&quot;', $string) : str_replace(['<', '>'], ['&lt;', '&gt;'], $string); - } - - protected function coerceName(string $name): string { - // This matches the inverse of the production of NameChar in XML 1.0, - // with the added exclusion of ":" from allowed characters - // See https://www.w3.org/TR/REC-xml/#NT-NameStartChar - preg_match_all('/[^\-\.0-9\x{B7}\x{300}-\x{36F}\x{203F}-\x{2040}A-Za-z_\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/u', $name, $m); - foreach (array_unique($m[0], \SORT_STRING) as $c) { - $o = (new UTF8($c))->nextCode(); - $esc = "U".str_pad(strtoupper(dechex($o)), 6, "0", \STR_PAD_LEFT); - $name = str_replace($c, $esc, $name); - } - // Apply stricter rules to the first character - if (preg_match('/^[^A-Za-z_\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/u', $name, $m)) { - $c = $m[0]; - $o = (new UTF8($c))->nextCode(); - $esc = "U".str_pad(strtoupper(dechex($o)), 6, "0", \STR_PAD_LEFT); - $name = $esc.substr($name, strlen($c)); - } - return $name; - } - - protected function uncoerceName(string $name): string { - preg_match_all('/U[0-9A-F]{6}/', $name, $m); - foreach (array_unique($m[0], \SORT_STRING) as $o) { - $c = UTF8::encode(hexdec(substr($o, 1))); - $name = str_replace($o, $c, $name); - } - return $name; - } -} diff --git a/lib/traits/Moonwalk.php b/lib/traits/Moonwalk.php index 869014b..cc57d5d 100644 --- a/lib/traits/Moonwalk.php +++ b/lib/traits/Moonwalk.php @@ -8,6 +8,7 @@ declare(strict_types=1); namespace MensBeam\HTML\DOM; + trait Moonwalk { /** * Generator which walks backwards through the DOM from the node the method is diff --git a/lib/traits/Node.php b/lib/traits/Node.php index a1350a6..c5035dd 100644 --- a/lib/traits/Node.php +++ 
b/lib/traits/Node.php @@ -8,6 +8,7 @@ declare(strict_types=1); namespace MensBeam\HTML\DOM; + // Extensions to PHP's DOM cannot inherit from an extended Node parent, so a // trait is the next best thing... trait Node { diff --git a/lib/traits/ParentNode.php b/lib/traits/ParentNode.php index d992122..39458ec 100644 --- a/lib/traits/ParentNode.php +++ b/lib/traits/ParentNode.php @@ -8,6 +8,7 @@ declare(strict_types=1); namespace MensBeam\HTML\DOM; + # 4.2.6. Mixin ParentNode trait ParentNode { use Node, ParentNodePolyfill; diff --git a/lib/traits/ToString.php b/lib/traits/ToString.php index d7dfb07..59f3dd0 100644 --- a/lib/traits/ToString.php +++ b/lib/traits/ToString.php @@ -8,6 +8,7 @@ declare(strict_types=1); namespace MensBeam\HTML\DOM; + trait ToString { public function __toString(): string { $frag = $this->ownerDocument->createDocumentFragment(); diff --git a/lib/traits/Walk.php b/lib/traits/Walk.php index 6e1474a..65cdec8 100644 --- a/lib/traits/Walk.php +++ b/lib/traits/Walk.php @@ -8,6 +8,7 @@ declare(strict_types=1); namespace MensBeam\HTML\DOM; + trait Walk { /** * Generator which walks down the DOM from the node the method is being run on. 
diff --git a/tests/cases/Document/TestDocument.php b/tests/cases/Document/TestDocument.php index 96d7c67..2d4c127 100644 --- a/tests/cases/Document/TestDocument.php +++ b/tests/cases/Document/TestDocument.php @@ -11,10 +11,10 @@ use MensBeam\HTML\DOM\{ DOMException, Element, ElementMap, - Exception, HTMLTemplateElement }; -use MensBeam\HTML\Parser; +use MensBeam\HTML\Parser, + org\bovigo\vfs\vfsStream; /** @covers \MensBeam\HTML\DOM\Document */ @@ -63,26 +63,27 @@ class TestDocument extends \PHPUnit\Framework\TestCase { } + public function provideDisabledMethods(): iterable { + return [ + [ 'loadXML', 'ook' ], + [ 'saveXML', null ], + [ 'validate', null ], + [ 'xinclude', null ], + ]; + } + /** + * @dataProvider provideDisabledMethods * @covers \MensBeam\HTML\DOM\Document::loadXML * @covers \MensBeam\HTML\DOM\Document::saveXML * @covers \MensBeam\HTML\DOM\Document::validate * @covers \MensBeam\HTML\DOM\Document::xinclude */ - public function testDisabledMethods() { + public function testDisabledMethods(string $methodName, ?string $argument): void { $this->expectException(DOMException::class); $this->expectExceptionCode(DOMException::NOT_SUPPORTED); $d = new Document(); - $d->loadXML('ook'); - - $d = new Document(); - $d->saveXML('ook'); - - $d = new Document(); - $d->validate(); - - $d = new Document(); - $d->xinclude('ook'); + $d->$methodName($argument); } @@ -90,6 +91,8 @@ class TestDocument extends \PHPUnit\Framework\TestCase { * @covers \MensBeam\HTML\DOM\Document::__construct * @covers \MensBeam\HTML\DOM\Document::loadDOM * @covers \MensBeam\HTML\DOM\Document::loadHTML + * @covers \MensBeam\HTML\DOM\Document::preInsertionValidity + * @covers \MensBeam\HTML\DOM\Document::__get_quirksMode */ public function testDocumentCreation(): void { // Test null source @@ -159,7 +162,7 @@ class TestDocument extends \PHPUnit\Framework\TestCase { * @covers \MensBeam\HTML\DOM\Document::validateAndExtract */ public function testElementCreationNS(?string $nsIn, ?string $nsOut, 
string $localIn, string $localOut, string $class): void { - $d = new Document; + $d = new Document(); $n = $d->createElementNS($nsIn, $localIn); $this->assertInstanceOf($class, $n); $this->assertNotNull($n->ownerDocument); @@ -168,6 +171,20 @@ class TestDocument extends \PHPUnit\Framework\TestCase { } + /** + * @covers \MensBeam\HTML\DOM\Document::save + * @covers \MensBeam\HTML\DOM\Document::saveHTMLFile + */ + public function testFileSaving(): void { + $vfs = vfsStream::setup('DOM', 0777); + $d = new Document(); + $d->appendChild($d->createElement('html')); + $path = $vfs->url() . '/test.html'; + $d->save($path); + $this->assertSame('<html></html>', file_get_contents($path)); + } + + /** @covers \MensBeam\HTML\DOM\Document::importNode */ public function testImportingNodes() { $d = new Document(); @@ -235,4 +252,4 @@ class TestDocument extends \PHPUnit\Framework\TestCase { $d->body->removeChild($t); $this->assertFalse(ElementMap::has($t)); } -} +} \ No newline at end of file