diff --git a/README.md b/README.md index 88fa1fc..87fbe88 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,6 @@ The primary aim of this library is accuracy. If the document model differs from 1. Due to PHP's DOM being designed for XML 1.0 Second Edition, element and attribute names which are illegal in XML 1.0 Second Edition are mangled as recommended by the specification. 2. Due to a PHP bug which severely degrades performance with large documents and in consideration of existing PHP software, HTML elements are placed in the null namespace rather than in the HTML namespace. 3. While `DOMDocumentType` can be extended and registered by PHP's `DOMDocument::registerNodeClass` `DOMImplementation` cannot; this means that doctypes created with `DOMImplementation::createDocumentType` can't ever be a registered class. Therefore, doctypes remain as `DOMDocumentType` in this library and retain the same limitations as ones in PHP's DOM. -4. The DOM specification mentions that [`HTMLCollection`][a] has to be kept around for backwards compatibility in browsers, but any new implementations should use [`sequence`][b] instead which is essentially just a typed array object of some kind. Any methods should also return a copy of an object instead of a reference to the platform object, meaning the bane of any web developer's existence -- live lists -- shouldn't be in any new additions to the DOM. Since this implementation is not a fully userland PHP implementation of the DOM but instead an extension of it, this implementation will use `DOMNodeList` where the specification says to use an `HTMLCollection` and an array where the specification says to use a `sequence`. +4. The DOM specification mentions that [`HTMLCollection`][a] has to be kept around for backwards compatibility in browsers, but any new implementations should use [`sequence`][b] instead which is essentially just a typed array object of some kind. 
Any methods should also return a copy of an object instead of a reference to the platform object, meaning the bane of any web developer's existence -- live lists -- shouldn't be in any new additions to the DOM. Since this implementation is not a fully userland PHP implementation of the DOM but instead an extension of it, this implementation will use `DOMNodeList` where the specification says to use an `HTMLCollection` and an array where the specification says to use a `sequence`. In addition, if the specification states to return a static `NodeList`, this implementation will use `MensBeam\HTML\DOM\NodeList` instead; this is because `DOMNodeList` is always live in PHP. 5. Aside from `HTMLTemplateElement` there are no other specific element classes such as `HTMLAnchorElement`, `HTMLDivElement`, etc. and therefore are no DOM methods and properties that are specific to those elements. Implementing them is possible, but we weighed it against its utility as each specific element slows down the DOM seemingly exponentially especially when parsing serialized HTML because each element has to be converted to the specific variety manually and recursively. For instance, when parsing the WHATWG's single page HTML specification (which is an absurdly enormous HTML document on the very edge of what we should be able to parse) in our tests it takes around 6.5 seconds; with specific element classes it instead takes *15 minutes*. [`phpgt/dom`][c] mitigates this by only converting when querying for elements, but it's still slow. We decided not to go this route. 6. This implementation will not implement the `NodeIterator` and `TreeWalker` APIs. They are horribly conceived and impractical APIs that few people actually use because it's literally easier to write recursive loops to walk through the DOM than it is to use those APIs. They have instead been replaced with the `ChildNode::moonwalk`, `ParentNode::walk`, `ChildNode::walkFollowing`, and `ChildNode::walkPreceding` generators. 
\ No newline at end of file diff --git a/composer.json b/composer.json index f13f55d..3fc1ae9 100644 --- a/composer.json +++ b/composer.json @@ -6,7 +6,8 @@ "php": ">=8.0", "ext-dom": "*", "mensbeam/html-parser": ">=1.0", - "mensbeam/framework": "^1.0" + "mensbeam/framework": "^1.0", + "symfony/css-selector": "^5.3" }, "scripts": { "post-install-cmd": ["@composer bin all install"], diff --git a/composer.lock b/composer.lock index 5dab1c7..c6e2b3f 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "c11fdde4a081e999e47b166934319a92", + "content-hash": "66041b42116f0d3ea068de467bb0a990", "packages": [ { "name": "mensbeam/framework", @@ -261,6 +261,155 @@ "source": "https://github.com/php-fig/http-message/tree/master" }, "time": "2016-08-06T14:39:51+00:00" + }, + { + "name": "symfony/css-selector", + "version": "v5.3.4", + "source": { + "type": "git", + "url": "https://github.com/symfony/css-selector.git", + "reference": "7fb120adc7f600a59027775b224c13a33530dd90" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/css-selector/zipball/7fb120adc7f600a59027775b224c13a33530dd90", + "reference": "7fb120adc7f600a59027775b224c13a33530dd90", + "shasum": "" + }, + "require": { + "php": ">=7.2.5", + "symfony/polyfill-php80": "^1.16" + }, + "type": "library", + "autoload": { + "psr-4": { + "Symfony\\Component\\CssSelector\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Jean-François Simon", + "email": "jeanfrancois.simon@sensiolabs.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Converts CSS selectors to XPath 
expressions", + "homepage": "https://symfony.com", + "support": { + "source": "https://github.com/symfony/css-selector/tree/v5.3.4" + }, + "funding": [ + { + "url": "https://symfony.com/sponsor", + "type": "custom" + }, + { + "url": "https://github.com/fabpot", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", + "type": "tidelift" + } + ], + "time": "2021-07-21T12:38:00+00:00" + }, + { + "name": "symfony/polyfill-php80", + "version": "v1.23.1", + "source": { + "type": "git", + "url": "https://github.com/symfony/polyfill-php80.git", + "reference": "1100343ed1a92e3a38f9ae122fc0eb21602547be" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/polyfill-php80/zipball/1100343ed1a92e3a38f9ae122fc0eb21602547be", + "reference": "1100343ed1a92e3a38f9ae122fc0eb21602547be", + "shasum": "" + }, + "require": { + "php": ">=7.1" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-main": "1.23-dev" + }, + "thanks": { + "name": "symfony/polyfill", + "url": "https://github.com/symfony/polyfill" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Polyfill\\Php80\\": "" + }, + "files": [ + "bootstrap.php" + ], + "classmap": [ + "Resources/stubs" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Ion Bazan", + "email": "ion.bazan@gmail.com" + }, + { + "name": "Nicolas Grekas", + "email": "p@tchwork.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony polyfill backporting some PHP 8.0+ features to lower PHP versions", + "homepage": "https://symfony.com", + "keywords": [ + "compatibility", + "polyfill", + "portable", + "shim" + ], + "support": { + "source": "https://github.com/symfony/polyfill-php80/tree/v1.23.1" + }, + "funding": [ + { + "url": "https://symfony.com/sponsor", + "type": "custom" + }, + { + "url": "https://github.com/fabpot", + "type": 
"github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", + "type": "tidelift" + } + ], + "time": "2021-07-28T13:41:28+00:00" } ], "packages-dev": [ @@ -2156,89 +2305,6 @@ ], "time": "2021-02-19T12:13:01+00:00" }, - { - "name": "symfony/polyfill-php80", - "version": "v1.23.1", - "source": { - "type": "git", - "url": "https://github.com/symfony/polyfill-php80.git", - "reference": "1100343ed1a92e3a38f9ae122fc0eb21602547be" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/symfony/polyfill-php80/zipball/1100343ed1a92e3a38f9ae122fc0eb21602547be", - "reference": "1100343ed1a92e3a38f9ae122fc0eb21602547be", - "shasum": "" - }, - "require": { - "php": ">=7.1" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-main": "1.23-dev" - }, - "thanks": { - "name": "symfony/polyfill", - "url": "https://github.com/symfony/polyfill" - } - }, - "autoload": { - "psr-4": { - "Symfony\\Polyfill\\Php80\\": "" - }, - "files": [ - "bootstrap.php" - ], - "classmap": [ - "Resources/stubs" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Ion Bazan", - "email": "ion.bazan@gmail.com" - }, - { - "name": "Nicolas Grekas", - "email": "p@tchwork.com" - }, - { - "name": "Symfony Community", - "homepage": "https://symfony.com/contributors" - } - ], - "description": "Symfony polyfill backporting some PHP 8.0+ features to lower PHP versions", - "homepage": "https://symfony.com", - "keywords": [ - "compatibility", - "polyfill", - "portable", - "shim" - ], - "support": { - "source": "https://github.com/symfony/polyfill-php80/tree/v1.23.1" - }, - "funding": [ - { - "url": "https://symfony.com/sponsor", - "type": "custom" - }, - { - "url": "https://github.com/fabpot", - "type": "github" - }, - { - "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", - "type": "tidelift" - } - ], - "time": "2021-07-28T13:41:28+00:00" - }, { "name": 
"symfony/process", "version": "v5.3.7", diff --git a/lib/Attr.php b/lib/Attr.php index 126e6b3..fc5def8 100644 --- a/lib/Attr.php +++ b/lib/Attr.php @@ -1,7 +1,7 @@ walk(function($n) { return ($n instanceof Element && $n->hasAttribute('id')); }, true); diff --git a/lib/DocumentFragment.php b/lib/DocumentFragment.php index 8ddf253..cd48db3 100644 --- a/lib/DocumentFragment.php +++ b/lib/DocumentFragment.php @@ -1,7 +1,7 @@ walk(function($n) use($elementId) { + return ($n instanceof Element && $n->getAttribute('id') === $elementId); + })->current(); + } + + public function __toString() { return $this->ownerDocument->saveHTML($this); } diff --git a/lib/Element.php b/lib/Element.php index 1f34722..d3fb8d0 100644 --- a/lib/Element.php +++ b/lib/Element.php @@ -1,7 +1,7 @@ _length; + } + + + public function __construct(iterable $iterable) { + // Per the specification one cannot create a NodeList via its constructor, but + // this implementation is not going to build up the framework for that. + + // Check types while also unpacking the traversable. + $array = []; + foreach ($iterable as $i) { + if (!$i instanceof Node && !$i instanceof \DOMDocumentType) { + $type = gettype($i); + if ($type === 'object') { + $type = get_class($i); + } + throw new Exception(Exception::ARGUMENT_TYPE_ERROR, 1, 'traversable', 'Node|\\DOMDocumentType', $type); + } + + $array[] = $i; + } + + $this->storage = $array; + $this->_length = count($array); + } + + public function count(): int { + return $this->_length; + } + + public function current(): Node|\DOMDocumentType|null { + return $this->item($this->position); + } + + public function item(int $index): Node|\DOMDocumentType|null { + # The item(index) method must return the indexth node in the collection. If + # there is no indexth node in the collection, then the method must return null. 
+ if ($index < 0 || $index >= $this->_length) { // a negative index is out of range too; guarding avoids an undefined-offset read + return null; + } + + return $this->storage[$index]; + } + + public function key(): int { + return $this->position; + } + + public function next(): void { + ++$this->position; + } + + public function rewind(): void { + $this->position = 0; + } + + public function offsetExists($offset): bool { + return isset($this->storage[$offset]); + } + + public function offsetGet($offset): Node|\DOMDocumentType|null { + return $this->item($offset); + } + + public function offsetSet($offset, $value): void { + // NodeLists are immutable + } + + public function offsetUnset($offset): void { + // NodeLists are immutable + } + + public function valid(): bool { + return array_key_exists($this->position, $this->storage); + } +} diff --git a/lib/ProcessingInstruction.php b/lib/ProcessingInstruction.php index 3a3d299..7d4b4ab 100644 --- a/lib/ProcessingInstruction.php +++ b/lib/ProcessingInstruction.php @@ -1,7 +1,7 @@ _length; } - public function current(): string { + public function current(): ?string { return $this->item($this->position); } - public function item(int $index): string { + public function item(int $index): ?string { + # The item(index) method steps are: + # 1. If index is equal to or greater than this’s token set’s size, then return null. + if ($index < 0 || $index >= $this->_length) { // a negative index can never address a token + return null; + } + # 2. Return this’s token set[index]. return $this->tokenSet[$index]; } diff --git a/lib/traits/ChildNode.php b/lib/traits/ChildNode.php index b80cd05..c79eff7 100644 --- a/lib/traits/ChildNode.php +++ b/lib/traits/ChildNode.php @@ -1,7 +1,7 @@ scopeMatchSelector($selectors); + return ($result !== null) ? $result[0] : null; + } + + public function querySelectorAll(string $selectors): NodeList { + # The querySelectorAll(selectors) method steps are to return the static result + # of running scope-match a selectors string selectors against this. 
+ $nodeList = $this->scopeMatchSelector($selectors); + return new NodeList($nodeList ?? []); // scopeMatchSelector() returns null when nothing matches; the result must then be an empty list + } + public function removeChild($child) { $result = parent::removeChild($child); if ($result !== false && $child instanceof Element) { @@ -153,7 +169,7 @@ trait ParentNode { } - protected function preInsertionValidity(\DOMDocumentType|Node $node, \DOMDocumentType|Node $child = null) { + private function preInsertionValidity(\DOMDocumentType|Node $node, \DOMDocumentType|Node $child = null) { // "parent" in the spec comments below is $this # 1. If parent is not a Document, DocumentFragment, or Element node, then throw @@ -285,4 +301,28 @@ trait ParentNode { } } } + + private function scopeMatchSelector(string $selectors): ?\DOMNodeList { + # To scope-match a selectors string selectors against a node, run these steps: + # 1. Let s be the result of parse a selector selectors. [SELECTORS4] + // This implementation will instead convert the CSS selector to an XPath query + // using Symfony's CSS selector converter library. + try { + $converter = new CssSelectorConverter(); + $s = $converter->toXPath($selectors, 'descendant::'); // only descendants may match; the default descendant-or-self:: prefix would also match the context node + } catch (\Exception $e) { + # 2. If s is failure, then throw a "SyntaxError" DOMException. + throw new DOMException(DOMException::SYNTAX_ERROR); + } + + # 3. Return the result of match a selector against a tree with s and node’s root + # using scoping root node. [SELECTORS4]. + $doc = ($this instanceof Document) ? 
$this : $this->ownerDocument; + $nodeList = $doc->xpath->query($s, $this); + if ($nodeList->length === 0) { + return null; + } + + return $nodeList; + } } \ No newline at end of file diff --git a/lib/traits/ToString.php b/lib/traits/ToString.php index 59f3dd0..b5859f0 100644 --- a/lib/traits/ToString.php +++ b/lib/traits/ToString.php @@ -1,7 +1,7 @@ createDocumentFragment(); + $o = $df->appendChild($d->createElement('span')); + $o->setAttribute('id', 'eek'); + $this->assertSame(Element::class, $df->getElementById('eek')::class); + $this->assertNull($df->getElementById('ook')); + } + /** @covers \MensBeam\HTML\DOM\DocumentFragment::__get_host */ public function testGetHost(): void { $d = new Document(); diff --git a/tests/cases/TestDocumentOrElement.php b/tests/cases/TestDocumentOrElement.php index c4a3c29..9905856 100644 --- a/tests/cases/TestDocumentOrElement.php +++ b/tests/cases/TestDocumentOrElement.php @@ -1,7 +1,7 @@ expectException(Exception::class); + $this->expectExceptionCode(Exception::ARGUMENT_TYPE_ERROR); + $closure(); + } + + + /** @covers \MensBeam\HTML\DOM\NodeList::count */ + public function testCount(): void { + $d = new Document(); + $list = new NodeList([ + $d->createElement('ook'), + $d->createTextNode('eek'), + $d->createComment('ack') + ]); + $this->assertEquals(3, count($list)); + } + + + /** @covers \MensBeam\HTML\DOM\NodeList::item */ + public function testItem(): void { + $d = new Document(); + $list = new NodeList([ + $d->createElement('ook'), + $d->createTextNode('eek'), + $d->createComment('ack') + ]); + $this->assertNull($list->item(42)); + } + + + /** + * @covers \MensBeam\HTML\DOM\NodeList::current + * @covers \MensBeam\HTML\DOM\NodeList::item + * @covers \MensBeam\HTML\DOM\NodeList::key + * @covers \MensBeam\HTML\DOM\NodeList::next + * @covers \MensBeam\HTML\DOM\NodeList::rewind + * @covers \MensBeam\HTML\DOM\NodeList::offsetExists + * @covers \MensBeam\HTML\DOM\NodeList::offsetGet + * @covers \MensBeam\HTML\DOM\NodeList::valid + 
*/ + public function testIteration(): void { + $d = new Document(); + $list = new NodeList([ + $d->createElement('ook'), + $d->createTextNode('eek'), + $d->createComment('ack') + ]); + + foreach ($list as $key => $node) { + $this->assertSame($node, $list[$key]); + // test offsetExists + $this->assertTrue(isset($list[$key])); + } + } + + + /** @covers \MensBeam\HTML\DOM\NodeList::__get_length */ + public function testPropertyGetLength(): void { + $d = new Document(); + $list = new NodeList([ + $d->createElement('ook'), + $d->createTextNode('eek'), + $d->createComment('ack') + ]); + $this->assertEquals(3, $list->length); + } +} \ No newline at end of file diff --git a/tests/cases/TestNodeTrait.php b/tests/cases/TestNodeTrait.php index 88cafb6..8f19bc9 100644 --- a/tests/cases/TestNodeTrait.php +++ b/tests/cases/TestNodeTrait.php @@ -1,7 +1,7 @@
ook
eek
'); + $div = $d->body->querySelector('div'); + $this->assertSame('div', $div->nodeName); + + $divs = $d->body->querySelectorAll('div'); + $this->assertEquals(2, $divs->length); + $this->assertSame('eek', $divs->item(1)->getAttribute('id')); + + $this->assertNull($d->querySelector('.ook')); + } + + + /** + * @covers \MensBeam\HTML\DOM\ParentNode::querySelector + * @covers \MensBeam\HTML\DOM\ParentNode::querySelectorAll + * @covers \MensBeam\HTML\DOM\ParentNode::scopeMatchSelector + */ + public function testQuerySelectorFailure(): void { + $this->expectException(DOMException::class); + $this->expectExceptionCode(DOMException::SYNTAX_ERROR); + $d = new Document(); + $d->querySelector('ook?'); + } + + /** @covers \MensBeam\HTML\DOM\ParentNode::replaceChild */ public function testReplaceChild(): void { $d = new Document(); diff --git a/tests/cases/TestTokenList.php b/tests/cases/TestTokenList.php index ec49e58..58b4247 100644 --- a/tests/cases/TestTokenList.php +++ b/tests/cases/TestTokenList.php @@ -1,7 +1,7 @@ appendChild($d->createElement('html')); + $e->classList->add('ook', 'eek', 'ack', 'ookeek'); + $this->assertNull($e->classList->item(42)); + } + + /** * @covers \MensBeam\HTML\DOM\TokenList::current * @covers \MensBeam\HTML\DOM\TokenList::item diff --git a/tests/cases/serializer/TestSerializer.php b/tests/cases/serializer/TestSerializer.php index 546b647..974b1cc 100644 --- a/tests/cases/serializer/TestSerializer.php +++ b/tests/cases/serializer/TestSerializer.php @@ -1,7 +1,7 @@ cases/TestElement.php cases/TestElementMap.php cases/TestLeafNode.php + cases/TestNodeList.php cases/TestNodeTrait.php cases/TestParentNode.php cases/TestTokenList.php