From 89a3f81acccc5595f247a4223578d0fedd4a089e Mon Sep 17 00:00:00 2001
From: Dustin Wilson
Date: Sun, 28 Nov 2021 23:36:14 -0600
Subject: [PATCH] More work on Document

---
 README.md                    |  5 ++-
 lib/Document.php             | 85 ++++++++++++++++++++++++++++++++++++
 tests/cases/TestDocument.php | 49 +++++++++++++++++++++
 3 files changed, 137 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index a0a4395..298ae78 100644
--- a/README.md
+++ b/README.md
@@ -3,10 +3,11 @@
 [c]: https://packagist.org/packages/phpgt/dom
 [d]: https://dom.spec.whatwg.org
 [e]: #limitations
+[f]: https://html.spec.whatwg.org/multipage/dom.html
 
 # HTML DOM #
 
-Modern DOM library written in PHP for HTML documents. This library is an attempt to implement the [WHATWG's DOM specification][d] through a userland extension and encapsulation of PHP's built-in DOM. It exists because PHP's DOM is inaccurate, inadequate for use with any HTML, and extremely buggy. This implementation aims to fix as much as possible the inaccuracies of the PHP DOM, add in features necessary for modern HTML development, and circumvent most of the bugs.
+Modern DOM library written in PHP for HTML documents. This library is an attempt to implement the [WHATWG's DOM specification][d] and [WHATWG HTML DOM extensions specification][f] through a userland extension and encapsulation of PHP's built-in DOM. It exists because PHP's DOM is inaccurate, inadequate for use with any HTML, and extremely buggy. This implementation aims to fix as much as possible the inaccuracies of the PHP DOM, add in features necessary for modern HTML development, and circumvent most of the bugs.
 
 ## Usage ##
 
@@ -41,7 +42,7 @@ Coming soon
 
 ## Limitations & Differences from Specification ##
 
-The primary aim of this library is accuracy. However, due either to limitations imposed by PHP's DOM, by assumptions made by the specification that aren't applicable to a PHP library, or simply because of impractability some changes have needed to be made. These are as follows:
+The primary aim of this library is accuracy. However, due either to limitations imposed by PHP's DOM, by assumptions made by the specification that aren't applicable to a PHP library, or simply because of impracticality, some changes have needed to be made. These are as follows:
 
 1. Any mention of scripting or anything necessary because of scripting (such as the `ElementCreationOptions` options dictionary on `Document::createElement`) will not be implemented.
 2. Due to a PHP bug which severely degrades performance with large documents and in consideration of existing PHP software and because of bizarre uncircumventable `xmlns` attribute bugs when the document is in the HTML namespace, HTML elements in HTML documents are placed in the null namespace internally rather than in the HTML namespace. However, externally they will be shown as having the HTML namespace. Even though null namespaced elements do not exist in the HTML specification one can create them using the DOM. However, in this implementation they will be treated as HTML namespaced elements due to the HTML namespace limitation.
diff --git a/lib/Document.php b/lib/Document.php
index a297238..4eba3c1 100644
--- a/lib/Document.php
+++ b/lib/Document.php
@@ -81,6 +81,52 @@ class Document extends Node {
         return $this->_URL;
     }
 
+    protected function __get_embeds(): HTMLCollection {
+        # The embeds attribute must return an HTMLCollection rooted at the Document
+        # node, whose filter matches only embed elements.
+        // Because of how namespaces are handled internally they're null when a HTML document; an unprefixed XPath name test only matches null-namespaced elements, hence local-name().
+        $namespace = (!$this instanceof XMLDocument) ? '' : Node::HTML_NAMESPACE;
+        // HTMLCollections cannot be created from their constructors normally.
+        return Reflection::createFromProtectedConstructor(__NAMESPACE__ . '\\HTMLCollection', $this->innerNode, $this->innerNode->xpath->query(".//*[local-name()='embed' and namespace-uri()='$namespace']"));
+    }
+
+    protected function __get_forms(): HTMLCollection {
+        # The forms attribute must return an HTMLCollection rooted at the Document node,
+        # whose filter matches only form elements.
+        // Because of how namespaces are handled internally they're null when a HTML document; an unprefixed XPath name test only matches null-namespaced elements, hence local-name().
+        $namespace = (!$this instanceof XMLDocument) ? '' : Node::HTML_NAMESPACE;
+        // HTMLCollections cannot be created from their constructors normally.
+        return Reflection::createFromProtectedConstructor(__NAMESPACE__ . '\\HTMLCollection', $this->innerNode, $this->innerNode->xpath->query(".//*[local-name()='form' and namespace-uri()='$namespace']"));
+    }
+
+    protected function __get_head(): ?Element {
+        # The head element of a document is the first head element that is a child of
+        # the html element, if there is one, or null otherwise.
+        # The head attribute, on getting, must return the head element of the document
+        # (a head element or null).
+        $documentElement = $this->innerNode->documentElement;
+        if ($documentElement !== null) {
+            $children = $documentElement->childNodes;
+            foreach ($children as $child) {
+                // NOTE(review): only null-namespaced head elements match here; confirm whether XMLDocument (HTML namespace) should match too.
+                if ($child instanceof \DOMElement && $child->namespaceURI === null && $child->tagName === 'head') {
+                    return $this->innerNode->getWrapperNode($child);
+                }
+            }
+        }
+
+        return null;
+    }
+
+    protected function __get_images(): HTMLCollection {
+        # The images attribute must return an HTMLCollection rooted at the Document
+        # node, whose filter matches only img elements.
+        // Because of how namespaces are handled internally they're null when a HTML document; an unprefixed XPath name test only matches null-namespaced elements, hence local-name().
+        $namespace = (!$this instanceof XMLDocument) ? '' : Node::HTML_NAMESPACE;
+        // HTMLCollections cannot be created from their constructors normally.
+        return Reflection::createFromProtectedConstructor(__NAMESPACE__ . '\\HTMLCollection', $this->innerNode, $this->innerNode->xpath->query(".//*[local-name()='img' and namespace-uri()='$namespace']"));
+    }
+
     protected function __get_implementation(): DOMImplementation {
         return $this->_implementation;
     }
@@ -89,6 +135,30 @@ class Document extends Node {
         return $this->_characterSet;
     }
 
+    protected function __get_links(): HTMLCollection {
+        # The links attribute must return an HTMLCollection rooted at the Document node,
+        # whose filter matches only a elements with href attributes and area elements
+        # with href attributes.
+        // Because of how namespaces are handled internally they're null when a HTML document; local-name() is used so a namespace prefix cannot defeat the match.
+        $namespace = (!$this instanceof XMLDocument) ? '' : Node::HTML_NAMESPACE;
+        // HTMLCollections cannot be created from their constructors normally.
+        return Reflection::createFromProtectedConstructor(__NAMESPACE__ . '\\HTMLCollection', $this->innerNode, $this->innerNode->xpath->query(".//*[namespace-uri()='$namespace' and @href][local-name()='a' or local-name()='area']"));
+    }
+
+    protected function __get_plugins(): HTMLCollection {
+        # The plugins attribute must return the same object as that returned by the
+        # embeds attribute.
+        return $this->__get_embeds();
+    }
+
+    protected function __get_scripts(): HTMLCollection {
+        # The scripts attribute must return an HTMLCollection rooted at the Document node, whose filter matches only script elements.
+        // Because of how namespaces are handled internally they're null when a HTML document; an unprefixed XPath name test only matches null-namespaced elements, hence local-name().
+        $namespace = (!$this instanceof XMLDocument) ? '' : Node::HTML_NAMESPACE;
+        // HTMLCollections cannot be created from their constructors normally.
+        return Reflection::createFromProtectedConstructor(__NAMESPACE__ . '\\HTMLCollection', $this->innerNode, $this->innerNode->xpath->query(".//*[local-name()='script' and namespace-uri()='$namespace']"));
+    }
+
     protected function __get_title(): string {
         # The title attribute must, on getting, run the following algorithm:
         # 1. If the document element is an SVG svg element, then let value be the child text
@@ -471,6 +541,21 @@ class Document extends Node {
         return $this->innerNode->getWrapperNode($this->innerNode->createTextNode($data));
     }
 
+    public function getElementsByName(string $elementName): NodeList {
+        # The getElementsByName(elementName) method steps are to return a live NodeList
+        # containing all the HTML elements in that document that have a name attribute
+        # whose value is identical to the elementName argument, in tree order. When the
+        # method is invoked on a Document object again with the same argument, the user
+        # agent may return the same as the object returned by the earlier call. In other
+        # cases, a new NodeList object must be returned.
+        // Because of how namespaces are handled internally they're null when a HTML document.
+        $namespace = (!$this instanceof XMLDocument) ? '' : Node::HTML_NAMESPACE;
+        // XPath 1.0 string literals have no escape syntax, so the name is passed as a concat() of single-quoted pieces with any apostrophes double-quoted.
+        $name = "concat('" . str_replace("'", "', \"'\", '", $elementName) . "', '')";
+        // NodeLists cannot be created from their constructors normally.
+        return Reflection::createFromProtectedConstructor(__NAMESPACE__ . '\\NodeList', $this->innerNode, $this->innerNode->xpath->query(".//*[namespace-uri()='$namespace' and @name=$name]"));
+    }
+
     public function importNode(Node|\DOMNode $node, bool $deep = false): Node {
         # The importNode(node, deep) method steps are:
         #
diff --git a/tests/cases/TestDocument.php b/tests/cases/TestDocument.php
index 8acef80..470297e 100644
--- a/tests/cases/TestDocument.php
+++ b/tests/cases/TestDocument.php
@@ -158,6 +158,13 @@ class TestDocument extends \PHPUnit\Framework\TestCase {
     }
 
 
+    public function testMethod_getElementsByName() {
+        $d = new Document('<!DOCTYPE html><html><body><div name="ook"></div><div name="eek"></div><input name="ook"></body></html>');
+        $this->assertEquals(2, $d->getElementsByName('ook')->length);
+        $this->assertEquals(0, $d->getElementsByName("o'ok")->length);
+    }
+
+
     /**
      * @covers \MensBeam\HTML\DOM\Document::importNode
      *
@@ -487,6 +494,48 @@ class TestDocument extends \PHPUnit\Framework\TestCase {
     }
 
 
+    public function testProperty_embeds() {
+        $d = new Document('<!DOCTYPE html><html><body><embed><embed><embed><div><embed></div></body></html>');
+        $this->assertEquals(4, $d->embeds->length);
+        $this->assertEquals(4, $d->plugins->length);
+    }
+
+
+    public function testProperty_forms() {
+        $d = new Document('<!DOCTYPE html><html><body><form></form><form></form><div><form></form></div><form></form></body></html>');
+        $this->assertEquals(4, $d->forms->length);
+    }
+
+
+    public function testProperty_head() {
+        $d = new Document();
+        $this->assertNull($d->head);
+
+        $de = $d->appendChild($d->createElement('html'));
+        $head = $de->appendChild($d->createElement('head'));
+
+        $this->assertSame($head, $d->head);
+    }
+
+
+    public function testProperty_images() {
+        $d = new Document('<!DOCTYPE html><html><body><img><img><p><img></p><img></body></html>');
+        $this->assertEquals(4, $d->images->length);
+    }
+
+
+    public function testProperty_links() {
+        $d = new Document('<!DOCTYPE html><html><body><a href="ook">Ook</a><a>Eek</a><map><area href="ook"><area></map><a href="eek">Eek</a><area href="eek"></body></html>');
+        $this->assertEquals(4, $d->links->length);
+    }
+
+
+    public function testProperty_scripts() {
+        $d = new Document('<!DOCTYPE html><html><head><script></script><script></script></head><body><script></script><div><script></script></div></body></html>');
+        $this->assertEquals(4, $d->scripts->length);
+    }
+
+
     public function testProperty_title() {
         $d = new Document();
         $this->assertSame('', $d->title);