Browse Source

Playing around with wrapper classes

wrapper-classes
Dustin Wilson 3 years ago
parent
commit
81cc580711
  1. 3
      .gitignore
  2. 16
      lib/CharacterData.php
  3. 6
      lib/DOMException.php
  4. 1037
      lib/Document.php
  5. 71
      lib/DocumentFragment.php
  6. 444
      lib/Element.php
  7. 96
      lib/ElementMap.php
  8. 24
      lib/Exception.php
  9. 48
      lib/HTMLTemplateElement.php
  10. 12
      lib/InnerNode/Attr.php
  11. 12
      lib/InnerNode/CDATASection.php
  12. 7
      lib/InnerNode/Comment.php
  13. 92
      lib/InnerNode/Document.php
  14. 12
      lib/InnerNode/DocumentFragment.php
  15. 12
      lib/InnerNode/Element.php
  16. 6
      lib/InnerNode/Node.php
  17. 59
      lib/InnerNode/NodeMap.php
  18. 12
      lib/InnerNode/ProcessingInstruction.php
  19. 7
      lib/InnerNode/Text.php
  20. 122
      lib/Node.php
  21. 104
      lib/NodeList.php
  22. 128
      lib/ParentNode.php
  23. 13
      lib/ProcessingInstruction.php
  24. 336
      lib/TokenList.php
  25. 269
      lib/traits/ChildNode.php
  26. 151
      lib/traits/DocumentOrElement.php
  27. 33
      lib/traits/LeafNode.php
  28. 272
      lib/traits/NodeTrait.php
  29. 335
      lib/traits/ParentNode.php
  30. 18
      lib/traits/ToString.php

3
.gitignore

@ -1,8 +1,9 @@
# html5-parser specific
# HTML-DOM specific
manual
node_modules
/test*.html
/test*.php
lib/old
# General
*.DS_Store

16
lib/CharacterData.php

@ -1,16 +0,0 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
/**
* Exists for inheritance reasons. All properties & methods necessary for
* CharacterData are in Trait/CharacterData; not declaring them twice.
*/
interface CharacterData extends Node {}

6
lib/DOMException.php

@ -24,6 +24,8 @@ class DOMException extends Exception {
const INVALID_ACCESS_ERROR = 15;
const VALIDATION_ERROR = 16;
const CLIENT_ONLY_NOT_IMPLEMENTED = 301;
public function __construct(int $code, ...$args) {
self::$messages = array_replace(parent::$messages, [
@ -37,7 +39,9 @@ class DOMException extends Exception {
13 => 'Invalid modification error',
14 => 'Namespace error',
15 => 'Invalid access error',
16 => 'Validation error'
16 => 'Validation error',
301 => '%s is client side only; not implemented'
]);
parent::__construct($code, ...$args);

1037
lib/Document.php

File diff suppressed because it is too large

71
lib/DocumentFragment.php

@ -1,71 +0,0 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
use MensBeam\Framework\MagicProperties;
/**
 * Document fragment that can be hosted by a template element and that
 * supplies the getElementById lookup PHP's \DOMDocumentFragment lacks.
 */
class DocumentFragment extends \DOMDocumentFragment implements Node {
    use MagicProperties, ParentNode;

    /** Weak reference to the hosting template element; null until a host is set. */
    protected ?\WeakReference $_host = null;

    /**
     * Getter for the host property.
     *
     * @return HTMLTemplateElement|null Whatever the weak reference currently
     *         yields, or null when no host has been set
     */
    protected function __get_host(): ?HTMLTemplateElement {
        if ($this->_host === null) {
            return null;
        }

        return $this->_host->get();
    }

    /**
     * Setter for the host property. The host may be set exactly once, and only
     * while an HTMLTemplateElement constructor is on the call stack.
     *
     * @param HTMLTemplateElement $value The template element hosting this fragment
     * @throws Exception With READONLY_PROPERTY when a host already exists or
     *         the call does not originate from HTMLTemplateElement::__construct
     */
    protected function __set_host(HTMLTemplateElement $value) {
        if ($this->_host !== null) {
            throw new Exception(Exception::READONLY_PROPERTY, 'host');
        }

        // Check to see if this is being set within the HTMLTemplateElement constructor
        // and throw a read only exception otherwise. This will ensure the host remains
        // readonly. YES. THIS IS DIRTY. We shouldn't do this, but there is no other
        // option. While DocumentFragment could be created via a constructor it cannot
        // be associated with a document unless created by
        // Document::createDocumentFragment.
        // Only the function and class names of each frame are inspected, so the
        // arguments and objects are left out of the backtrace.
        $fromTemplateConstructor = false;
        foreach (debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS) as $frame) {
            // Frames for plain functions carry no 'class' key.
            if ($frame['function'] === '__construct' && ($frame['class'] ?? null) === __NAMESPACE__ . '\\HTMLTemplateElement') {
                $fromTemplateConstructor = true;
                break;
            }
        }

        if (!$fromTemplateConstructor) {
            throw new Exception(Exception::READONLY_PROPERTY, 'host');
        }

        $this->_host = \WeakReference::create($value);
    }

    /**
     * Finds an element among this fragment's descendants by its id attribute.
     *
     * @param string $elementId The id to look for
     * @return Element|null The current item of the filtered walk
     */
    public function getElementById(string $elementId): ?Element {
        # The getElementById(elementId) method steps are to return the first element, in
        # tree order, within this’s descendants, whose ID is elementId; otherwise, if
        # there is no such element, null.
        // This method is supposed to be within a NonElementParentNode trait, but
        // Document has an adequate implementation already from PHP DOM. It doesn't,
        // however, implement one for \DOMDocumentFragment, so here goes.
        return $this->walk(function($n) use($elementId) {
            return ($n instanceof Element && $n->getAttribute('id') === $elementId);
        })->current();
    }

    /** Serializes this fragment through the owner document's saveHTML(). */
    public function __toString() {
        return $this->ownerDocument->saveHTML($this);
    }
}

444
lib/Element.php

@ -7,445 +7,11 @@
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
use MensBeam\Framework\MagicProperties;
use MensBeam\HTML\Parser;
use MensBeam\HTML\DOM\InnerNode\Element as InnerElement;
class Element extends \DOMElement implements Node {
use ChildNode, DocumentOrElement, MagicProperties, ParentNode, ToString;
protected function __get_classList(): TokenList {
return new TokenList($this, 'class');
}
protected function __get_innerHTML(): string {
### DOM Parsing Specification ###
# 2.3 The InnerHTML mixin
#
# On getting, return the result of invoking the fragment serializing algorithm
# on the context object providing true for the require well-formed flag (this
# might throw an exception instead of returning a string).
// DEVIATION: Parsing of XML documents will not be handled by this
// implementation, so there's no need for the well-formed flag.
return $this->ownerDocument->saveHTML($this);
}
protected function __set_innerHTML(string $value) {
### DOM Parsing Specification ###
# 2.3 The InnerHTML mixin
#
# On setting, these steps must be run:
# 1. Let context element be the context object's host if the context object is a
# ShadowRoot object, or the context object otherwise.
// DEVIATION: There is no scripting in this implementation.
# 2. Let fragment be the result of invoking the fragment parsing algorithm with
# the new value as markup, and with context element.
$fragment = Parser::parseFragment($this, ($this->ownerDocument->compatMode === 'CSS1Compat') ? Parser::NO_QUIRKS_MODE : Parser::QUIRKS_MODE, $value, 'UTF-8');
$fragment = $this->ownerDocument->importNode($fragment);
# 3. If the context object is a template element, then let context object be the
# template's template contents (a DocumentFragment).
if ($this instanceof HTMLTemplateElement) {
$this->content = $fragment;
}
# 4. Replace all with fragment within the context object.
else {
# To replace all with a node within a parent, run these steps:
#
# 1. Let removedNodes be parent’s children.
// DEVIATION: removedNodes is used below for scripting. There is no scripting in
// this implementation.
# 2. Let addedNodes be parent’s children.
// DEVIATION: addedNodes is used below for scripting. There is no scripting in
// this implementation.
# 3. If node is a DocumentFragment node, then set addedNodes to node’s
# children.
// DEVIATION: Again, there is no scripting in this implementation.
# 4. Otherwise, if node is non-null, set addedNodes to « node ».
// DEVIATION: Yet again, there is no scripting in this implementation.
# 5. Remove all parent’s children, in tree order, with the suppress observers
# flag set.
// DEVIATION: There are no observers to suppress as there is no scripting in
// this implementation.
while ($this->hasChildNodes()) {
$this->removeChild($this->firstChild);
}
# 6. Otherwise, if node is non-null, set addedNodes to « node ».
# If node is non-null, then insert node into parent before null with the
# suppress observers flag set.
// DEVIATION: Yet again, there is no scripting in this implementation.
# 7. If either addedNodes or removedNodes is not empty, then queue a tree
# mutation record for parent with addedNodes, removedNodes, null, and null.
// DEVIATION: Normally the tree mutation record would do the actual replacement,
// but there is no scripting in this implementation. Going to simply append the
// fragment instead.
$this->appendChild($fragment);
}
}
protected function __get_outerHTML(): string {
### DOM Parsing Specification ###
# 2.4 Extensions to the Element interface
# outerHTML
#
# On getting, return the result of invoking the fragment serializing algorithm
# on a fictional node whose only child is the context object providing true for
# the require well-formed flag (this might throw an exception instead of
# returning a string).
// DEVIATION: Parsing of XML documents will not be handled by this
// implementation, so there's no need for the well-formed flag.
return (string)$this;
}
protected function __set_outerHTML(string $value) {
### DOM Parsing Specification ###
# 2.4 Extensions to the Element interface
# outerHTML
#
# On setting, the following steps must be run:
# 1. Let parent be the context object's parent.
$parent = $this->parentNode;
# 2. If parent is null, terminate these steps. There would be no way to obtain a
# reference to the nodes created even if the remaining steps were run.
if ($parent === null) {
return;
}
# 3. If parent is a Document, throw a "NoModificationAllowedError" DOMException.
elseif ($parent instanceof Document) {
throw new DOMException(DOMException::NO_MODIFICATION_ALLOWED);
}
# 4. parent is a DocumentFragment, let parent be a new Element with:
# • body as its local name,
# • The HTML namespace as its namespace, and
# • The context object's node document as its node document.
elseif ($parent instanceof DocumentFragment) {
$parent = $this->ownerDocument->createElement('body');
}
# 5. Let fragment be the result of invoking the fragment parsing algorithm with
# the new value as markup, and parent as the context element.
$fragment = Parser::parseFragment($parent, ($this->ownerDocument->compatMode === 'CSS1Compat') ? Parser::NO_QUIRKS_MODE : Parser::QUIRKS_MODE, $value, 'UTF-8');
$fragment = $this->ownerDocument->importNode($fragment);
# 6. Replace the context object with fragment within the context object's
# parent.
$this->parentNode->replaceChild($fragment, $this);
}
/**
 * Returns the value of the attribute with the given qualified name.
 *
 * @param string $qualifiedName Qualified name of the attribute to read
 * @return string|null The attribute's value, or null when the attribute is absent
 */
public function getAttribute(string $qualifiedName): ?string {
    # The getAttribute(qualifiedName) method steps are:
    #
    # 1. Let attr be the result of getting an attribute given qualifiedName and this.
    $attr = $this->getAttributeNode($qualifiedName);
    # 2. If attr is null, return null.
    if ($attr === null) {
        return null;
    }

    # 3. Return attr’s value.
    // Uncoerce the value if necessary. Coerced sequences begin with "U", so a
    // value without any "U" can be returned untouched. The comparison against
    // false is deliberate: strpos() returns 0 for a match at the very start of
    // the string, and that must still count as a match.
    $value = $attr->value;
    return (strpos($value, 'U') === false) ? $value : $this->uncoerceName($value);
}
/**
 * Lists the node names of all attributes on this element.
 *
 * @return array Attribute names, uncoerced where a name contains a "U"
 */
public function getAttributeNames(): array {
    $names = [];
    foreach ($this->attributes as $attribute) {
        // Uncoerce names if necessary. strpos() yields 0 for a name that starts
        // with "U", so the result is compared against false rather than negated.
        $name = $attribute->nodeName;
        $names[] = (strpos($name, 'U') === false) ? $name : $this->uncoerceName($name);
    }

    return $names;
}
/**
 * Returns the attribute node with the given qualified name.
 *
 * @param string $qualifiedName Qualified name of the attribute to find
 * @return Attr|null The matching attribute node, or null when none matches
 */
public function getAttributeNode(string $qualifiedName): ?Attr {
    # The getAttributeNode(qualifiedName) method steps are to return the result of
    # getting an attribute given qualifiedName and this.
    #
    # To get an attribute by name given a qualifiedName and element element, run
    # these steps:
    #
    # 1. If element is in the HTML namespace and its node document is an HTML document,
    #    then set qualifiedName to qualifiedName in ASCII lowercase.
    // Document will always be an HTML document
    if ($this->isHTMLNamespace()) {
        $qualifiedName = strtolower($qualifiedName);
    }

    # 2. Return the first attribute in element’s attribute list whose qualified name is
    #    qualifiedName; otherwise null.
    // Let the PHP DOM answer first because it is faster; a hit is returned as-is.
    $found = parent::getAttributeNode($qualifiedName);
    if ($found !== false) {
        return $found;
    }

    // The PHP DOM found nothing. Retry against the coerced form of the name, where
    // any offending characters are replaced with "UHHHHHH" (H being the uppercase
    // hexadecimal digits of the character's code point).
    $coerced = $this->coerceName($qualifiedName);
    foreach ($this->attributes as $candidate) {
        if ($candidate->nodeName === $coerced) {
            return $candidate;
        }
    }

    return null;
}
public function getAttributeNodeNS(?string $namespace = null, string $localName): ?Attr {
# The getAttributeNodeNS(namespace, localName) method steps are to return the
# result of getting an attribute given namespace, localName, and this.
#
# To get an attribute by namespace and local name given a namespace, localName,
# and element element, run these steps:
#
# 1. If namespace is the empty string, then set it to null.
if ($namespace === '') {
$namespace = null;
}
# 2. Return the attribute in element’s attribute list whose namespace is namespace
# and local name is localName, if any; otherwise null.
// Going to try to handle this by getting the PHP DOM to do the heavy lifting
// when we can because it's faster.
$value = parent::getAttributeNodeNS($namespace, $localName);
if (!$value) {
// Replace any offending characters with "UHHHHHH" where H are the uppercase
// hexadecimal digits of the character's code point
$namespace = $this->coerceName($namespace ?? '');
$localName = $this->coerceName($localName);
// The PHP DOM does not acknowledge the presence of XMLNS-namespace attributes
// sometimes, too... so this will get those as well in those circumstances.
foreach ($this->attributes as $a) {
if ($a->namespaceURI === $namespace && $a->localName === $localName) {
return $a;
}
}
return null;
}
return ($value !== false) ? $value : null;
}
/**
 * Returns the value of the attribute with the given namespace and local name.
 *
 * @param string|null $namespace Namespace of the attribute
 * @param string $localName Local name of the attribute
 * @return string|null The attribute's value, or null when the attribute is absent
 */
public function getAttributeNS(?string $namespace = null, string $localName): ?string {
    # The getAttributeNS(namespace, localName) method steps are:
    #
    # 1. Let attr be the result of getting an attribute given namespace, localName,
    #    and this.
    $attr = $this->getAttributeNodeNS($namespace, $localName);
    # 2. If attr is null, return null.
    if ($attr === null) {
        return null;
    }

    # 3. Return attr’s value.
    // Uncoerce the value if necessary. strpos() returns 0 when the "U" sits at
    // the very start of the value, so the result is compared against false
    // instead of being negated.
    $value = $attr->value;
    return (strpos($value, 'U') === false) ? $value : $this->uncoerceName($value);
}
public function hasAttribute(string $qualifiedName): bool {
# The hasAttribute(qualifiedName) method steps are:
#
# 1. If this is in the HTML namespace and its node document is an HTML document,
# then set qualifiedName to qualifiedName in ASCII lowercase.
// Document will always be an HTML document
if ($this->isHTMLNamespace()) {
$qualifiedName = strtolower($qualifiedName);
}
# 2. Return true if this has an attribute whose qualified name is qualifiedName;
# otherwise false.
# An element has an attribute A if its attribute list contains A.
// Going to try to handle this by getting the PHP DOM to do the heavy lifting
// when we can because it's faster.
$value = parent::hasAttribute($qualifiedName);
if (!$value) {
// The PHP DOM does not acknowledge the presence of XMLNS-namespace attributes,
// so try it again just in case; getAttributeNode will coerce names if
// necessary, too.
$value = ($this->getAttributeNode($qualifiedName) !== null);
}
return $value;
}
public function hasAttributeNS(?string $namespace = null, string $localName): bool {
# The hasAttributeNS(namespace, localName) method steps are:
#
# 1. If namespace is the empty string, then set it to null.
if ($namespace === '') {
$namespace = null;
}
# 2. Return true if this has an attribute whose namespace is namespace and local name
# is localName; otherwise false.
# An element has an attribute A if its attribute list contains A.
// Going to try to handle this by getting the PHP DOM to do the heavy lifting
// when we can because it's faster.
$value = parent::hasAttributeNS($namespace, $localName);
if (!$value) {
// The PHP DOM does not acknowledge the presence of XMLNS-namespace attributes,
// so try it again just in case; getAttributeNode will coerce names if
// necessary, too.
$value = ($this->getAttributeNodeNS($namespace, $localName) !== null);
}
return $value;
}
public function removeAttribute(string $qualifiedName): void {
# The removeAttribute(qualifiedName) method steps are to remove an attribute
# given qualifiedName and this, and then return undefined.
#
## To remove an attribute by name given a qualifiedName and element element, run
## these steps:
##
## 1. Let attr be the result of getting an attribute given qualifiedName and element.
$attr = $this->getAttributeNode($qualifiedName);
## 2. If attr is non-null, then remove attr.
if ($attr !== null) {
// Going to try to handle this by getting the PHP DOM to do the heavy lifting
// when we can because it's faster.
parent::removeAttributeNode($attr);
// ClassList stuff because php garbage collection is... garbage.
if ($qualifiedName === 'class') {
ElementMap::delete($this);
}
}
## 3. Return attr.
// Supposed to return undefined in the end, so let's skip this.
}
public function removeAttributeNS(?string $namespace, string $localName): void {
# The removeAttributeNS(namespace, localName) method steps are to remove an
# attribute given namespace, localName, and this, and then return undefined.
#
## To remove an attribute by namespace and local name given a namespace, localName, and element element, run these steps:
##
## 1. Let attr be the result of getting an attribute given namespace, localName, and element.
$attr = $this->getAttributeNodeNS($namespace, $localName);
## 2. If attr is non-null, then remove attr.
if ($attr !== null) {
// Going to try to handle this by getting the PHP DOM to do the heavy lifting
// when we can because it's faster.
parent::removeAttributeNode($attr);
// ClassList stuff because php garbage collection is... garbage.
if ($namespace === null && $localName === 'class') {
ElementMap::delete($this);
}
}
## 3. Return attr.
// Supposed to return undefined in the end, so let's skip this.
}
public function setAttribute(string $qualifiedName, string $value): void {
# 1. If qualifiedName does not match the Name production in XML, then throw an
# "InvalidCharacterError" DOMException.
if (preg_match(self::$nameProductionRegex, $qualifiedName) !== 1) {
throw new DOMException(DOMException::INVALID_CHARACTER);
}
# 2. If this is in the HTML namespace and its node document is an HTML document,
# then set qualifiedName to qualifiedName in ASCII lowercase.
// Document will always be an HTML document
if ($this->isHTMLNamespace()) {
$qualifiedName = strtolower($qualifiedName);
}
# 3. Let attribute be the first attribute in this’s attribute list whose
# qualified name is qualifiedName, and null otherwise.
# 4. If attribute is null, create an attribute whose local name is qualifiedName,
# value is value, and node document is this’s node document, then append this
# attribute to this, and then return.
# 5. Change attribute to value.
// Going to try to handle this by getting the PHP DOM to do the heavy lifting
// when we can because it's faster.
// ClassList stuff because php garbage collection is... garbage.
if ($qualifiedName === 'class' && $value === '') {
ElementMap::delete($this);
}
try {
parent::setAttributeNS(null, $qualifiedName, $value);
} catch (\DOMException $e) {
// The attribute name is invalid for XML
// Replace any offending characters with "UHHHHHH" where H are the uppercase
// hexadecimal digits of the character's code point
parent::setAttributeNS(null, $this->coerceName($qualifiedName), $value);
}
// ClassList stuff because php garbage collection is... garbage.
if ($qualifiedName === 'class') {
ElementMap::delete($this);
}
// If you create an id attribute this way it won't be used by PHP in
// getElementById, so let's fix that.
elseif ($qualifiedName === 'id') {
$this->setIdAttribute($qualifiedName, true);
}
}
public function setAttributeNS(?string $namespace, string $qualifiedName, string $value): void {
# 1. Let namespace, prefix, and localName be the result of passing namespace and
# qualifiedName to validate and extract.
[ 'namespace' => $namespace, 'prefix' => $prefix, 'localName' => $localName ] = $this->validateAndExtract($qualifiedName, $namespace);
$qualifiedName = ($prefix === null || $prefix === '') ? $localName : "{$prefix}:{$localName}";
# 2. Set an attribute value for this using localName, value, and also prefix and
# namespace.
// Going to try to handle this by getting the PHP DOM to do the heavy lifting
// when we can because it's faster.
// NOTE: We create attribute nodes so that xmlns attributes
// don't get lost; otherwise they cannot be serialized
if ($namespace === Parser::XMLNS_NAMESPACE) {
// Xmlns attributes have special bugs just for them. How lucky! Xmlns attribute
// nodes won't stick and can actually cause segmentation faults if created on a
// no longer existing document element, appended to another element, and then
// retrieved. So, use the methods used in Document::createAttributeNS to get an
// attribute node.
$a = $this->ownerDocument->createAttributeNS($namespace, $qualifiedName);
$a->value = $this->escapeString($value, true);
$this->setAttributeNodeNS($a);
} else {
try {
parent::setAttributeNS($namespace, $qualifiedName, $value);
} catch (\DOMException $e) {
// The attribute name is invalid for XML
// Replace any offending characters with "UHHHHHH" where H are the
// uppercase hexadecimal digits of the character's code point
if ($namespace !== null) {
$qualifiedName = implode(':', array_map([$this, 'coerceName'], explode(':', $qualifiedName, 2)));
} else {
$qualifiedName = $this->coerceName($qualifiedName);
}
parent::setAttributeNS($namespace, $qualifiedName, $value);
}
}
if ($namespace === null) {
// ClassList stuff because php garbage collection is... garbage.
if ($qualifiedName === 'class') {
ElementMap::delete($this);
}
// If you create an id attribute this way it won't be used by PHP in
// getElementById, so let's fix that.
elseif ($qualifiedName === 'id') {
$this->setIdAttribute($qualifiedName, true);
}
}
class Element extends Node {
protected function __construct(InnerElement $element) {
parent::__construct($element);
}
}
}

96
lib/ElementMap.php

@ -1,96 +0,0 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
// This is a map of elements which need to be kept in memory; it exists because
// of the peculiar way PHP works. Derived DOM classes (such as
// HTMLTemplateElement) won't remain as such in the DOM (meaning they will
// revert to being what is registered for elements in Document) unless at least
// one reference is kept for the element somewhere in userspace. This is that
// somewhere.
/**
 * Static registry holding references to elements, grouped by owner document.
 */
class ElementMap {
    // List of documents is necessary because when Document objects are destructed
    // it's not possible to check for a document's existence without triggering a
    // fatal error. Keeping document references around fixes that.
    protected static array $documents = [];
    // Element lists; the list at position N belongs to the document at position N.
    protected static array $elements = [];


    /**
     * Registers an element under its owner document.
     *
     * @return bool True when the element was stored, false when it was already present
     */
    public static function add(Element $element): bool {
        $owner = $element->ownerDocument;
        $slot = self::index($owner);

        if ($slot === -1) {
            // First element seen for this document: open a new slot for it.
            self::$documents[] = $owner;
            self::$elements[count(self::$documents) - 1][] = $element;
            return true;
        }

        if (in_array($element, self::$elements[$slot], true)) {
            return false;
        }

        self::$elements[$slot][] = $element;
        return true;
    }

    /**
     * Removes an element from the registry.
     *
     * @return bool True when the element was found and removed, false otherwise
     */
    public static function delete(Element $element): bool {
        $slot = self::index($element->ownerDocument);
        if ($slot === -1) {
            return false;
        }

        $position = array_search($element, self::$elements[$slot], true);
        if ($position === false) {
            return false;
        }

        unset(self::$elements[$slot][$position]);
        // Reindex so the list stays sequential.
        self::$elements[$slot] = array_values(self::$elements[$slot]);
        return true;
    }

    /**
     * Drops a document together with every element stored for it.
     *
     * @return bool True when the document was registered, false otherwise
     */
    public static function destroy(Document $document): bool {
        $slot = self::index($document);
        if ($slot === -1) {
            return false;
        }

        unset(self::$documents[$slot], self::$elements[$slot]);
        // Reindex both lists so their positions keep lining up.
        self::$documents = array_values(self::$documents);
        self::$elements = array_values(self::$elements);
        return true;
    }

    /**
     * Reports whether an element is currently registered.
     */
    public static function has(Element $element): bool {
        $slot = self::index($element->ownerDocument);

        return $slot !== -1 && in_array($element, self::$elements[$slot], true);
    }

    /**
     * Finds the slot of a document by object identity.
     *
     * @return int The document's position, or -1 when it is not registered
     */
    protected static function index(Document $document): int {
        $slot = array_search($document, self::$documents, true);

        return ($slot === false) ? -1 : $slot;
    }
}

24
lib/Exception.php

@ -1,24 +0,0 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
use MensBeam\Framework\Exception as FrameworkException;
/**
 * Library exception which adds the "disabled method" message to the
 * framework exception's message table.
 */
class Exception extends FrameworkException {
    public const DISABLED_METHOD = 301;

    /**
     * @param int $code Exception code used to look up the message
     * @param mixed ...$args Arguments forwarded unchanged to the parent constructor
     */
    public function __construct(int $code, ...$args) {
        // Layer this class' message over the parent's table before delegating.
        $additions = [
            self::DISABLED_METHOD => 'Method %s has been disabled for the following reason: %s'
        ];
        self::$messages = array_replace(parent::$messages, $additions);

        parent::__construct($code, ...$args);
    }
}

48
lib/HTMLTemplateElement.php

@ -1,48 +0,0 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
/** Class specifically for template elements to handle its content property. */
class HTMLTemplateElement extends Element {
/** Document fragment holding the template's contents; assigned in the constructor. */
public DocumentFragment $content;
/**
* Creates the element, has it adopted by the supplied document and attaches a
* fresh content fragment whose host is this element.
*
* @param Document $ownerDocument Document that is to own the new element
* @param string $qualifiedName Qualified name passed to the parent constructor
* @param string|null $namespace Namespace passed to the parent constructor; null is passed on as ''
*/
public function __construct(Document $ownerDocument, string $qualifiedName, ?string $namespace = null) {
parent::__construct($qualifiedName, null, $namespace ?? '');
// Elements that are created by their constructor in PHP aren't owned by any
// document and are readonly until owned by one. Temporarily append to a
// document fragment so the element will be owned by the supplied owner
// document.
$frag = $ownerDocument->createDocumentFragment();
$frag->appendChild($this);
$frag->removeChild($this);
unset($frag);
// Build the content fragment and register this element as its host; the host
// is assigned from inside this constructor.
$content = $this->ownerDocument->createDocumentFragment();
$content->host = $this;
$this->content = $content;
}
/**
* Clones this template by creating a new 'template' element in the owner
* document and copying every attribute onto it.
*
* @param bool $deep When true, the children of this template's content are
* deep-cloned into the copy's content
* @return mixed The element returned by createElement('template')
*/
public function cloneNode(bool $deep = false) {
// NOTE(review): presumably createElement('template') yields an object with a
// content fragment -- confirm against Document::createElement.
$copy = $this->ownerDocument->createElement('template');
foreach ($this->attributes as $attr) {
$copy->setAttributeNS($attr->namespaceURI, $attr->name, $attr->value);
}
if ($deep) {
// The children live in the content fragment rather than on the element itself.
foreach ($this->content->childNodes as $child) {
$copy->content->appendChild($child->cloneNode(true));
}
}
return $copy;
}
}

12
lib/InnerNode/Attr.php

@ -0,0 +1,12 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM\InnerNode;
class Attr extends \DOMAttr {}

12
lib/InnerNode/CDATASection.php

@ -0,0 +1,12 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM\InnerNode;
class CDATASection extends \DOMCDATASection {}

7
lib/Comment.php → lib/InnerNode/Comment.php

@ -6,8 +6,7 @@
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
namespace MensBeam\HTML\DOM\InnerNode;
class Comment extends \DOMComment implements CharacterData {
use ChildNode, LeafNode, ToString;
}
class Comment extends \DOMComment {}

92
lib/InnerNode/Document.php

@ -0,0 +1,92 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM\InnerNode;
use MensBeam\Framework\MagicProperties;
use MensBeam\HTML\DOM\{
Document as WrapperDocument,
Node as WrapperNode
};
use MensBeam\HTML\Parser;
/**
 * Inner document: a \DOMDocument that registers the InnerNode classes for the
 * nodes it creates and hands out wrapper nodes for its inner nodes.
 */
class Document extends \DOMDocument {
    use MagicProperties;

    /** Wrapper document standing in front of this inner document. */
    protected WrapperDocument $wrapperNode;
    /** Map pairing wrapper nodes with their inner nodes. */
    protected NodeMap $nodeMap;


    /**
     * @param WrapperDocument $wrapperNode The wrapper document for this inner document
     */
    public function __construct(WrapperDocument $wrapperNode) {
        parent::__construct();

        // Have the PHP DOM instantiate the InnerNode classes in place of its own.
        parent::registerNodeClass('DOMAttr', Attr::class);
        parent::registerNodeClass('DOMComment', Comment::class);
        parent::registerNodeClass('DOMCDATASection', CDATASection::class);
        parent::registerNodeClass('DOMDocument', self::class);
        parent::registerNodeClass('DOMDocumentFragment', DocumentFragment::class);
        parent::registerNodeClass('DOMElement', Element::class);
        parent::registerNodeClass('DOMProcessingInstruction', ProcessingInstruction::class);
        parent::registerNodeClass('DOMText', Text::class);

        $this->wrapperNode = $wrapperNode;
        $this->nodeMap = new NodeMap();
    }


    /**
     * Returns the wrapper node for an inner node, creating and registering one
     * when the node map holds none yet.
     *
     * @param \DOMNode|null $node The inner node; null or a Document yields this
     *        document's own wrapper
     * @return WrapperNode The wrapper node belonging to the given inner node
     */
    public function getWrapperNode(?\DOMNode $node = null): WrapperNode {
        // If the node is a Document then the wrapperNode is this's wrapperNode
        // property.
        if ($node instanceof Document || $node === null) {
            return $this->wrapperNode;
        }

        // If the wrapper node already exists then return that.
        if ($wrapperNode = $this->nodeMap->get($node)) {
            return $wrapperNode;
        }

        // If the node didn't exist we must construct the wrapper node's class name
        // based upon the node's class name
        $className = $node::class;
        switch ($className) {
            case __NAMESPACE__ . '\\Attr':
                $className = "MensBeam\\HTML\\DOM\\Attr";
            break;
            case __NAMESPACE__ . '\\CDATASection':
                $className = "MensBeam\\HTML\\DOM\\CDATASection";
            break;
            case __NAMESPACE__ . '\\Comment':
                $className = "MensBeam\\HTML\\DOM\\Comment";
            break;
            case __NAMESPACE__ . '\\Document':
                $className = "MensBeam\\HTML\\DOM\\Document";
            break;
            case __NAMESPACE__ . '\\DocumentFragment':
                $className = "MensBeam\\HTML\\DOM\\DocumentFragment";
            break;
            case __NAMESPACE__ . '\\Element':
                // Template elements get their own wrapper class.
                if (($node->namespaceURI === null || $node->namespaceURI === Parser::HTML_NAMESPACE) && $node->nodeName === 'template') {
                    $className = "MensBeam\\HTML\\DOM\\HTMLTemplateElement";
                } else {
                    $className = "MensBeam\\HTML\\DOM\\Element";
                }
            break;
            case __NAMESPACE__ . '\\ProcessingInstruction':
                $className = "MensBeam\\HTML\\DOM\\ProcessingInstruction";
            break;
            case __NAMESPACE__ . '\\Text':
                // Text nodes map to the Text wrapper, like every other case here
                // maps to its namesake.
                $className = "MensBeam\\HTML\\DOM\\Text";
            break;
        }

        // Nodes cannot be created from their constructors normally, so bypass
        // that: instantiate without the constructor, then invoke the constructor
        // through reflection with the inner node.
        $reflector = new \ReflectionClass($className);
        $wrapper = $reflector->newInstanceWithoutConstructor();
        $constructor = new \ReflectionMethod($wrapper, '__construct');
        $constructor->setAccessible(true);
        $constructor->invoke($wrapper, $node);

        $this->nodeMap->set($wrapper, $node);
        return $wrapper;
    }
}

12
lib/InnerNode/DocumentFragment.php

@ -0,0 +1,12 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM\InnerNode;
class DocumentFragment extends \DOMDocumentFragment {}

12
lib/InnerNode/Element.php

@ -0,0 +1,12 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM\InnerNode;
class Element extends \DOMElement {}

6
lib/Attr.php → lib/InnerNode/Node.php

@ -6,9 +6,9 @@
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
namespace MensBeam\HTML\DOM\InnerNode;
class Attr extends \DOMAttr implements Node {
use LeafNode, NodeTrait;
trait Node {
}

59
lib/InnerNode/NodeMap.php

@ -0,0 +1,59 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM\InnerNode;
use MensBeam\HTML\DOM\Node as WrapperNode;
/**
 * Two-way map between wrapper nodes and inner nodes. Pairs are stored in two
 * parallel lists which share keys.
 */
class NodeMap {
    /** Wrapper nodes; the entry at key K pairs with $innerArray[K]. */
    protected array $wrapperArray = [];
    /** Inner nodes; the entry at key K pairs with $wrapperArray[K]. */
    protected array $innerArray = [];


    /**
     * Removes the pair that the given node (wrapper or inner) belongs to.
     *
     * @return bool True when a pair was removed, false when the node is unknown
     */
    public function delete(\DOMNode|WrapperNode $node): bool {
        $key = $this->key($node);
        if ($key === false) {
            return false;
        }

        unset($this->wrapperArray[$key]);
        unset($this->innerArray[$key]);
        // Reindex both lists so their keys stay sequential and aligned.
        $this->wrapperArray = array_values($this->wrapperArray);
        $this->innerArray = array_values($this->innerArray);
        return true;
    }

    /**
     * Returns the counterpart of the given node: the inner node for a wrapper
     * node, or the wrapper node for an inner node.
     *
     * The return type covers both directions; declaring only ?\DOMNode raises a
     * TypeError whenever a wrapper node is handed back.
     *
     * @return \DOMNode|WrapperNode|null The counterpart, or null when the node is unknown
     */
    public function get(\DOMNode|WrapperNode $node): \DOMNode|WrapperNode|null {
        $key = $this->key($node);
        if ($key === false) {
            return null;
        }

        return ($node instanceof WrapperNode) ? $this->innerArray[$key] : $this->wrapperArray[$key];
    }

    /**
     * Reports whether the given node (wrapper or inner) is part of a stored pair.
     */
    public function has(\DOMNode|WrapperNode $node): bool {
        return ($this->key($node) !== false);
    }

    /**
     * Stores a wrapper/inner pair unless the wrapper is already registered.
     *
     * @return bool True when the pair was stored, false when the wrapper was already present
     */
    public function set(WrapperNode $wrapper, \DOMNode $inner): bool {
        if (!$this->has($wrapper)) {
            $this->wrapperArray[] = $wrapper;
            $this->innerArray[] = $inner;
            return true;
        }

        return false;
    }


    /**
     * Finds the key of a node by object identity, searching the list that
     * matches the node's kind.
     *
     * @return int|false The key, or false when the node is not stored
     */
    protected function key(\DOMNode|WrapperNode $node): int|false {
        return array_search($node, ($node instanceof WrapperNode) ? $this->wrapperArray : $this->innerArray, true);
    }
}

12
lib/InnerNode/ProcessingInstruction.php

@ -0,0 +1,12 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM\InnerNode;
/**
 * Inner processing instruction node. Declares no members of its own;
 * everything is inherited from PHP's \DOMProcessingInstruction.
 */
class ProcessingInstruction extends \DOMProcessingInstruction {
}

7
lib/Text.php → lib/InnerNode/Text.php

@ -6,8 +6,7 @@
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
namespace MensBeam\HTML\DOM\InnerNode;
/**
 * Text node built on PHP's \DOMText; its behaviour comes from the ChildNode,
 * LeafNode and ToString traits.
 */
class Text extends \DOMText implements CharacterData {
    use ChildNode;
    use LeafNode;
    use ToString;
}
/**
 * Inner text node. Declares no members of its own; everything is inherited
 * from PHP's \DOMText.
 */
class Text extends \DOMText {
}

122
lib/Node.php

@ -7,21 +7,24 @@
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
use MensBeam\Framework\MagicProperties;
abstract class Node {
use MagicProperties;
/**
* Exists for inheritance reasons. All properties & methods necessary for
* CharacterData are in NodeTrait; not declaring them twice.
*/
interface Node {
public const ELEMENT_NODE = 1;
public const ATTRIBUTE_NODE = 2;
public const TEXT_NODE = 3;
public const CDATA_SECTION_NODE = 4;
public const ENTITY_REFERENCE_NODE = 5; // legacy
public const ENTITY_NODE = 6; // legacy
public const PROCESSING_INSTRUCTION_NODE = 7;
public const COMMENT_NODE = 8;
public const DOCUMENT_NODE = 9;
// NOTE(review): the DOM Standard names node type 9 DOCUMENT_NODE; "DOCUMENT_MODE"
// looks like a typo for it -- confirm and rename (or alias) before release.
public const DOCUMENT_MODE = 9;
public const DOCUMENT_TYPE_NODE = 10;
public const DOCUMENT_FRAGMENT_NODE = 11;
public const NOTATION_NODE = 12; // legacy
public const DOCUMENT_POSITION_DISCONNECTED = 0x01;
public const DOCUMENT_POSITION_PRECEDING = 0x02;
@ -29,4 +32,111 @@ interface Node {
public const DOCUMENT_POSITION_CONTAINS = 0x08;
public const DOCUMENT_POSITION_CONTAINED_BY = 0x10;
public const DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC = 0x20;
protected \DOMNode $innerNode;
/**
* The nodeName read-only property returns the name of the current Node as a
* string.
*
* @property-read string nodeName
*/
/**
 * Getter behind the read-only nodeName property.
 *
 * @return string The uppercased qualified name for HTML elements; otherwise the
 *                name reported by the inner PHP DOM node
 */
protected function __get_nodeName(): string {
    # The nodeName getter steps are to return the first matching statement,
    # switching on the interface this implements:
    # ↪ Element
    #     Its HTML-uppercased qualified name.
    $inner = $this->innerNode;
    if ($this instanceof Element) {
        // "HTML-uppercased" only uppercases elements in the HTML namespace;
        // foreign elements (SVG, MathML, ...) keep their qualified name as is.
        // A null namespace is treated as HTML, matching isHTMLNamespace().
        $namespace = $inner->namespaceURI;
        if ($namespace === null || $namespace === \MensBeam\HTML\Parser::HTML_NAMESPACE) {
            return strtoupper($inner->nodeName);
        }

        return $inner->nodeName;
    }

    // PHP's DOM mostly does this correctly with the exception of Element, so let's
    // fall back to PHP's DOM on everything else.
    return $inner->nodeName;
}
/**
* The read-only Node.nodeType property is an integer that identifies what the
* node is. It distinguishes different kind of nodes from each other, such as
* elements, text and comments.
*
* @property-read int nodeType
*/
/**
 * Getter behind the read-only nodeType property.
 *
 * @return int The node type reported by the inner PHP DOM node
 */
protected function __get_nodeType(): int {
    // The inner node's value is passed through unchanged.
    $inner = $this->innerNode;
    return $inner->nodeType;
}
/**
* The ownerDocument read-only property of the Node interface returns the
* top-level document object of the node.
*
* @property-read Document ownerDocument
*/
/**
 * Getter behind the read-only ownerDocument property.
 *
 * @return Document This node itself when it is a Document; otherwise the
 *                  wrapper obtained from the inner node's owner document
 */
protected function __get_ownerDocument(): Document {
    return ($this instanceof Document)
        ? $this
        : $this->innerNode->ownerDocument->getWrapperNode();
}
/**
* The Node.parentElement read-only property returns the DOM node's parent
* Element, or null if the node either has no parent, or its parent isn't a DOM
* Element.
*
* @property-read ?Element parentElement
*/
/**
 * Getter behind the read-only parentElement property.
 *
 * @return ?Element The parent when it is an Element, null otherwise
 */
protected function __get_parentElement(): ?Element {
    # The parentElement getter steps are to return this’s parent element.
    # A node’s parent of type Element is known as its parent element. If the node
    # has a parent of a different type, its parent element is null.
    $candidate = $this->parentNode;
    if ($candidate instanceof Element) {
        return $candidate;
    }

    return null;
}
/**
* The Node.parentNode read-only property returns the parent of the specified
* node in the DOM tree.
*
* @property-read ?Node parentNode
*/
/**
 * Getter behind the read-only parentNode property.
 *
 * @return ?Node The wrapper of the inner node's parent, or null when this is a
 *               Document or the inner node has no parent
 */
protected function __get_parentNode(): ?Node {
    # The parentNode getter steps are to return this’s parent.
    # An object that participates in a tree has a parent, which is either null or an
    # object, and has children, which is an ordered set of objects. An object A
    # whose parent is object B is a child of B.
    if ($this instanceof Document) {
        return null;
    }

    $parent = $this->innerNode->parentNode;
    if ($parent === null) {
        return null;
    }

    // A document's own ownerDocument is null in PHP's DOM, so when the parent is
    // the document itself ask it directly for its wrapper, the same way the
    // ownerDocument getter does.
    if ($parent instanceof \DOMDocument) {
        return $parent->getWrapperNode();
    }

    return $parent->ownerDocument->getWrapperNode($parent);
}
/**
 * Binds this wrapper to the PHP DOM node it wraps.
 *
 * @param \DOMNode $innerNode The wrapped node
 */
protected function __construct(\DOMNode $innerNode) {
    $this->innerNode = $innerNode;
}
/**
 * Unfinished stub: nothing is appended yet.
 *
 * @param Node $node The wrapper node whose inner node is dumped
 */
public function appendChild(Node $node): void {
// NOTE(review): debugging scaffold -- prints the argument's inner node with
// var_export() and then terminates the script via die(); replace with a real
// implementation before this method is used.
die(var_export($this->getInnerNode($node)));
}
/**
 * Returns the inner PHP DOM node of the given wrapper, or of this wrapper when
 * none is given.
 *
 * @param ?Node $node The wrapper to unwrap; defaults to $this
 * @return \DOMNode The value of the wrapper's innerNode property
 */
protected function getInnerNode(?Node $node = null) {
    $node = $node ?? $this;
    // innerNode is a protected property declared by this class, so it is
    // reachable on any Node instance from here; no reflection is required.
    return $node->innerNode;
}
}

104
lib/NodeList.php

@ -1,104 +0,0 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
use MensBeam\Framework\MagicProperties;
/** Exists because PHP DOM's DOMNodeList is always live. */
/**
 * Static, immutable list of nodes. Exists because PHP DOM's DOMNodeList is
 * always live.
 *
 * The list can be counted, iterated, and read with array syntax; writes through
 * array syntax are silently ignored.
 */
class NodeList implements \ArrayAccess, \Countable, \Iterator {
    use MagicProperties;

    /** Number of stored nodes; exposed through the length property getter. */
    protected int $_length = 0;
    /** Cursor used by the Iterator implementation. */
    protected int $position = 0;
    /** The stored nodes as a zero-based list. */
    protected array $storage = [];


    protected function __get_length(): int {
        # The length attribute must return the number of nodes represented by the
        # collection.
        return $this->_length;
    }


    /**
     * @param ?iterable $iterable Nodes (Node or \DOMDocumentType) to store; null is treated as empty
     * @throws Exception When an item is neither a Node nor a \DOMDocumentType
     */
    public function __construct(?iterable $iterable = []) {
        $iterable = $iterable ?? [];

        // Per the specification one cannot create a NodeList via its constructor, but
        // this implementation is not going to build up the framework for that.

        // Check types while also unpacking the traversable.
        $array = [];
        foreach ($iterable as $i) {
            if (!$i instanceof Node && !$i instanceof \DOMDocumentType) {
                $type = is_object($i) ? get_class($i) : gettype($i);
                throw new Exception(Exception::ARGUMENT_TYPE_ERROR, 1, 'traversable', 'Node|\\DOMDocumentType', $type);
            }

            $array[] = $i;
        }

        $this->storage = $array;
        $this->_length = count($array);
    }


    public function count(): int {
        return $this->_length;
    }

    public function current(): Node|\DOMDocumentType|null {
        return $this->item($this->position);
    }

    /**
     * @param int $index Zero-based position
     * @return Node|\DOMDocumentType|null The node at that position, or null when out of range
     */
    public function item(int $index): Node|\DOMDocumentType|null {
        # The item(index) method must return the indexth node in the collection. If
        # there is no indexth node in the collection, then the method must return null.
        // Negative indexes are out of range too; without this check they would read
        // an undefined array key.
        if ($index < 0 || $index >= $this->_length) {
            return null;
        }

        return $this->storage[$index];
    }

    public function key(): int {
        return $this->position;
    }

    public function next(): void {
        ++$this->position;
    }

    public function rewind(): void {
        $this->position = 0;
    }

    public function offsetExists($offset): bool {
        return isset($this->storage[$offset]);
    }

    public function offsetGet($offset): Node|\DOMDocumentType|null {
        // Only integer offsets can address a node; anything else is simply absent
        // rather than a type error under strict_types.
        return is_int($offset) ? $this->item($offset) : null;
    }

    public function offsetSet($offset, $value): void {
        // NodeLists are immutable
    }

    public function offsetUnset($offset): void {
        // Nodelists are immutable
    }

    // The bool return type matches \Iterator::valid() and avoids the PHP 8.1+
    // return type deprecation notice.
    public function valid(): bool {
        return array_key_exists($this->position, $this->storage);
    }
}

128
lib/ParentNode.php

@ -0,0 +1,128 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
use MensBeam\Framework\MagicProperties;
/**
 * Base class for wrapper nodes. Each wrapper holds the PHP DOM node it wraps in
 * $innerNode and exposes DOM properties through `__get_*` getter methods
 * (used together with the MagicProperties trait).
 */
abstract class Node {
    use MagicProperties;

    public const ELEMENT_NODE = 1;
    public const ATTRIBUTE_NODE = 2;
    public const TEXT_NODE = 3;
    public const CDATA_SECTION_NODE = 4;
    public const ENTITY_REFERENCE_NODE = 5; // legacy
    public const ENTITY_NODE = 6; // legacy
    public const PROCESSING_INSTRUCTION_NODE = 7;
    public const COMMENT_NODE = 8;
    public const DOCUMENT_NODE = 9;
    /** @deprecated Misspelling of DOCUMENT_NODE, kept so existing references keep working. */
    public const DOCUMENT_MODE = self::DOCUMENT_NODE;
    public const DOCUMENT_TYPE_NODE = 10;
    public const DOCUMENT_FRAGMENT_NODE = 11;
    public const NOTATION_NODE = 12; // legacy

    public const DOCUMENT_POSITION_DISCONNECTED = 0x01;
    public const DOCUMENT_POSITION_PRECEDING = 0x02;
    public const DOCUMENT_POSITION_FOLLOWING = 0x04;
    public const DOCUMENT_POSITION_CONTAINS = 0x08;
    public const DOCUMENT_POSITION_CONTAINED_BY = 0x10;
    public const DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC = 0x20;

    /** The wrapped PHP DOM node. */
    protected \DOMNode $innerNode;


    /**
     * The nodeName read-only property returns the name of the current Node as a
     * string.
     *
     * @property-read string nodeName
     */
    protected function __get_nodeName(): string {
        # The nodeName getter steps are to return the first matching statement,
        # switching on the interface this implements:
        # ↪ Element
        #     Its HTML-uppercased qualified name.
        $inner = $this->innerNode;
        if ($this instanceof Element) {
            // "HTML-uppercased" only uppercases elements in the HTML namespace;
            // foreign elements (SVG, MathML, ...) keep their qualified name as is.
            // A null namespace is treated as HTML, matching isHTMLNamespace().
            $namespace = $inner->namespaceURI;
            if ($namespace === null || $namespace === \MensBeam\HTML\Parser::HTML_NAMESPACE) {
                return strtoupper($inner->nodeName);
            }

            return $inner->nodeName;
        }

        // PHP's DOM mostly does this correctly with the exception of Element, so let's
        // fall back to PHP's DOM on everything else.
        return $inner->nodeName;
    }

    /**
     * The read-only Node.nodeType property is an integer that identifies what the
     * node is. It distinguishes different kind of nodes from each other, such as
     * elements, text and comments.
     *
     * @property-read int nodeType
     */
    protected function __get_nodeType(): int {
        // PHP's DOM does this correctly already.
        return $this->innerNode->nodeType;
    }

    /**
     * The ownerDocument read-only property of the Node interface returns the
     * top-level document object of the node.
     *
     * @property-read Document ownerDocument
     */
    protected function __get_ownerDocument(): Document {
        if ($this instanceof Document) {
            return $this;
        }

        return $this->innerNode->ownerDocument->getWrapperNode();
    }

    /**
     * The Node.parentElement read-only property returns the DOM node's parent
     * Element, or null if the node either has no parent, or its parent isn't a DOM
     * Element.
     *
     * @property-read ?Element parentElement
     */
    protected function __get_parentElement(): ?Element {
        # The parentElement getter steps are to return this’s parent element.
        # A node’s parent of type Element is known as its parent element. If the node
        # has a parent of a different type, its parent element is null.
        $parent = $this->parentNode;
        return ($parent instanceof Element) ? $parent : null;
    }

    /**
     * The Node.parentNode read-only property returns the parent of the specified
     * node in the DOM tree.
     *
     * @property-read ?Node parentNode
     */
    protected function __get_parentNode(): ?Node {
        # The parentNode getter steps are to return this’s parent.
        # An object that participates in a tree has a parent, which is either null or an
        # object, and has children, which is an ordered set of objects. An object A
        # whose parent is object B is a child of B.
        if ($this instanceof Document) {
            return null;
        }

        $parent = $this->innerNode->parentNode;
        if ($parent === null) {
            return null;
        }

        // A document's own ownerDocument is null in PHP's DOM, so when the parent is
        // the document itself ask it directly for its wrapper, the same way the
        // ownerDocument getter does.
        if ($parent instanceof \DOMDocument) {
            return $parent->getWrapperNode();
        }

        return $parent->ownerDocument->getWrapperNode($parent);
    }


    /**
     * Binds this wrapper to the PHP DOM node it wraps.
     */
    protected function __construct(\DOMNode $innerNode) {
        $this->innerNode = $innerNode;
    }
}

13
lib/ProcessingInstruction.php

@ -1,13 +0,0 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
/**
 * Processing instruction node built on PHP's \DOMProcessingInstruction; its
 * behaviour comes from the ChildNode, LeafNode and ToString traits.
 */
class ProcessingInstruction extends \DOMProcessingInstruction implements CharacterData {
    use ChildNode;
    use LeafNode;
    use ToString;
}

336
lib/TokenList.php

@ -1,336 +0,0 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
use MensBeam\Framework\MagicProperties,
MensBeam\HTML\Parser\Data;
/**
 * Implementation of the DOM Standard's DOMTokenList: an ordered set of
 * whitespace-separated tokens backed by one attribute of an element.
 *
 * The token set is kept as a gapless zero-based list of unique strings, and
 * $_length always equals its size.
 */
class TokenList implements \ArrayAccess, \Countable, \Iterator {
    use MagicProperties;

    /** Local name of the attribute the tokens are serialized to. */
    protected string $localName;
    /** Weak reference to the associated element. */
    protected \WeakReference $element;
    /** Number of tokens; exposed through the length property getter. */
    protected int $_length = 0;
    /** Cursor used by the Iterator implementation. */
    protected int $position = 0;
    # A DOMTokenList object has an associated token set (a set), which is initially
    # empty.
    protected array $tokenSet = [];

    private const ASCII_WHITESPACE_REGEX = '/[\t\n\x0c\r ]+/';


    protected function __get_length(): int {
        return $this->_length;
    }

    protected function __get_value(): string {
        # The value attribute must return the result of running this’s serialize steps.
        return $this->__toString();
    }

    protected function __set_value(string $value) {
        # Setting the value attribute must set an attribute value for the associated
        # element using associated attribute’s local name and the given value.
        $element = $this->element->get();
        $element->setAttribute($this->localName, $value);
        // Also update the token set and the length.
        $this->tokenSet = $this->parseOrderedSet($value);
        $this->_length = count($this->tokenSet);
    }


    /**
     * @param \DOMElement $element The associated element
     * @param string $attributeLocalName Local name of the attribute holding the tokens
     */
    public function __construct(\DOMElement $element, string $attributeLocalName) {
        # A DOMTokenList object also has an associated element and an attribute’s local
        # name.

        # When a DOMTokenList object is created, then:
        #
        # 1. Let element be associated element.
        // Using a weak reference here to prevent a circular reference.
        $this->element = \WeakReference::create($element);
        // Store the element somewhere statically because PHP's garbage collection is
        // itself garbage. This seems to contradict using a WeakReference, and it does.
        // However, it simply doesn't work otherwise because PHP does reference counting
        // for garbage collection. Attempts are made elsewhere to garbage collect.
        ElementMap::add($element);
        # 2. Let localName be associated attribute’s local name.
        $this->localName = $attributeLocalName;
        # 3. Let value be the result of getting an attribute value given element and
        #    localName.
        $value = $element->getAttribute($attributeLocalName);
        # 4. Run the attribute change steps for element, localName, value, value, and
        #    null.
        $this->attributeChange($attributeLocalName, $value, $value);
    }


    /**
     * Appends the given tokens to the set, skipping ones already present.
     *
     * @throws DOMException SYNTAX_ERROR for an empty token, INVALID_CHARACTER for one containing whitespace
     */
    public function add(...$tokens): void {
        # 1. For each token in tokens:
        foreach ($tokens as $token) {
            # 1. If token is the empty string, then throw a "SyntaxError" DOMException.
            if ($token === '') {
                throw new DOMException(DOMException::SYNTAX_ERROR);
            }

            # 2. If token contains any ASCII whitespace, then throw an
            #    "InvalidCharacterError" DOMException.
            if (preg_match(Data::WHITESPACE_REGEX, $token)) {
                throw new DOMException(DOMException::INVALID_CHARACTER);
            }
        }

        # 2. For each token in tokens, append token to this’s token set.
        foreach ($tokens as $token) {
            // The spec does not say to trim, but browsers do. Trim before the
            // membership test so the stored value is the one that gets compared,
            // and compare strictly so numeric-looking tokens such as "1e3" and
            // "1000" are not treated as equal.
            $token = trim($token);
            if (!in_array($token, $this->tokenSet, true)) {
                $this->tokenSet[] = $token;
                $this->_length++;
            }
        }

        # 3. Run the update steps.
        $this->update();
    }

    public function contains(string $token): bool {
        return in_array($token, $this->tokenSet, true);
    }

    public function count(): int {
        return $this->_length;
    }

    public function current(): ?string {
        return $this->item($this->position);
    }

    /**
     * @param int $index Zero-based position
     * @return ?string The token at that position, or null when out of range
     */
    public function item(int $index): ?string {
        # The item(index) method steps are:
        # 1. If index is equal to or greater than this’s token set’s size, then return null.
        // Negative indexes are out of range as well.
        if ($index < 0 || $index >= $this->_length) {
            return null;
        }

        # 2. Return this’s token set[index].
        return $this->tokenSet[$index] ?? null;
    }

    public function key(): int {
        return $this->position;
    }

    public function next(): void {
        ++$this->position;
    }

    public function rewind(): void {
        $this->position = 0;
    }

    public function offsetExists($offset): bool {
        return isset($this->tokenSet[$offset]);
    }

    // Nullable because reading an offset that does not exist yields null, exactly
    // like item(); non-integer offsets never address a token.
    public function offsetGet($offset): ?string {
        return is_int($offset) ? $this->item($offset) : null;
    }

    public function offsetSet($offset, $value): void {
        // Spec says nothing about setting values on DOMTokenList outside of add();
        // browsers silently fail here.
    }

    public function offsetUnset($offset): void {
        // Spec says nothing about unsetting values on DOMTokenList outside of remove();
        // browsers silently fail here.
    }

    /**
     * Removes the given tokens from the set; unknown tokens are ignored.
     *
     * @throws DOMException SYNTAX_ERROR for an empty token, INVALID_CHARACTER for one containing whitespace
     */
    public function remove(...$tokens): void {
        # 1. For each token in tokens:
        foreach ($tokens as $token) {
            # 1. If token is the empty string, then throw a "SyntaxError" DOMException.
            if ($token === '') {
                throw new DOMException(DOMException::SYNTAX_ERROR);
            }

            # 2. If token contains any ASCII whitespace, then throw an
            #    "InvalidCharacterError" DOMException.
            if (preg_match(Data::WHITESPACE_REGEX, $token)) {
                throw new DOMException(DOMException::INVALID_CHARACTER);
            }
        }

        # For each token in tokens, remove token from this’s token set.
        $changed = false;
        foreach ($tokens as $token) {
            // array_search() returns index 0 for the first token, so the result must
            // be compared against false rather than tested for truthiness.
            $key = array_search($token, $this->tokenSet, true);
            if ($key !== false) {
                unset($this->tokenSet[$key]);
                $this->_length--;
                $changed = true;
            }
        }

        if ($changed) {
            // Close the gaps left by unset() so indexes stay contiguous.
            $this->tokenSet = array_values($this->tokenSet);
        }

        # 3. Run the update steps.
        $this->update();
    }

    /**
     * Replaces token with newToken.
     *
     * @return bool False when token is not in the set, true otherwise
     * @throws DOMException SYNTAX_ERROR for an empty token, INVALID_CHARACTER for one containing whitespace
     */
    public function replace(string $token, string $newToken): bool {
        # 1. If either token or newToken is the empty string, then throw a "SyntaxError"
        #    DOMException.
        if ($token === '' || $newToken === '') {
            throw new DOMException(DOMException::SYNTAX_ERROR);
        }

        # 2. If either token or newToken contains any ASCII whitespace, then throw an
        #    "InvalidCharacterError" DOMException.
        if (preg_match(Data::WHITESPACE_REGEX, $token) || preg_match(Data::WHITESPACE_REGEX, $newToken)) {
            throw new DOMException(DOMException::INVALID_CHARACTER);
        }

        // The spec does not say to trim, but browsers do.
        $token = trim($token);
        $newToken = trim($newToken);

        # 3. If this’s token set does not contain token, then return false.
        // Index 0 is a valid hit, so compare against false explicitly.
        $key = array_search($token, $this->tokenSet, true);
        if ($key === false) {
            return false;
        }

        # 4. Replace token in this’s token set with newToken.
        // Replacing within an ordered set keeps the set unique: when newToken is
        // already present elsewhere, the earlier of the two positions receives
        // newToken and the later one is dropped.
        $existing = array_search($newToken, $this->tokenSet, true);
        if ($existing === false || $existing === $key) {
            $this->tokenSet[$key] = $newToken;
        } else {
            $this->tokenSet[min($key, $existing)] = $newToken;
            unset($this->tokenSet[max($key, $existing)]);
            $this->tokenSet = array_values($this->tokenSet);
            $this->_length = count($this->tokenSet);
        }

        # 5. Run the update steps.
        $this->update();
        # 6. Return true.
        return true;
    }

    public function supports(string $token): bool {
        # 1. Let result be the return value of validation steps called with token.
        # 2. Return result.
        #
        # A DOMTokenList object’s validation steps for a given token are:
        #
        # 1. If the associated attribute’s local name does not define supported tokens,
        #    throw a TypeError.
        # 2. Let lowercase token be a copy of token, in ASCII lowercase.
        # 3. If lowercase token is present in supported tokens, return true.
        # 4. Return false.

        // This class is presently only used for Element::classList, and it supports any
        // valid class name as a token. So, there's nothing to do here at the moment.
        // Just return true.
        return true;
    }

    /**
     * Adds or removes token depending on its presence and on $force.
     *
     * @return bool True when the token is in the set afterwards, false otherwise
     * @throws DOMException SYNTAX_ERROR for an empty token, INVALID_CHARACTER for one containing whitespace
     */
    public function toggle(string $token, ?bool $force = null): bool {
        # 1. If token is the empty string, then throw a "SyntaxError" DOMException.
        if ($token === '') {
            throw new DOMException(DOMException::SYNTAX_ERROR);
        }

        # 2. If token contains any ASCII whitespace, then throw an
        #    "InvalidCharacterError" DOMException.
        if (preg_match(Data::WHITESPACE_REGEX, $token)) {
            throw new DOMException(DOMException::INVALID_CHARACTER);
        }

        # 3. If this’s token set[token] exists, then:
        if (in_array($token, $this->tokenSet, true)) {
            # 1. If force is either not given or is false, then remove token from this’s
            #    token set, run the update steps and return false.
            if (!$force) {
                $this->remove($token);
                return false;
            }

            # 2. Return true.
            return true;
        }
        # 4. Otherwise, if force not given or is true, append token to this’s token set,
        #    run the update steps, and return true.
        elseif ($force === null || $force === true) {
            $this->add($token);
            return true;
        }

        # 5. Return false.
        return false;
    }

    // The bool return type matches \Iterator::valid() and avoids the PHP 8.1+
    // return type deprecation notice.
    public function valid(): bool {
        return array_key_exists($this->position, $this->tokenSet);
    }


    protected function attributeChange(string $localName, ?string $oldValue = null, ?string $value = null, ?string $namespace = null) {
        # A DOMTokenList object has these attribute change steps for its associated
        # element:
        #
        # 1. If localName is associated attribute’s local name, namespace is null, and
        #    value is null, then empty token set.
        if ($localName === $this->localName && $namespace === null && $value === null) {
            $this->tokenSet = [];
            $this->_length = 0;
        }
        # 2. Otherwise, if localName is associated attribute’s local name, namespace is
        #    null, then set token set to value, parsed.
        elseif ($localName === $this->localName && $namespace === null) {
            $this->tokenSet = $this->parseOrderedSet($value);
            $this->_length = count($this->tokenSet);
        }
    }

    /**
     * Splits a string on whitespace into a gapless list of unique, non-empty tokens.
     *
     * @return string[]
     */
    protected function parseOrderedSet(string $input) {
        if ($input === '') {
            return [];
        }

        # The ordered set parser takes a string input and then runs these steps:
        #
        # 1. Let inputTokens be the result of splitting input on ASCII whitespace.
        // PREG_SPLIT_NO_EMPTY drops the empty strings that leading or trailing
        // whitespace would otherwise produce.
        $inputTokens = preg_split(Data::WHITESPACE_REGEX, $input, -1, PREG_SPLIT_NO_EMPTY) ?: [];

        # 2. Let tokens be a new ordered set.
        # 3. For each token in inputTokens, append token to tokens.
        # 4. Return tokens.
        // There isn't a Set object in php, so make sure all the tokens are unique.
        // array_unique() preserves keys, so reindex to keep item() and the iterator
        // working on contiguous indexes.
        return array_values(array_unique($inputTokens));
    }

    protected function update() {
        # A DOMTokenList object’s update steps are:
        #
        # 1. If the associated element does not have an associated attribute and token
        #    set is empty, then return.
        // Not sure what this is about. This class is constructed with a provided
        // associated element and attribute; there is no need to do this.

        # 2. Set an attribute value for the associated element using associated
        #    attribute’s local name and the result of running the ordered set serializer
        #    for token set.
        $element = $this->element->get();
        $class = $element->ownerDocument->createAttribute($this->localName);
        $class->value = $this->__toString();
        $element->setAttributeNode($class);
    }


    public function __toString(): string {
        # The ordered set serializer takes a set and returns the concatenation of set
        # using U+0020 SPACE.
        return implode(' ', $this->tokenSet);
    }
}

269
lib/traits/ChildNode.php

@ -1,269 +0,0 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
# 4.2.8. Mixin ChildNode
/**
 * Implements the DOM Standard's ChildNode mixin (after, before, replaceWith)
 * plus nonstandard generators for walking ancestors and siblings.
 */
trait ChildNode {
    /**
     * Inserts the given nodes and strings right after this node.
     *
     * @param Node|string ...$nodes
     * @throws Exception When an argument is neither a Node nor a string
     */
    public function after(...$nodes): void {
        // After exists in PHP DOM, but it can insert incorrect nodes because of PHP
        // DOM's incorrect (for HTML) pre-insertion validation.

        // PHP's declaration for \DOMCharacterData::after doesn't include the
        // Node|string typing for the nodes that it should, so type checking will
        // need to be done manually.
        $this->childNodeCheckNodes($nodes);

        # The after(nodes) method steps are:
        #
        # 1. Let parent be this’s parent.
        $parent = $this->parentNode;

        # 2. If parent is null, then return.
        if ($parent === null) {
            return;
        }

        # 3. Let viableNextSibling be this’s first following sibling not in nodes;
        #    otherwise null.
        $viableNextSibling = $this->childNodeViableSibling($nodes, true);

        # 4. Let node be the result of converting nodes into a node, given nodes and this’s
        #    node document.
        $node = $this->convertNodesToNode($nodes);

        # 5. Pre-insert node into parent before viableNextSibling.
        $parent->insertBefore($node, $viableNextSibling);
    }

    /**
     * Inserts the given nodes and strings right before this node.
     *
     * @param Node|string ...$nodes
     * @throws Exception When an argument is neither a Node nor a string
     */
    public function before(...$nodes): void {
        // Before exists in PHP DOM, but it can insert incorrect nodes because of PHP
        // DOM's incorrect (for HTML) pre-insertion validation.

        // PHP's declaration for \DOMCharacterData::before doesn't include the
        // Node|string typing for the nodes that it should, so type checking will
        // need to be done manually.
        $this->childNodeCheckNodes($nodes);

        # The before(nodes) method steps are:
        #
        # 1. Let parent be this’s parent.
        $parent = $this->parentNode;

        # 2. If parent is null, then return.
        if ($parent === null) {
            return;
        }

        # 3. Let viablePreviousSibling be this’s first preceding sibling not in nodes;
        #    otherwise null.
        $viablePreviousSibling = $this->childNodeViableSibling($nodes, false);

        # 4. Let node be the result of converting nodes into a node, given nodes and
        #    this’s node document.
        $node = $this->convertNodesToNode($nodes);

        # 5. If viablePreviousSibling is null, then set it to parent’s first child;
        #    otherwise to viablePreviousSibling’s next sibling.
        $viablePreviousSibling = ($viablePreviousSibling === null) ? $parent->firstChild : $viablePreviousSibling->nextSibling;

        # 6. Pre-insert node into parent before viablePreviousSibling.
        $parent->insertBefore($node, $viablePreviousSibling);
    }

    /**
     * Generator which walks backwards through the DOM from the node the method is
     * being run on. Nonstandard.
     *
     * @param ?\Closure $filter - An optional callback function used to filter; if not provided the generator will
     *                            just yield every node.
     * @param bool $includeReferenceNode - An optional boolean flag which if true includes the reference node ($this) in
     *                                     the iteration.
     */
    public function moonwalk(?\Closure $filter = null, bool $includeReferenceNode = false): \Generator {
        // Honour $includeReferenceNode the same way the sibling walkers do.
        $node = ($includeReferenceNode) ? $this : $this->parentNode;
        if ($node !== null) {
            do {
                // Read the next step before the filter runs on the current node.
                $next = $node->parentNode;
                if ($this->childNodeFilterAccepts($filter, $node)) {
                    yield $node;
                }

                // A document fragment with a host continues the walk at that host.
                if ($node instanceof DocumentFragment) {
                    $host = $node->host;
                    if ($host !== null) {
                        $next = $host;
                    }
                }
            } while ($node = $next);
        }
    }

    /**
     * Replaces this node with the given nodes and strings.
     *
     * @param Node|string ...$nodes
     * @throws Exception When an argument is neither a Node nor a string
     */
    public function replaceWith(...$nodes): void {
        // replaceWith exists in PHP DOM, but it can insert incorrect nodes because of
        // PHP DOM's incorrect (for HTML) pre-insertion validation.

        // PHP's declaration for \DOMCharacterData::replaceWith doesn't include the
        // Node|string typing for the nodes that it should, so type checking will
        // need to be done manually.
        $this->childNodeCheckNodes($nodes);

        # The replaceWith(nodes) method steps are:
        #
        # 1. Let parent be this’s parent.
        $parent = $this->parentNode;

        # 2. If parent is null, then return.
        if ($parent === null) {
            return;
        }

        # 3. Let viableNextSibling be this’s first following sibling not in nodes;
        #    otherwise null.
        $viableNextSibling = $this->childNodeViableSibling($nodes, true);

        # 4. Let node be the result of converting nodes into a node, given nodes and
        #    this’s node document.
        $node = $this->convertNodesToNode($nodes);

        # 5. If this’s parent is parent, replace this with node within parent.
        #    Note: This could have been inserted into node.
        if ($this->parentNode === $parent) {
            $parent->replaceChild($node, $this);
        }
        # 6. Otherwise, pre-insert node into parent before viableNextSibling.
        else {
            $parent->insertBefore($node, $viableNextSibling);
        }
    }

    /**
     * Generator which walks forwards through an element's siblings. Nonstandard.
     *
     * @param ?\Closure $filter - An optional callback function used to filter; if not provided the generator will
     *                            just yield every node.
     * @param bool $includeReferenceNode - An optional boolean flag which if true includes the reference node ($this) in
     *                                     the iteration.
     */
    public function walkFollowing(?\Closure $filter = null, bool $includeReferenceNode = false): \Generator {
        $node = ($includeReferenceNode) ? $this : $this->nextSibling;
        if ($node !== null) {
            do {
                // Read the next step before the filter runs on the current node.
                $next = $node->nextSibling;
                if ($this->childNodeFilterAccepts($filter, $node)) {
                    yield $node;
                }
            } while ($node = $next);
        }
    }

    /**
     * Generator which walks backwards through an element's siblings. Nonstandard.
     *
     * @param ?\Closure $filter - An optional callback function used to filter; if not provided the generator will
     *                            just yield every node.
     * @param bool $includeReferenceNode - An optional boolean flag which if true includes the reference node ($this) in
     *                                     the iteration.
     */
    public function walkPreceding(?\Closure $filter = null, bool $includeReferenceNode = false): \Generator {
        $node = ($includeReferenceNode) ? $this : $this->previousSibling;
        if ($node !== null) {
            do {
                // Read the next step before the filter runs on the current node.
                $next = $node->previousSibling;
                if ($this->childNodeFilterAccepts($filter, $node)) {
                    yield $node;
                }
            } while ($node = $next);
        }
    }


    /** Throws unless every entry of $nodes is a Node or a string. */
    private function childNodeCheckNodes(array $nodes): void {
        foreach ($nodes as $node) {
            if (!$node instanceof Node && !is_string($node)) {
                $type = is_object($node) ? get_class($node) : gettype($node);
                throw new Exception(Exception::ARGUMENT_TYPE_ERROR, 1, 'nodes', 'Node|string', $type);
            }
        }
    }

    /** Returns the nearest following (or preceding) sibling that is not one of $nodes, or null. */
    private function childNodeViableSibling(array $nodes, bool $following) {
        $sibling = $this;
        while ($sibling = ($following ? $sibling->nextSibling : $sibling->previousSibling)) {
            // Strict comparison: objects match by identity and never equal a string.
            if (!in_array($sibling, $nodes, true)) {
                return $sibling;
            }
        }

        return null;
    }

    /** Runs $filter on $node (no filter accepts everything) and validates that it returned ?bool. */
    private function childNodeFilterAccepts(?\Closure $filter, $node): bool {
        $result = ($filter === null) ? true : $filter($node);
        // Have to do type checking here because PHP is lacking in advanced typing
        if ($result !== true && $result !== false && $result !== null) {
            $type = is_object($result) ? get_class($result) : gettype($result);
            throw new Exception(Exception::CLOSURE_RETURN_TYPE_ERROR, '?bool', $type);
        }

        return ($result === true);
    }
}

151
lib/traits/DocumentOrElement.php

@ -1,151 +0,0 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
use MensBeam\HTML\Parser;
use MensBeam\HTML\Parser\{
Data,
NameCoercion
};
/**
* Not in standard. Exists so Document and Element can share some properties and
* methods. For instance, getElementsByClassName is mentioned in the standard in
* both the Document and Element interfaces.
*/
trait DocumentOrElement {
use NameCoercion;
// Traits can't have constants, so statics are the next best thing
// Regex used to validate names when creating elements and attributes.
protected static string $nameProductionRegex = '/^[:A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}][:A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}-\.0-9\x{B7}\x{0300}-\x{036F}\x{203F}-\x{2040}]*$/Su';
public function getElementsByClassName(string $classNames): \DOMNodeList {
# The list of elements with class names classNames for a node root is the
# HTMLCollection returned by the following algorithm:
// DEVIATION: There's no HTMLCollection. The result will be a DOMNodeList
// instead. It is, fortunately, almost exactly the same thing anyway.
# 1. Let classes be the result of running the ordered set parser on classNames.
#
## The ordered set parser takes a string input and then runs these steps:
##
## 1. Let inputTokens be the result of splitting input on ASCII whitespace.
// There isn't a Set object in php, so make sure all the tokens are unique.
$inputTokens = ($classNames !== '') ? array_unique(preg_split(Data::WHITESPACE_REGEX, $classNames)) : [];
$isDocument = ($this instanceof Document);
$document = ($isDocument) ? $this : $this->ownerDocument;
## 2. Let tokens be a new ordered set.
## 3. For each token in inputTokens, append token to tokens.
## 4. Return tokens.
// There isn't a Set object in php, so just use the uniqued input tokens.
# 2. If classes is the empty set, return an empty HTMLCollection.
// DEVIATION: We can't do that, so let's create a bogus Xpath query instead.
if ($inputTokens === []) {
$ook = $document->createElement('ook');
$query = $document->xpath->query('//eek', $ook);
unset($ook);
return $query;
}
# 3. Return a HTMLCollection rooted at root, whose filter matches descendant
# elements that have all their classes in classes.
#
# The comparisons for the classes must be done in an ASCII case-insensitive manner
# if root’s node document’s mode is "quirks"; otherwise in an identical to manner.
// DEVIATION: Since we can't just create a NodeList we must instead query
// the document with XPath with the root element to get a list.
$query = '//*';
foreach ($inputTokens as $token) {
$query .= "[@class=\"$token\"]";
}
return $document->xpath->query($query, $this);
}
/**
 * Escapes a string for HTML serialization.
 *
 * @param string $string - The text to escape
 * @param bool $attribute - True when the text is an attribute value
 * @return string The escaped text
 */
protected function escapeString(string $string, bool $attribute = false): string {
    # Escaping a string consists of running the following steps:
    # 1. Replace any occurrence of the "&" character by the string "&amp;".
    # 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the
    #    string "&nbsp;".
    $replacements = [
        '&' => '&amp;',
        "\u{A0}" => '&nbsp;'
    ];

    if ($attribute) {
        # 3. If the algorithm was invoked in the attribute mode, replace any
        #    occurrences of the """ character by the string "&quot;".
        $replacements['"'] = '&quot;';
    } else {
        # 4. If the algorithm was not invoked in the attribute mode, replace any
        #    occurrences of the "<" character by the string "&lt;", and any
        #    occurrences of the ">" character by the string "&gt;".
        $replacements['<'] = '&lt;';
        $replacements['>'] = '&gt;';
    }

    // strtr() substitutes in a single pass, so the ampersands introduced by the
    // replacements themselves are never escaped a second time.
    return strtr($string, $replacements);
}
/**
 * Reports whether a node has no namespace or is in the HTML namespace.
 *
 * @param ?Node $node - The node to inspect; defaults to the node the method is called on
 */
protected function isHTMLNamespace(?Node $node = null): bool {
    $namespace = ($node ?? $this)->namespaceURI;
    return ($namespace === null || $namespace === Parser::HTML_NAMESPACE);
}
/**
 * Validates a qualified name against a namespace and splits it into its parts.
 *
 * @param string $qualifiedName - The name to validate, optionally carrying a "prefix:" part
 * @param ?string $namespace - The namespace the name is meant for; an empty string is treated as null
 * @return array An array with the keys 'namespace', 'prefix' and 'localName'
 * @throws DOMException INVALID_CHARACTER when the name does not match the QName production;
 *         NAMESPACE_ERROR when prefix and namespace are inconsistent
 */
protected function validateAndExtract(string $qualifiedName, ?string $namespace = null): array {
    static $qNameProductionRegex = '/^([A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}][A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}-\.0-9\x{B7}\x{0300}-\x{036F}\x{203F}-\x{2040}]*:)?[A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}][A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}-\.0-9\x{B7}\x{0300}-\x{036F}\x{203F}-\x{2040}]*$/Su';

    # To validate and extract a namespace and qualifiedName, run these steps:
    # 1. If namespace is the empty string, set it to null.
    $namespace = ($namespace === '') ? null : $namespace;

    # 2. Validate qualifiedName: throw an "InvalidCharacterError" DOMException if
    #    qualifiedName does not match the QName production.
    if (preg_match($qNameProductionRegex, $qualifiedName) !== 1) {
        throw new DOMException(DOMException::INVALID_CHARACTER);
    }

    # 3. Let prefix be null.
    # 4. Let localName be qualifiedName.
    $prefix = null;
    $localName = $qualifiedName;

    # 5. If qualifiedName contains a ":" (U+003A), then split the string on it and
    #    set prefix to the part before and localName to the part after.
    $pieces = explode(':', $qualifiedName, 2);
    if (count($pieces) === 2) {
        [$head, $localName] = $pieces;
        $prefix = ($head === '') ? null : $head;
    }

    # 6. If prefix is non-null and namespace is null, then throw a "NamespaceError"
    #    DOMException.
    if ($prefix !== null && $namespace === null) {
        throw new DOMException(DOMException::NAMESPACE_ERROR);
    }
    # 7. If prefix is "xml" and namespace is not the XML namespace, then throw a
    #    "NamespaceError" DOMException.
    if ($prefix === 'xml' && $namespace !== Parser::XML_NAMESPACE) {
        throw new DOMException(DOMException::NAMESPACE_ERROR);
    }

    $mentionsXmlns = ($qualifiedName === 'xmlns' || $prefix === 'xmlns');
    # 8. If either qualifiedName or prefix is "xmlns" and namespace is not the XMLNS
    #    namespace, then throw a "NamespaceError" DOMException.
    if ($mentionsXmlns && $namespace !== Parser::XMLNS_NAMESPACE) {
        throw new DOMException(DOMException::NAMESPACE_ERROR);
    }
    # 9. If namespace is the XMLNS namespace and neither qualifiedName nor prefix is
    #    "xmlns", then throw a "NamespaceError" DOMException.
    if ($namespace === Parser::XMLNS_NAMESPACE && !$mentionsXmlns) {
        throw new DOMException(DOMException::NAMESPACE_ERROR);
    }

    # 10. Return namespace, prefix, and localName.
    return [
        'namespace' => $namespace,
        'prefix' => $prefix,
        'localName' => $localName
    ];
}
}

33
lib/traits/LeafNode.php

@ -1,33 +0,0 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
/**
 * Not in standard. Exists so all node types that cannot contain children will have
 * the child insertion, removal and replacement methods disabled.
 */
trait LeafNode {
    use NodeTrait;

    /**
     * Always fails: a leaf node cannot be a parent.
     *
     * @throws DOMException HIERARCHY_REQUEST_ERROR
     */
    public function appendChild($node) {
        throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
    }

    /**
     * Always fails: a leaf node cannot be a parent.
     *
     * @throws DOMException HIERARCHY_REQUEST_ERROR
     */
    public function insertBefore($node, $child = null) {
        throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
    }

    /**
     * Always fails: a leaf node has no children, so $child can never be one of them.
     *
     * @throws DOMException NOT_FOUND
     */
    public function removeChild($child) {
        # To pre-remove a child from a parent:
        # 1. If child's parent is not parent, then throw a "NotFoundError" DOMException.
        throw new DOMException(DOMException::NOT_FOUND);
    }

    /**
     * Always fails: a leaf node cannot be a parent.
     *
     * @throws DOMException HIERARCHY_REQUEST_ERROR
     */
    public function replaceChild($node, $child) {
        # To replace a child with node within a parent:
        # 1. If parent is not a Document, DocumentFragment, or Element node, then throw
        #    a "HierarchyRequestError" DOMException.
        throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
    }
}

272
lib/traits/NodeTrait.php

@ -1,272 +0,0 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
trait NodeTrait {
// Lazily initialized 0/1 value used by compareDocumentPosition() to pick between
// DOCUMENT_POSITION_PRECEDING and DOCUMENT_POSITION_FOLLOWING for disconnected
// nodes. It is set once and then reused. Note that PHP gives every class using
// this trait its own copy of a trait's static property.
private static ?int $rand = null;
/**
 * Disabled. Always throws instead of canonicalizing the node.
 *
 * @throws Exception DISABLED_METHOD
 */
public function C14N($exclusive = null, $with_comments = null, ?array $xpath = null, ?array $ns_prefixes = null): bool {
    $reason = 'It is meant for XML and buggy; use Document::saveHTML or cast to a string';
    throw new Exception(Exception::DISABLED_METHOD, __METHOD__, $reason);
}
/**
 * Disabled. Always throws instead of canonicalizing the node to a file.
 *
 * @throws Exception DISABLED_METHOD
 */
public function C14NFile($uri, $exclusive = null, $with_comments = null, ?array $xpath = null, ?array $ns_prefixes = null): bool {
    $reason = 'It is meant for XML and buggy; use Document::saveHTML or cast to a string';
    throw new Exception(Exception::DISABLED_METHOD, __METHOD__, $reason);
}
/**
 * Describes where another node sits relative to this one.
 *
 * @param Node $other - The node to compare against
 * @return int Zero when $other is this node, otherwise a sum of Node::DOCUMENT_POSITION_* constants
 */
public function compareDocumentPosition(Node $other): int {
    # The compareDocumentPosition(other) method steps are:
    #
    # 1. If this is other, then return zero.
    if ($other === $this) {
        return 0;
    }

    # 2. Let node1 be other and node2 be this.
    # 3. Let attr1 and attr2 be null.
    $node1 = $other;
    $node2 = $this;
    $attr1 = null;
    $attr2 = null;

    # 4. If node1 is an attribute, then set attr1 to node1 and node1 to attr1's
    #    element.
    if ($node1 instanceof Attr) {
        $attr1 = $node1;
        $node1 = $attr1->ownerElement;
    }

    # 5. If node2 is an attribute, then:
    if ($node2 instanceof Attr) {
        # 1. Set attr2 to node2 and node2 to attr2's element.
        $attr2 = $node2;
        $node2 = $attr2->ownerElement;

        # 2. If attr1 and node1 are non-null, and node2 is node1, then:
        if ($attr1 !== null && $node1 !== null && $node2 === $node1) {
            # 1. For each attr in node2's attribute list:
            foreach ($node2->attributes as $candidate) {
                # 1. If attr equals attr1, then return the result of adding
                #    DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC and
                #    DOCUMENT_POSITION_PRECEDING.
                if ($candidate === $attr1) {
                    return Node::DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC + Node::DOCUMENT_POSITION_PRECEDING;
                }
                # 2. If attr equals attr2, then return the result of adding
                #    DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC and
                #    DOCUMENT_POSITION_FOLLOWING.
                if ($candidate === $attr2) {
                    return Node::DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC + Node::DOCUMENT_POSITION_FOLLOWING;
                }
            }
        }
    }

    # 6. If node1 or node2 is null, or node1's root is not node2's root, then return
    #    the result of adding DOCUMENT_POSITION_DISCONNECTED,
    #    DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC, and either
    #    DOCUMENT_POSITION_PRECEDING or DOCUMENT_POSITION_FOLLOWING, with the
    #    constraint that this is to be consistent, together.
    // The direction is a cached random value, as the spec's note permits.
    self::$rand ??= rand(0, 1);
    if ($node1 === null || $node2 === null || $node1->getRootNode() !== $node2->getRootNode()) {
        $direction = (self::$rand === 0) ? Node::DOCUMENT_POSITION_PRECEDING : Node::DOCUMENT_POSITION_FOLLOWING;
        return Node::DOCUMENT_POSITION_DISCONNECTED + Node::DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC + $direction;
    }

    // Filter shared by the tree walks below; accepts only node1.
    $isNode1 = fn($n) => ($n === $node1);

    # 7. If node1 is an ancestor of node2 and attr1 is null, or node1 is node2 and
    #    attr2 is non-null, then return the result of adding
    #    DOCUMENT_POSITION_CONTAINS to DOCUMENT_POSITION_PRECEDING.
    if ($node1 === $node2 && $attr2 !== null) {
        return Node::DOCUMENT_POSITION_CONTAINS + Node::DOCUMENT_POSITION_PRECEDING;
    }
    if ($attr1 === null && $node2->moonwalk($isNode1)->current() !== null) {
        return Node::DOCUMENT_POSITION_CONTAINS + Node::DOCUMENT_POSITION_PRECEDING;
    }

    # 8. If node1 is a descendant of node2 and attr2 is null, or node1 is node2 and
    #    attr1 is non-null, then return the result of adding
    #    DOCUMENT_POSITION_CONTAINED_BY to DOCUMENT_POSITION_FOLLOWING.
    if ($node1 === $node2 && $attr1 !== null) {
        return Node::DOCUMENT_POSITION_CONTAINED_BY + Node::DOCUMENT_POSITION_FOLLOWING;
    }
    if ($attr2 === null && $node2->walk($isNode1)->current() !== null) {
        return Node::DOCUMENT_POSITION_CONTAINED_BY + Node::DOCUMENT_POSITION_FOLLOWING;
    }

    # 9. If node1 is preceding node2, then return DOCUMENT_POSITION_PRECEDING.
    # 10. Return DOCUMENT_POSITION_FOLLOWING.
    return ($node2->walkPreceding($isNode1)->current() !== null)
        ? Node::DOCUMENT_POSITION_PRECEDING
        : Node::DOCUMENT_POSITION_FOLLOWING;
}
/**
 * Reports whether another node is this node or one of its descendants.
 *
 * @param \DOMDocumentType|Node|null $other - The node to look for
 * @return bool True when $other is an inclusive descendant of this node; false otherwise, including when $other is null
 */
public function contains(\DOMDocumentType|Node|null $other): bool {
    # The contains(other) method steps are to return true if other is an inclusive
    # descendant of this; otherwise false (including when other is null).
    if ($other === null) {
        return false;
    }

    // "Inclusive" means a node is its own inclusive descendant, so every node
    // contains itself regardless of its type or whether it has a parent.
    if ($other === $this) {
        return true;
    }

    // Cheap rejections before walking the tree: a proper descendant must have a
    // parent, cannot be an attribute, document or document fragment, and only
    // documents, document fragments and elements can have descendants at all.
    if ($other->parentNode === null || $other instanceof Attr || $other instanceof Document || $other instanceof DocumentFragment || (!$this instanceof Document && !$this instanceof DocumentFragment && !$this instanceof Element)) {
        return false;
    }

    // Nodes belonging to different documents cannot be related.
    $thisDoc = ($this instanceof Document) ? $this : $this->ownerDocument;
    if ($thisDoc !== $other->ownerDocument) {
        return false;
    }

    return ($this->walk(function($n) use($other) {
        return ($n === $other);
    })->current() !== null);
}
/**
 * Reports whether another node is structurally equal to this one: same class,
 * same type-specific data and, for parent nodes, pairwise equal children.
 *
 * @param \DOMDocumentType|Node $otherNode - The node to compare against
 */
public function isEqualNode(\DOMDocumentType|Node $otherNode): bool {
    # The isEqualNode(otherNode) method steps are to return true if otherNode is
    # non-null and this equals otherNode; otherwise false.
    # A node A equals a node B if all of the following conditions are true:
    #
    # • A and B implement the same interfaces.
    if ($this::class !== $otherNode::class) {
        return false;
    }

    # • The following are equal, switching on the interface A implements:
    $thisClass = substr($this::class, strrpos($this::class, '\\') + 1);
    switch ($thisClass) {
        # - DocumentType
        #   Its name, public ID, and system ID.
        // DEVIATION: $this can never be a \DOMDocumentType seeing as we cannot extend
        // \DOMDocumentType, so there is no need to check for it.

        # - Element
        #   Its namespace, namespace prefix, local name, and its attribute list's size.
        // PCOV is stupid
        // @codeCoverageIgnoreStart
        case 'Element':
        // @codeCoverageIgnoreEnd
            if ($this->namespaceURI !== $otherNode->namespaceURI || $this->prefix !== $otherNode->prefix || $this->localName !== $otherNode->localName || $this->attributes->length !== $otherNode->attributes->length) {
                return false;
            }

            # • If A is an element, each attribute in its attribute list has an attribute that
            #   equals an attribute in B's attribute list.
            foreach ($this->attributes as $key => $attr) {
                if (!$attr->isEqualNode($otherNode->attributes[$key])) {
                    return false;
                }
            }
        break;
        # - Attr
        #   Its namespace, local name, and value.
        // PCOV is stupid
        // @codeCoverageIgnoreStart
        case 'Attr':
        // @codeCoverageIgnoreEnd
            if ($this->namespaceURI !== $otherNode->namespaceURI || $this->localName !== $otherNode->localName || $this->value !== $otherNode->value) {
                return false;
            }
        break;
        # - ProcessingInstruction
        #   Its target and data.
        // PCOV is stupid
        // @codeCoverageIgnoreStart
        case 'ProcessingInstruction':
        // @codeCoverageIgnoreEnd
            if ($this->target !== $otherNode->target || $this->data !== $otherNode->data) {
                return false;
            }
        break;
        # - Text
        # - Comment
        #   Its data.
        // PCOV is stupid
        // @codeCoverageIgnoreStart
        case 'Text':
        case 'Comment':
        // @codeCoverageIgnoreEnd
            if ($this->data !== $otherNode->data) {
                return false;
            }
        break;
    }

    if ($this instanceof Document || $this instanceof DocumentFragment || $this instanceof Element) {
        # • A and B have the same number of children.
        if ($this->childNodes->length !== $otherNode->childNodes->length) {
            return false;
        }

        # • Each child of A equals the child of B at the identical index.
        foreach ($this->childNodes as $key => $child) {
            $otherChild = $otherNode->childNodes[$key];

            // Have to work around the fact we cannot extend \DOMDocumentType
            if ($child instanceof \DOMDocumentType) {
                // A doctype can only equal another doctype; check the type first so
                // doctype-only properties are never read from a different kind of node.
                if (!$otherChild instanceof \DOMDocumentType || $child->name !== $otherChild->name || $child->publicId !== $otherChild->publicId || $child->systemId !== $otherChild->systemId) {
                    return false;
                }
            } elseif (!$child->isEqualNode($otherChild)) {
                return false;
            }
        }
    }

    return true;
}
/**
 * Disabled. Always throws instead of reporting a line number.
 *
 * @throws Exception DISABLED_METHOD
 */
public function getLineNo(): int {
    // NOTE(review): this reason text is identical to the one used for C14N() and
    // refers to serialization rather than line numbers; confirm it is intended.
    $reason = 'It is meant for XML and buggy; use Document::saveHTML or cast to a string';
    throw new Exception(Exception::DISABLED_METHOD, __METHOD__, $reason);
}
/**
 * Returns the root of the tree this node belongs to; that is the node itself
 * when it has no parent.
 */
public function getRootNode(): ?Node {
    # The getRootNode(options) method steps are to return this's shadow-including
    # root if options["composed"] is true; otherwise this's root.
    // DEVIATION: This implementation does not have scripting, so there's no Shadow
    // DOM. Therefore, there isn't a need for the options parameter.

    # The root of an object is itself, if its parent is null, or else it is the root
    # of its parent. The root of a tree is any object participating in that tree
    # whose parent is null.
    if ($this->parentNode !== null) {
        // The first ancestor without a parent of its own is the root.
        return $this->moonwalk(fn($ancestor) => ($ancestor->parentNode === null))->current();
    }

    return $this;
}
/**
 * Converts a list of nodes and strings into a single node.
 *
 * @param array $nodes - Nodes and/or strings; each string becomes a text node
 * @return Node The only entry when exactly one is supplied, otherwise a document
 *              fragment holding all entries in order (empty when none are supplied)
 */
private function convertNodesToNode(array $nodes): Node {
    # To convert nodes into a node, given nodes and document, run these steps:
    # 1. Let node be null.
    # 2. Replace each string in nodes with a new Text node whose data is the string
    #    and node document is document.
    # 3. If nodes contains one node, then set node to nodes[0].
    # 4. Otherwise, set node to a new DocumentFragment node whose node document is
    #    document, and then append each node in nodes, if any, to it.
    // The spec would have us iterate through the provided nodes and then iterate
    // through them again to append. Let's optimize this a wee bit, shall we?

    // A document has no owner document, so every node must be created through
    // $document rather than $this->ownerDocument.
    $document = ($this instanceof Document) ? $this : $this->ownerDocument;
    $toNode = fn($n) => (is_string($n)) ? $document->createTextNode($n) : $n;

    if (count($nodes) === 1) {
        return $toNode(reset($nodes));
    }

    // Zero or several entries: per step 4 the result is a fragment, which is simply
    // left empty when there is nothing to append.
    $fragment = $document->createDocumentFragment();
    foreach ($nodes as $n) {
        $fragment->appendChild($toNode($n));
    }

    return $fragment;
}
}

335
lib/traits/ParentNode.php

@ -1,335 +0,0 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
use Symfony\Component\CssSelector\CssSelectorConverter,
Symfony\Component\CssSelector\Exception\SyntaxErrorException as SymfonySyntaxErrorException;
# 4.2.6. Mixin ParentNode
trait ParentNode {
use NodeTrait;
/**
 * Getter for the children property: the element children of this node.
 */
protected function __get_children(): \DOMNodeList {
    # The children getter steps are to return an HTMLCollection collection rooted at
    # this matching only element children.
    // DEVIATION: PHP's DOM has no HTMLCollection and offers no way to build a node
    // list from scratch, so an XPath query for the child elements is used to
    // produce the nearly identical DOMNodeList instead.
    if ($this instanceof Document) {
        $owner = $this;
    } else {
        $owner = $this->ownerDocument;
    }

    return $owner->xpath->query('child::*', $this);
}
/**
 * Appends a node after validating the insertion.
 *
 * @return mixed The node passed in
 */
public function appendChild($node) {
    $this->preInsertionValidity($node);

    // Only successfully inserted template elements are added to the element map.
    if (parent::appendChild($node) !== false && $node instanceof HTMLTemplateElement) {
        ElementMap::add($node);
    }

    return $node;
}
/**
 * Inserts a node before a reference child after validating the insertion.
 *
 * @return mixed The node passed in
 */
public function insertBefore($node, $child = null) {
    $this->preInsertionValidity($node, $child);

    // Only successfully inserted template elements are added to the element map.
    if (parent::insertBefore($node, $child) !== false && $node instanceof HTMLTemplateElement) {
        ElementMap::add($node);
    }

    return $node;
}
/**
 * Returns the first element matching a CSS selector list, or null when nothing matches.
 *
 * @param string $selectors - The CSS selector list
 */
public function querySelector(string $selectors): ?Element {
    # The querySelector(selectors) method steps are to return the first result of
    # running scope-match a selectors string selectors against this, if the result
    # is not an empty list; otherwise null.
    $matches = $this->scopeMatchSelector($selectors);
    if ($matches === null) {
        return null;
    }

    return $matches[0];
}
/**
 * Returns a NodeList built from the result of matching a CSS selector list
 * against this node.
 *
 * @param string $selectors - The CSS selector list
 */
public function querySelectorAll(string $selectors): NodeList {
    # The querySelectorAll(selectors) method steps are to return the static result
    # of running scope-match a selectors string selectors against this.
    return new NodeList($this->scopeMatchSelector($selectors));
}
/**
 * Removes a child node.
 *
 * @return mixed The child passed in
 */
public function removeChild($child) {
    // A successfully removed element is also dropped from the element map.
    if (parent::removeChild($child) !== false && $child instanceof Element) {
        ElementMap::delete($child);
    }

    return $child;
}
/**
 * Replaces a child node with another node.
 *
 * @return mixed The replaced child
 */
public function replaceChild($node, $child) {
    $result = parent::replaceChild($node, $child);
    if ($result !== false) {
        // Keep the element map in step with the tree: track an inserted template
        // element and forget a removed element.
        if ($node instanceof HTMLTemplateElement) {
            ElementMap::add($node);
        }
        if ($child instanceof Element) {
            ElementMap::delete($child);
        }
    }

    # To replace a child with node within a parent: ... Return child.
    // The node that was replaced is returned, not the node that was inserted.
    return $child;
}
/**
 * Replaces all children of this node with the supplied nodes and strings.
 * Calling it without arguments simply removes every child.
 *
 * @param Node|string ...$nodes - The replacement content; strings become text nodes
 */
public function replaceChildren(Node|string ...$nodes) {
    # The replaceChildren(nodes) method steps are:
    // With nothing to insert there is nothing to convert or validate; only the
    // removal in step 3.5 applies.
    if ($nodes === []) {
        while ($this->hasChildNodes()) {
            $this->removeChild($this->firstChild);
        }
        return;
    }

    # 1. Let node be the result of converting nodes into a node given nodes and
    #    this's node document.
    $node = $this->convertNodesToNode($nodes);
    # 2. Ensure pre-insertion validity of node into this before null.
    $this->preInsertionValidity($node);

    # 3. Replace all with node within this.
    #
    # To replace all with a node within a parent, run these steps:
    # 1. Let removedNodes be parent's children.
    # 2. Let addedNodes be the empty set.
    # 3. If node is a DocumentFragment node, then set addedNodes to node's children.
    # 4. Otherwise, if node is non-null, set addedNodes to « node ».
    // DEVIATION: removedNodes and addedNodes only feed the mutation record of step
    // 7, which does not exist without scripting, so they are not collected.

    # 5. Remove all parent's children, in tree order, with the suppress observers
    #    flag set.
    // DEVIATION: There is no scripting in this implementation, so cannot set the
    // suppress observers flag.
    while ($this->hasChildNodes()) {
        $this->removeChild($this->firstChild);
    }

    # 6. If node is non-null, then insert node into parent before null with the
    #    suppress observers flag set.
    // DEVIATION: There is no scripting in this implementation, so cannot set the
    // suppress observers flag.
    $this->appendChild($node);

    # 7. If either addedNodes or removedNodes is not empty, then queue a tree
    #    mutation record for parent with addedNodes, removedNodes, null, and null.
    // DEVIATION: There is no scripting in this implementation
}
/**
 * Generator which walks down the DOM from the node the method is being run on.
 * Nonstandard.
 *
 * The filter decides what happens to each visited node: true yields the node and
 * descends into it, false skips the node but still descends into it, and null
 * skips the node together with its descendants. The contents of template elements
 * are walked through their content fragment.
 *
 * @param ?\Closure $filter - An optional callback function used to filter; if not provided the generator will
 *                            just yield every node.
 * @param bool $includeReferenceNode - An optional boolean flag which if true includes the reference node ($this) in
 *                                     the iteration. Ignored when $this is a document fragment.
 * @throws Exception CLOSURE_RETURN_TYPE_ERROR when the filter returns anything other than true, false or null
 */
public function walk(?\Closure $filter = null, bool $includeReferenceNode = false): \Generator {
    // Runs the filter on a node and enforces its ?bool return type.
    // Have to do type checking here because PHP is lacking in advanced typing
    $applyFilter = static function($candidate) use ($filter) {
        $result = ($filter === null) ? true : $filter($candidate);
        if ($result !== true && $result !== false && $result !== null) {
            $type = gettype($result);
            if ($type === 'object') {
                $type = get_class($result);
            }

            throw new Exception(Exception::CLOSURE_RETURN_TYPE_ERROR, '?bool', $type);
        }

        return $result;
    };

    if ($includeReferenceNode && !$this instanceof DocumentFragment) {
        // The reference node is handled on its own rather than as the first node of
        // the sibling loop below; otherwise the walk would continue through the
        // reference node's following siblings, and a parentless reference node
        // would never have its descendants visited.
        $wasAttached = ($this->parentNode !== null);
        $result = $applyFilter($this);
        if ($result === true) {
            yield $this;
        }

        // Stop when the filter rejected the subtree, or when the reference node was
        // detached from its parent while it was being processed.
        if ($result === null || ($wasAttached && $this->parentNode === null)) {
            return;
        }

        $container = ($this instanceof HTMLTemplateElement) ? $this->content : $this;
        if ($container->hasChildNodes()) {
            yield from $container->walk($filter);
        }

        return;
    }

    $node = $this->firstChild;
    while ($node !== null) {
        // Remember the next sibling first; the node may be removed while processed.
        $next = $node->nextSibling;
        $result = $applyFilter($node);
        if ($result === true) {
            yield $node;
        }

        // If the filter returns true (accept) or false (skip) and the node wasn't
        // removed in the filter iterate through the children
        if ($result !== null && $node->parentNode !== null) {
            $container = ($node instanceof HTMLTemplateElement) ? $node->content : $node;
            if ($container->hasChildNodes()) {
                yield from $container->walk($filter);
            }
        }

        $node = $next;
    }
}
/**
 * Ensures pre-insertion validity of a node into this node before a child, as
 * defined by the DOM standard. Returns nothing when the insertion is valid.
 *
 * @param \DOMDocumentType|Node $node - The node about to be inserted
 * @param \DOMDocumentType|Node|null $child - The reference child to insert before; null when appending
 * @throws DOMException HIERARCHY_REQUEST_ERROR when the insertion would produce an invalid tree;
 *         NOT_FOUND when $child is not a child of this node
 */
private function preInsertionValidity(\DOMDocumentType|Node $node, \DOMDocumentType|Node|null $child = null) {
    // "parent" in the spec comments below is $this

    # 1. If parent is not a Document, DocumentFragment, or Element node, then throw
    #    a "HierarchyRequestError" DOMException.
    // Not necessary here; this trait is only meant for nodes that can have children.

    # 2. If node is a host-including inclusive ancestor of parent, then throw a
    #    "HierarchyRequestError" DOMException.
    #
    #    An object A is a host-including inclusive ancestor of an object B, if either
    #    A is an inclusive ancestor of B, or if B's root has a non-null host and A is a
    #    host-including inclusive ancestor of B's root's host.
    // This must be checked whether or not node currently has a parent: a detached
    // subtree root can still be an ancestor of the node it is being inserted into.
    $isNode = fn($n) => ($n === $node);
    if ($this === $node || ($this->parentNode !== null && $this->moonwalk($isNode)->current() !== null)) {
        throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
    }

    $parentRoot = $this->getRootNode();
    if ($parentRoot instanceof DocumentFragment) {
        $parentRootHost = $parentRoot->host;
        if ($parentRootHost !== null && ($parentRootHost === $node || $parentRootHost->moonwalk($isNode)->current() !== null)) {
            throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
        }
    }

    # 3. If child is non-null and its parent is not parent, then throw a
    #    "NotFoundError" DOMException.
    if ($child !== null && $child->parentNode !== $this) {
        throw new DOMException(DOMException::NOT_FOUND);
    }

    # 4. If node is not a DocumentFragment, DocumentType, Element, Text,
    #    ProcessingInstruction, or Comment node, then throw a "HierarchyRequestError"
    #    DOMException.
    if (!$node instanceof DocumentFragment && !$node instanceof \DOMDocumentType && !$node instanceof Element && !$node instanceof Text && !$node instanceof ProcessingInstruction && !$node instanceof Comment) {
        throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
    }

    # 5. If either node is a Text node and parent is a document, or node is a
    #    doctype and parent is not a document, then throw a "HierarchyRequestError"
    #    DOMException.
    if (($node instanceof Text && $this instanceof Document) || ($node instanceof \DOMDocumentType && !$this instanceof Document)) {
        throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
    }

    # 6. If parent is a document, and any of the statements below, switched on the
    #    interface node implements, are true, then throw a "HierarchyRequestError".
    if (!$this instanceof Document) {
        return;
    }

    // True when a doctype comes somewhere after the given node among its siblings.
    $doctypeFollows = static function($reference): bool {
        while ($reference = $reference->nextSibling) {
            if ($reference instanceof \DOMDocumentType) {
                return true;
            }
        }
        return false;
    };
    // True when parent already has an element child.
    $parentHasElementChild = function(): bool {
        foreach ($this->childNodes as $c) {
            if ($c instanceof Element) {
                return true;
            }
        }
        return false;
    };

    # DocumentFragment node
    #    If node has more than one element child or has a Text node child.
    #    Otherwise, if node has one element child and either parent has an element
    #    child, child is a doctype, or child is non-null and a doctype is following
    #    child.
    if ($node instanceof DocumentFragment) {
        $nodeChildElementCount = $node->childElementCount;

        // Look at the fragment's own children only; this is also safe for an empty
        // fragment, which has no first child to start from.
        $hasTextChild = false;
        foreach ($node->childNodes as $c) {
            if ($c instanceof Text) {
                $hasTextChild = true;
                break;
            }
        }

        if ($nodeChildElementCount > 1 || $hasTextChild) {
            throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
        }
        if ($nodeChildElementCount === 1) {
            if ($this->childElementCount > 0 || $child instanceof \DOMDocumentType || ($child !== null && $doctypeFollows($child))) {
                throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
            }
        }
    }
    # element
    #    parent has an element child, child is a doctype, or child is non-null and a
    #    doctype is following child.
    elseif ($node instanceof Element) {
        if ($child instanceof \DOMDocumentType || ($child !== null && $doctypeFollows($child)) || $parentHasElementChild()) {
            throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
        }
    }
    # doctype
    #    parent has a doctype child, child is non-null and an element is preceding
    #    child, or child is null and parent has an element child.
    elseif ($node instanceof \DOMDocumentType) {
        foreach ($this->childNodes as $c) {
            if ($c instanceof \DOMDocumentType) {
                throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
            }
        }

        if ($child !== null) {
            $n = $child;
            while ($n = $n->previousSibling) {
                if ($n instanceof Element) {
                    throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
                }
            }
        } elseif ($parentHasElementChild()) {
            throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
        }
    }
}
/**
 * Scope-matches a CSS selector list against this node.
 *
 * @param string $selectors - The CSS selector list
 * @return ?\DOMNodeList The matching nodes; null when nothing matches or when the
 *                       selector could not be converted for a reason other than a syntax error
 * @throws DOMException SYNTAX_ERROR when the selector list is syntactically invalid
 */
private function scopeMatchSelector(string $selectors): ?\DOMNodeList {
    # To scope-match a selectors string selectors against a node, run these steps:
    # 1. Let s be the result of parse a selector selectors. [SELECTORS4]
    // Rather than parsing the selector itself, this implementation has Symfony's
    // CSS selector converter translate it into an XPath expression.
    try {
        $xpathExpression = (new CssSelectorConverter())->toXPath($selectors);
    } catch (SymfonySyntaxErrorException $syntaxError) {
        # 2. If s is failure, then throw a "SyntaxError" DOMException.
        throw new DOMException(DOMException::SYNTAX_ERROR);
    } catch (\Exception $unsupported) {
        // The converter also throws for selectors it merely does not support; those
        // are not syntax errors, so report no match instead.
        return null;
    }

    # 3. Return the result of match a selector against a tree with s and node's root
    #    using scoping root node. [SELECTORS4].
    if ($this instanceof Document) {
        $owner = $this;
    } else {
        $owner = $this->ownerDocument;
    }

    $matches = $owner->xpath->query($xpathExpression, $this);
    return ($matches->length === 0) ? null : $matches;
}
}

18
lib/traits/ToString.php

@ -1,18 +0,0 @@
<?php
/**
* @license MIT
* Copyright 2017 Dustin Wilson, J. King, et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM;
/**
 * Lets a node be cast to a string by serializing it as HTML.
 */
trait ToString {
    /**
     * Serializes a deep clone of this node through its owner document's saveHTML().
     */
    public function __toString(): string {
        // The clone is placed in a throwaway fragment so the node itself stays
        // where it is in the tree.
        $wrapper = $this->ownerDocument->createDocumentFragment();
        $copy = $this->cloneNode(true);
        $wrapper->appendChild($copy);

        return $this->ownerDocument->saveHTML($wrapper);
    }
}
Loading…
Cancel
Save