Changed ElementRegistry to ElementMap, destructors for ElementMap

This commit is contained in:
Dustin Wilson 2021-04-07 23:35:16 -05:00
parent dddb7601f4
commit 3a0ffafc7a
12 changed files with 231 additions and 194 deletions

View file

@ -46,60 +46,3 @@ This library and [masterminds/html5](https://packagist.org/packages/masterminds/
† With HTML namespace disabled. With HTML namespace enabled it does not finish in a reasonable time due to a PHP bug.
‡ With parse errors suppressed. Reporting parse errors adds approximately 10% overhead.
## Document Object Model ##
This library works by parsing HTML strings into PHP's existing XML DOM. It, however, has to force the antiquated PHP DOM extension into working properly with modern HTML DOM by extending many of the node types. The documentation below follows PHP's doc style guide with the exception of inherited methods and properties not being listed. Therefore, only new constants, properties, and methods will be listed; in addition, extended methods which change outward behavior from their parent class will be listed.
### MensBeam\\HTML\\Document ###
```php
MensBeam\HTML\Document extends \DOMDocument {
/* Constants */
public const NO_QUIRKS_MODE = 0;
public const QUIRKS_MODE = 1;
public const LIMITED_QUIRKS_MODE = 2;
/* Properties */
public string|null $documentEncoding = null;
public int $quirksMode = 0;
/* Methods */
public load ( string $filename , null $options = null , string|null $encodingOrContentType = null ) : bool
public loadHTML ( string $source , null $options = null , string|null $encodingOrContentType = null ) : bool
public loadHTMLFile ( string $filename , null $options = null , string|null $encodingOrContentType = null ) : bool
public loadXML ( string $source , null $options = null ) : false
public save ( string $filename , null $options = null ) : int|false
public saveXML ( DOMNode|null $node = null , null $options = null ) : false
public validate ( ) : true
public xinclude ( null $options = null ) : false
}
```
#### Properties ####
<dl>
<dt>documentEncoding</dt>
<dd>Encoding of the document, as specified when parsing or when determining encoding type.</dd>
<dt>quirksMode</dt>
<dd>Used when parsing. Can be not in quirks mode, quirks mode, or limited quirks mode. See the `MensBeam\HTML\Document` constants to see the valid values.</dd>
</dl>
The following properties inherited from `\DOMDocument` have no effect on `MensBeam\HTML\Document`:
* actualEncoding
* config
* encoding
* formatOutput
* preserveWhiteSpace
* recover
* resolveExternals
* standalone
* substituteEntities
* validateOnParse
* version
* xmlEncoding
* xmlStandalone
* xmlVersion

View file

@ -7,7 +7,7 @@ declare(strict_types=1);
namespace MensBeam\HTML;
class Comment extends \DOMComment {
use C14N, Moonwalk;
use Moonwalk, Node;
public function __toString(): string {
# Append the literal string "<!--" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION

View file

@ -12,6 +12,7 @@ class DOMException extends \Exception {
const WRONG_DOCUMENT = 4;
const INVALID_CHARACTER = 5;
const NO_MODIFICATION_ALLOWED = 7;
const NOT_FOUND = 8;
const SYNTAX_ERROR = 12;
const DOCUMENT_ELEMENT_DOCUMENTFRAG_EXPECTED = 100;
@ -23,6 +24,7 @@ class DOMException extends \Exception {
4 => 'Supplied node does not belong to this document',
5 => 'Invalid character',
7 => 'Modification not allowed here',
8 => 'Not found error',
12 => 'Syntax error',
100 => 'Document, Element, or DocumentFragment expected; found %s',
101 => 'The "%s" argument should be a string; found %s',

View file

@ -7,7 +7,7 @@ declare(strict_types=1);
namespace MensBeam\HTML;
class Document extends \DOMDocument {
use C14N, EscapeString, Serialize, Walk;
use EscapeString, Node, Serialize, Walk;
// Quirks mode constants
public const NO_QUIRKS_MODE = 0;
@ -29,21 +29,6 @@ class Document extends \DOMDocument {
$this->registerNodeClass('DOMText', '\MensBeam\HTML\Text');
}
public function appendChild($node) {
# If node is not a DocumentFragment, DocumentType, Element, Text,
# ProcessingInstruction, or Comment node then throw a "HierarchyRequestError"
# DOMException.
if (!$node instanceof DocumentFragment && !$node instanceof \DOMDocumentType && !$node instanceof Element &&!$node instanceof Text && !$node instanceof ProcessingInstruction && !$node instanceof Comment) {
throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
}
$result = parent::appendChild($node);
if ($result !== false && $result instanceof TemplateElement) {
ElementRegistry::set($result);
}
return $result;
}
/**
 * Creates an attribute by delegating to createAttributeNS() with a null
 * namespace.
 *
 * @param mixed $name Attribute name, forwarded unchanged to createAttributeNS()
 * @return mixed Whatever createAttributeNS() returns for the null namespace
 */
public function createAttribute($name) {
return $this->createAttributeNS(null, $name);
}
@ -85,7 +70,7 @@ class Document extends \DOMDocument {
} else {
$e = new TemplateElement($this, $qualifiedName, $value);
// Template elements need to have a reference kept in userland
ElementRegistry::set($e);
ElementMap::set($e);
$e->content = $this->createDocumentFragment();
}
@ -108,26 +93,6 @@ class Document extends \DOMDocument {
return false;
}
public function insertBefore($node, $child = null) {
# If node is not a DocumentFragment, DocumentType, Element, Text,
# ProcessingInstruction, or Comment node then throw a "HierarchyRequestError"
# DOMException.
if (!$node instanceof DocumentFragment && !$node instanceof \DOMDocumentType && !$node instanceof Element &&!$node instanceof Text && !$node instanceof ProcessingInstruction && !$node instanceof Comment) {
throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
}
$result = parent::insertBefore($node, $child);
if ($result !== false) {
if ($result instanceof TemplateElement) {
ElementRegistry::set($result);
}
if ($child instanceof TemplateElement) {
ElementRegistry::delete($child);
}
}
return $result;
}
public function load($filename, $options = null, ?string $encodingOrContentType = null): bool {
$data = Parser::fetchFile($filename, $encodingOrContentType);
if (!$data) {
@ -152,27 +117,6 @@ class Document extends \DOMDocument {
return false;
}
public function removeChild($child) {
$result = parent::removeChild($child);
if ($result !== false && $result instanceof TemplateElement) {
ElementRegistry::delete($child);
}
return $result;
}
public function replaceChild($node, $child) {
$result = parent::replaceChild($node, $child);
if ($result !== false) {
if ($result instanceof TemplateElement) {
ElementRegistry::set($child);
}
if ($child instanceof TemplateElement) {
ElementRegistry::delete($child);
}
}
return $result;
}
/**
 * Writes the serialized document to a file.
 *
 * @param mixed $filename Destination path handed to file_put_contents()
 * @param mixed $options Accepted in the signature but not read by this method
 * @return int|false The result of file_put_contents()
 */
public function save($filename, $options = null) {
    $serialized = $this->serialize();
    $written = file_put_contents($filename, $serialized);

    return $written;
}
@ -203,6 +147,10 @@ class Document extends \DOMDocument {
return false;
}
/**
 * On destruction, asks ElementMap to drop the elements it is holding for this
 * document (ElementMap::destroy() unsets every stored element whose
 * ownerDocument is the document passed in).
 */
public function __destruct() {
ElementMap::destroy($this);
}
/**
 * String conversion of the document: returns the result of serialize().
 */
public function __toString() {
return $this->serialize();
}

View file

@ -7,7 +7,7 @@ declare(strict_types=1);
namespace MensBeam\HTML;
class DocumentFragment extends \DOMDocumentFragment {
use C14N, Moonwalk, Serialize;
use Moonwalk, Node, Serialize, Walk;
public function __toString() {
return $this->serialize();

View file

@ -7,25 +7,10 @@ declare(strict_types=1);
namespace MensBeam\HTML;
class Element extends \DOMElement {
use C14N, EscapeString, Moonwalk, Serialize, Walk;
use EscapeString, Moonwalk, Node, Serialize, Walk;
protected $_classList;
public function appendChild($node) {
# If node is not a DocumentFragment, DocumentType, Element, Text,
# ProcessingInstruction, or Comment node then throw a "HierarchyRequestError"
# DOMException.
if (!$node instanceof DocumentFragment && !$node instanceof \DOMDocumentType && !$node instanceof Element &&!$node instanceof Text && !$node instanceof ProcessingInstruction && !$node instanceof Comment) {
throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
}
$result = parent::appendChild($node);
if ($result !== false && $result instanceof TemplateElement) {
ElementRegistry::set($result);
}
return $result;
}
public function getAttribute($name) {
// Newer versions of the DOM spec have getAttribute return an empty string only
// when the attribute exists and is empty, otherwise null. This fixes that.
@ -48,45 +33,19 @@ class Element extends \DOMElement {
return $value;
}
public function insertBefore($node, $child = null) {
# If node is not a DocumentFragment, DocumentType, Element, Text,
# ProcessingInstruction, or Comment node then throw a "HierarchyRequestError"
# DOMException.
if (!$node instanceof DocumentFragment && !$node instanceof \DOMDocumentType && !$node instanceof Element &&!$node instanceof Text && !$node instanceof ProcessingInstruction && !$node instanceof Comment) {
throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
}
public function isAncestorOf(\DOMNode $node): bool {
# An inclusive ancestor is an object or one of its ancestors.
#
# An object A is called an ancestor of an object B if and only if B is a
# descendant of A.
// object A is $this, object B is $node
$tree = $this->walk(function($n) use($node) {
if ($n->isSameNode($node)) {
return true;
}
});
$result = parent::insertBefore($node, $child);
if ($result !== false) {
if ($result instanceof TemplateElement) {
ElementRegistry::set($result);
}
if ($child instanceof TemplateElement) {
ElementRegistry::delete($child);
}
}
return $result;
}
public function removeChild($child) {
$result = parent::removeChild($child);
if ($result !== false && $result instanceof TemplateElement) {
ElementRegistry::delete($child);
}
return $result;
}
public function replaceChild($node, $child) {
$result = parent::replaceChild($node, $child);
if ($result !== false) {
if ($result instanceof TemplateElement) {
ElementRegistry::set($child);
}
if ($child instanceof TemplateElement) {
ElementRegistry::delete($child);
}
}
return $result;
return ($tree->current() !== null);
}
public function setAttribute($name, $value) {

View file

@ -9,8 +9,8 @@ namespace MensBeam\HTML;
// This is a write-only map of elements which need to be kept in memory; it
// exists because values of properties on derived DOM classes are lost unless at
// least one PHP reference is kept for the element somewhere in userspace. This
// is that somewhere. It is at present only used for TemplateElements.
class ElementRegistry {
// is that somewhere. It is at present only used for template elements.
class ElementMap {
public static $_storage = [];
public static function delete(Element $element) {
@ -24,6 +24,14 @@ class ElementRegistry {
return false;
}
/**
 * Forgets every stored element that belongs to the supplied document.
 *
 * @param Document $document The document whose stored elements are released
 */
public static function destroy(Document $document) {
    // Walk a snapshot of the keys so entries can be unset while iterating
    $keys = array_keys(self::$_storage);
    foreach ($keys as $key) {
        $element = self::$_storage[$key];
        if (!$element->ownerDocument->isSameNode($document)) {
            continue;
        }

        unset(self::$_storage[$key]);
    }
}
public static function has(Element $element) {
foreach (self::$_storage as $v) {
if ($v->isSameNode($element)) {

View file

@ -7,7 +7,7 @@ declare(strict_types=1);
namespace MensBeam\HTML;
class ProcessingInstruction extends \DOMProcessingInstruction {
use C14N, Moonwalk;
use Moonwalk, Node;
public function __toString(): string {
# Append the literal string "<?" (U+003C LESS-THAN SIGN, U+003F QUESTION MARK),

View file

@ -22,4 +22,8 @@ class TemplateElement extends Element {
$frag->removeChild($this);
unset($frag);
}
/**
 * On destruction, passes this element to ElementMap::delete().
 * NOTE(review): presumably this removes the element's entry from the map;
 * confirm against the body of ElementMap::delete().
 */
public function __destruct() {
ElementMap::delete($this);
}
}

View file

@ -7,7 +7,7 @@ declare(strict_types=1);
namespace MensBeam\HTML;
class Text extends \DOMText {
use C14N, EscapeString, Moonwalk;
use EscapeString, Moonwalk, Node;
function __toString(): string {
# If the parent of current node is a style, script, xmp, iframe, noembed,

View file

@ -1,18 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
// Disables C14N in extended DOM classes
trait C14N {
/**
 * Stub override of C14N(): ignores every argument and always returns false.
 */
public function C14N($exclusive = null, $with_comments = null, ?array $xpath = null, ?array $ns_prefixes = null): bool {
return false;
}
/**
 * Stub override of C14NFile(): ignores every argument and always returns
 * false.
 */
public function C14NFile($uri, $exclusive = null, $with_comments = null, ?array $xpath = null, ?array $ns_prefixes = null): bool {
return false;
}
}

191
lib/DOM/traits/Node.php Normal file
View file

@ -0,0 +1,191 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
// Extensions to PHP's DOM cannot inherit from an extended Node parent, so a
// trait is the next best thing...
trait Node {
    /**
     * Runs the DOM "ensure pre-insertion validity" steps for inserting $node
     * into $this (the parent), before $child when one is given.
     *
     * @param \DOMNode $node The node about to be inserted
     * @param \DOMNode|null $child Reference child, or null when appending
     * @throws DOMException HIERARCHY_REQUEST_ERROR or NOT_FOUND when a step fails
     */
    protected function preInsertionValidity(\DOMNode $node, ?\DOMNode $child = null) {
        // "parent" is $this

        # 1. If parent is not a Document, DocumentFragment, or Element node,
        # then throw a "HierarchyRequestError" DOMException.
        if (!$this instanceof Document && !$this instanceof DocumentFragment && !$this instanceof Element) {
            throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
        }

        # 2. If node is a host-including inclusive ancestor of parent, then
        # throw a "HierarchyRequestError" DOMException.
        #
        # An object A is a host-including inclusive ancestor of an object B, if
        # either A is an inclusive ancestor of B, or if B's root has a non-null
        # host and A is a host-including inclusive ancestor of B's root's host.
        // DEVIATION: The baseline for this library is PHP 7.1, and without
        // WeakReferences we cannot add a host property to DocumentFragment to
        // check against.
        if ($node instanceof Element && $node->isAncestorOf($this)) {
            throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
        }

        # 3. If child is non-null and its parent is not parent, then throw a
        # "NotFoundError" DOMException.
        // A detached child has a null parentNode; treat that as "not found"
        // rather than calling a method on null.
        if ($child !== null && ($child->parentNode === null || !$child->parentNode->isSameNode($this))) {
            throw new DOMException(DOMException::NOT_FOUND);
        }

        # 4. If node is not a DocumentFragment, DocumentType, Element,
        # Text, ProcessingInstruction, or Comment node, then throw a
        # "HierarchyRequestError" DOMException.
        if (!$node instanceof DocumentFragment && !$node instanceof \DOMDocumentType && !$node instanceof Element && !$node instanceof Text && !$node instanceof ProcessingInstruction && !$node instanceof Comment) {
            throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
        }

        # 5. If either node is a Text node and parent is a document, or
        # node is a doctype and parent is not a document, then throw a
        # "HierarchyRequestError" DOMException.
        if (($node instanceof Text && $this instanceof Document) || ($node instanceof \DOMDocumentType && !$this instanceof Document)) {
            throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
        }

        # 6. If parent is a document, and any of the statements below, switched
        # on node, are true, then throw a "HierarchyRequestError" DOMException.
        if (!$this instanceof Document) {
            return;
        }

        # DocumentFragment node
        #    If node has more than one element child or has a Text node child.
        #    Otherwise, if node has one element child and either parent has an
        #    element child, child is a doctype, or child is non-null and a
        #    doctype is following child.
        if ($node instanceof DocumentFragment) {
            // Only element children count towards the limit; comments and
            // processing instructions in the fragment are allowed.
            $elementCount = 0;
            foreach ($node->childNodes as $c) {
                if ($c instanceof Text) {
                    throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
                }
                if ($c instanceof Element) {
                    $elementCount++;
                }
            }

            if ($elementCount > 1) {
                throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
            }

            if ($elementCount === 1 && $this->elementInsertionBlocked($child)) {
                throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
            }
        }
        # element
        #    parent has an element child, child is a doctype, or child is
        #    non-null and a doctype is following child.
        elseif ($node instanceof Element) {
            if ($this->elementInsertionBlocked($child)) {
                throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
            }
        }
        # doctype
        #    parent has a doctype child, child is non-null and an element
        #    is preceding child, or child is null and parent has an element
        #    child.
        elseif ($node instanceof \DOMDocumentType) {
            if ($this->hasChildOfType($this, \DOMDocumentType::class)) {
                throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
            }

            if ($child !== null) {
                // DOMNode exposes previousSibling (there is no prevSibling)
                if ($this->hasSiblingOfType($child, Element::class, false)) {
                    throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
                }
            } elseif ($this->hasChildOfType($this, Element::class)) {
                throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
            }
        }
    }

    // True when inserting a single element into this document before $child
    // would be invalid: the document already has an element child, $child is a
    // doctype, or a doctype follows $child.
    private function elementInsertionBlocked(?\DOMNode $child): bool {
        if ($child instanceof \DOMDocumentType) {
            return true;
        }
        if ($child !== null && $this->hasSiblingOfType($child, \DOMDocumentType::class, true)) {
            return true;
        }

        return $this->hasChildOfType($this, Element::class);
    }

    // True when any direct child of $parent is an instance of $class.
    private function hasChildOfType(\DOMNode $parent, string $class): bool {
        foreach ($parent->childNodes as $c) {
            if ($c instanceof $class) {
                return true;
            }
        }

        return false;
    }

    // True when a sibling after ($forward) or before (!$forward) $start is an
    // instance of $class; $start itself is not considered.
    private function hasSiblingOfType(\DOMNode $start, string $class, bool $forward): bool {
        $n = $forward ? $start->nextSibling : $start->previousSibling;
        while ($n !== null) {
            if ($n instanceof $class) {
                return true;
            }
            $n = $forward ? $n->nextSibling : $n->previousSibling;
        }

        return false;
    }

    /**
     * Validates and appends $node, registering it in ElementMap when the
     * appended node is a template element.
     *
     * @param \DOMNode $node The node to append
     * @return mixed The result of the parent class' appendChild()
     * @throws DOMException When pre-insertion validation fails
     */
    public function appendChild($node) {
        $this->preInsertionValidity($node);

        $result = parent::appendChild($node);
        if ($result !== false && $result instanceof TemplateElement) {
            ElementMap::set($result);
        }

        return $result;
    }

    // Disable C14N
    public function C14N($exclusive = null, $with_comments = null, ?array $xpath = null, ?array $ns_prefixes = null): bool {
        return false;
    }

    // Disable C14NFile
    public function C14NFile($uri, $exclusive = null, $with_comments = null, ?array $xpath = null, ?array $ns_prefixes = null): bool {
        return false;
    }

    /**
     * Validates and inserts $node before $child, registering it in ElementMap
     * when the inserted node is a template element.
     *
     * @param \DOMNode $node The node to insert
     * @param \DOMNode|null $child Reference child; null appends
     * @return mixed The result of the parent class' insertBefore()
     * @throws DOMException When pre-insertion validation fails
     */
    public function insertBefore($node, $child = null) {
        $this->preInsertionValidity($node, $child);

        $result = parent::insertBefore($node, $child);
        // $child is only the reference position and stays in the tree, so it
        // must remain registered; only the inserted node is touched here.
        if ($result !== false && $result instanceof TemplateElement) {
            ElementMap::set($result);
        }

        return $result;
    }

    /**
     * Removes $child and drops it from ElementMap when it is a template
     * element.
     *
     * @param \DOMNode $child The child to remove
     * @return mixed The result of the parent class' removeChild()
     */
    public function removeChild($child) {
        $result = parent::removeChild($child);
        if ($result !== false && $result instanceof TemplateElement) {
            ElementMap::delete($child);
        }

        return $result;
    }

    /**
     * Replaces $child with $node, keeping ElementMap in step: the replaced
     * template element is dropped and the inserted template element is
     * registered.
     *
     * @param \DOMNode $node The replacement node
     * @param \DOMNode $child The node being replaced
     * @return mixed The result of the parent class' replaceChild()
     */
    public function replaceChild($node, $child) {
        $result = parent::replaceChild($node, $child);
        if ($result !== false) {
            // The parent call returns the replaced (old) node, so the new node
            // has to be taken from $node. Delete first so that replacing a
            // node with itself still leaves it registered.
            if ($child instanceof TemplateElement) {
                ElementMap::delete($child);
            }
            if ($node instanceof TemplateElement) {
                ElementMap::set($node);
            }
        }

        return $result;
    }
}