Browse Source

Changed ElementRegistry to ElementMap, destructors for ElementMap

split-manual
Dustin Wilson 3 years ago
parent
commit
3a0ffafc7a
  1. 59
      README.md
  2. 2
      lib/DOM/Comment.php
  3. 2
      lib/DOM/DOMException.php
  4. 64
      lib/DOM/Document.php
  5. 2
      lib/DOM/DocumentFragment.php
  6. 65
      lib/DOM/Element.php
  7. 12
      lib/DOM/ElementMap.php
  8. 2
      lib/DOM/ProcessingInstruction.php
  9. 4
      lib/DOM/TemplateElement.php
  10. 2
      lib/DOM/Text.php
  11. 18
      lib/DOM/traits/C14N.php
  12. 191
      lib/DOM/traits/Node.php

59
README.md

@ -45,61 +45,4 @@ This library and [masterminds/html5](https://packagist.org/packages/masterminds/
† With HTML namespace disabled. With HTML namespace enabled it does not finish in a reasonable time due to a PHP bug. † With HTML namespace disabled. With HTML namespace enabled it does not finish in a reasonable time due to a PHP bug.
‡ With parse errors suppressed. Reporting parse errors adds approximately 10% overhead. ‡ With parse errors suppressed. Reporting parse errors adds approximately 10% overhead.
## Document Object Model ##
This library works by parsing HTML strings into PHP's existing XML DOM. It, however, has to force the antiquated PHP DOM extension into working properly with modern HTML DOM by extending many of the node types. The documentation below follows PHP's doc style guide with the exception of inherited methods and properties not being listed. Therefore, only new constants, properties, and methods will be listed; in addition, extended methods which change outward behavior from their parent class will be listed.
### MensBeam\\HTML\\Document ###
```php
MensBeam\HTML\Document extends \DOMDocument {
/* Constants */
public const NO_QUIRKS_MODE = 0;
public const QUIRKS_MODE = 1;
public const LIMITED_QUIRKS_MODE = 2;
/* Properties */
public string|null $documentEncoding = null;
public int $quirksMode = 0;
/* Methods */
public load ( string $filename , null $options = null , string|null $encodingOrContentType = null ) : bool
public loadHTML ( string $source , null $options = null , string|null $encodingOrContentType = null ) : bool
public loadHTMLFile ( string $filename , null $options = null , string|null $encodingOrContentType = null ) : bool
public loadXML ( string $source , null $options = null ) : false
public save ( string $filename , null $options = null ) : int|false
public saveXML ( DOMNode|null $node = null , null $options = null ) : false
public validate ( ) : true
public xinclude ( null $options = null ) : false
}
```
#### Properties ####
<dl>
<dt>documentEncoding</dt>
<dd>Encoding of the document, as specified when parsing or when determining encoding type.</dd>
<dt>quirksMode</dt>
<dd>Used when parsing. Can be not in quirks mode, quirks mode, or limited quirks mode. See the `MensBeam\HTML\Document` constants to see the valid values.</dd>
</dl>
The following properties inherited from `\DOMDocument` have no effect on `MensBeam\HTML\Document`:
* actualEncoding
* config
* encoding
* formatOutput
* preserveWhiteSpace
* recover
* resolveExternals
* standalone
* substituteEntities
* validateOnParse
* version
* xmlEncoding
* xmlStandalone
* xmlVersion

2
lib/DOM/Comment.php

@ -7,7 +7,7 @@ declare(strict_types=1);
namespace MensBeam\HTML; namespace MensBeam\HTML;
class Comment extends \DOMComment { class Comment extends \DOMComment {
use C14N, Moonwalk; use Moonwalk, Node;
public function __toString(): string { public function __toString(): string {
# Append the literal string "<!--" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION # Append the literal string "<!--" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION

2
lib/DOM/DOMException.php

@ -12,6 +12,7 @@ class DOMException extends \Exception {
const WRONG_DOCUMENT = 4; const WRONG_DOCUMENT = 4;
const INVALID_CHARACTER = 5; const INVALID_CHARACTER = 5;
const NO_MODIFICATION_ALLOWED = 7; const NO_MODIFICATION_ALLOWED = 7;
const NOT_FOUND = 8;
const SYNTAX_ERROR = 12; const SYNTAX_ERROR = 12;
const DOCUMENT_ELEMENT_DOCUMENTFRAG_EXPECTED = 100; const DOCUMENT_ELEMENT_DOCUMENTFRAG_EXPECTED = 100;
@ -23,6 +24,7 @@ class DOMException extends \Exception {
4 => 'Supplied node does not belong to this document', 4 => 'Supplied node does not belong to this document',
5 => 'Invalid character', 5 => 'Invalid character',
7 => 'Modification not allowed here', 7 => 'Modification not allowed here',
8 => 'Not found error',
12 => 'Syntax error', 12 => 'Syntax error',
100 => 'Document, Element, or DocumentFragment expected; found %s', 100 => 'Document, Element, or DocumentFragment expected; found %s',
101 => 'The "%s" argument should be a string; found %s', 101 => 'The "%s" argument should be a string; found %s',

64
lib/DOM/Document.php

@ -7,7 +7,7 @@ declare(strict_types=1);
namespace MensBeam\HTML; namespace MensBeam\HTML;
class Document extends \DOMDocument { class Document extends \DOMDocument {
use C14N, EscapeString, Serialize, Walk; use EscapeString, Node, Serialize, Walk;
// Quirks mode constants // Quirks mode constants
public const NO_QUIRKS_MODE = 0; public const NO_QUIRKS_MODE = 0;
@ -29,21 +29,6 @@ class Document extends \DOMDocument {
$this->registerNodeClass('DOMText', '\MensBeam\HTML\Text'); $this->registerNodeClass('DOMText', '\MensBeam\HTML\Text');
} }
// Appends $node as the last child after checking that it is a node type which
// may be inserted; returns whatever the parent implementation returns.
public function appendChild($node) {
    # If node is not a DocumentFragment, DocumentType, Element, Text,
    # ProcessingInstruction, or Comment node then throw a "HierarchyRequestError"
    # DOMException.
    $insertable = $node instanceof DocumentFragment
        || $node instanceof \DOMDocumentType
        || $node instanceof Element
        || $node instanceof Text
        || $node instanceof ProcessingInstruction
        || $node instanceof Comment;
    if (!$insertable) {
        throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
    }

    $appended = parent::appendChild($node);
    // Template elements need to have a reference kept in userland
    if ($appended instanceof TemplateElement) {
        ElementRegistry::set($appended);
    }

    return $appended;
}
public function createAttribute($name) { public function createAttribute($name) {
return $this->createAttributeNS(null, $name); return $this->createAttributeNS(null, $name);
} }
@ -85,7 +70,7 @@ class Document extends \DOMDocument {
} else { } else {
$e = new TemplateElement($this, $qualifiedName, $value); $e = new TemplateElement($this, $qualifiedName, $value);
// Template elements need to have a reference kept in userland // Template elements need to have a reference kept in userland
ElementRegistry::set($e); ElementMap::set($e);
$e->content = $this->createDocumentFragment(); $e->content = $this->createDocumentFragment();
} }
@ -108,26 +93,6 @@ class Document extends \DOMDocument {
return false; return false;
} }
// Inserts $node before the reference node $child (or at the end when $child
// is null) after checking that $node is a node type which may be inserted.
// Returns whatever the parent implementation returns.
public function insertBefore($node, $child = null) {
    # If node is not a DocumentFragment, DocumentType, Element, Text,
    # ProcessingInstruction, or Comment node then throw a "HierarchyRequestError"
    # DOMException.
    if (!$node instanceof DocumentFragment && !$node instanceof \DOMDocumentType && !$node instanceof Element && !$node instanceof Text && !$node instanceof ProcessingInstruction && !$node instanceof Comment) {
        throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
    }

    $result = parent::insertBefore($node, $child);
    // Template elements need to have a reference kept in userland. The
    // reference node $child is NOT removed by an insertion, so its registry
    // entry must be left alone; dropping it would discard the userland
    // reference of a template which is still in the tree.
    if ($result instanceof TemplateElement) {
        ElementRegistry::set($result);
    }

    return $result;
}
public function load($filename, $options = null, ?string $encodingOrContentType = null): bool { public function load($filename, $options = null, ?string $encodingOrContentType = null): bool {
$data = Parser::fetchFile($filename, $encodingOrContentType); $data = Parser::fetchFile($filename, $encodingOrContentType);
if (!$data) { if (!$data) {
@ -152,27 +117,6 @@ class Document extends \DOMDocument {
return false; return false;
} }
// Removes $child and, when the removed node is a template element, drops the
// userland reference held for it in the registry.
public function removeChild($child) {
    $removed = parent::removeChild($child);
    if ($removed instanceof TemplateElement) {
        ElementRegistry::delete($child);
    }

    return $removed;
}
// Replaces $child with $node and keeps the template registry in step with the
// tree: the incoming node gains a userland reference, the outgoing one loses
// it. Returns whatever the parent implementation returns.
public function replaceChild($node, $child) {
    $result = parent::replaceChild($node, $child);
    if ($result !== false) {
        // The parent returns the node which was replaced, so the node that
        // needs registering is $node, not the return value.
        if ($node instanceof TemplateElement) {
            ElementRegistry::set($node);
        }
        // Replacing a node with itself must not drop the reference just set.
        if ($child instanceof TemplateElement && !$child->isSameNode($node)) {
            ElementRegistry::delete($child);
        }
    }

    return $result;
}
public function save($filename, $options = null) { public function save($filename, $options = null) {
return file_put_contents($filename, $this->serialize()); return file_put_contents($filename, $this->serialize());
} }
@ -203,6 +147,10 @@ class Document extends \DOMDocument {
return false; return false;
} }
// When the document object is destroyed, ask ElementMap to drop every element
// it is holding on behalf of this document.
public function __destruct() {
ElementMap::destroy($this);
}
public function __toString() { public function __toString() {
return $this->serialize(); return $this->serialize();
} }

2
lib/DOM/DocumentFragment.php

@ -7,7 +7,7 @@ declare(strict_types=1);
namespace MensBeam\HTML; namespace MensBeam\HTML;
class DocumentFragment extends \DOMDocumentFragment { class DocumentFragment extends \DOMDocumentFragment {
use C14N, Moonwalk, Serialize; use Moonwalk, Node, Serialize, Walk;
public function __toString() { public function __toString() {
return $this->serialize(); return $this->serialize();

65
lib/DOM/Element.php

@ -7,25 +7,10 @@ declare(strict_types=1);
namespace MensBeam\HTML; namespace MensBeam\HTML;
class Element extends \DOMElement { class Element extends \DOMElement {
use C14N, EscapeString, Moonwalk, Serialize, Walk; use EscapeString, Moonwalk, Node, Serialize, Walk;
protected $_classList; protected $_classList;
// Appends $node as the last child of this element once its node type has been
// verified; returns the parent implementation's result unchanged.
public function appendChild($node) {
    # If node is not a DocumentFragment, DocumentType, Element, Text,
    # ProcessingInstruction, or Comment node then throw a "HierarchyRequestError"
    # DOMException.
    $rejected = !(
        $node instanceof DocumentFragment ||
        $node instanceof \DOMDocumentType ||
        $node instanceof Element ||
        $node instanceof Text ||
        $node instanceof ProcessingInstruction ||
        $node instanceof Comment
    );
    if ($rejected) {
        throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
    }

    $inserted = parent::appendChild($node);
    if (!$inserted instanceof TemplateElement) {
        return $inserted;
    }

    // Template elements need to have a reference kept in userland
    ElementRegistry::set($inserted);
    return $inserted;
}
public function getAttribute($name) { public function getAttribute($name) {
// Newer versions of the DOM spec have getAttribute return an empty string only // Newer versions of the DOM spec have getAttribute return an empty string only
// when the attribute exists and is empty, otherwise null. This fixes that. // when the attribute exists and is empty, otherwise null. This fixes that.
@ -48,45 +33,19 @@ class Element extends \DOMElement {
return $value; return $value;
} }
public function insertBefore($node, $child = null) { public function isAncestorOf(\DOMNode $node): bool {
# If node is not a DocumentFragment, DocumentType, Element, Text, # An inclusive ancestor is an object or one of its ancestors.
# ProcessingInstruction, or Comment node then throw a "HierarchyRequestError" #
# DOMException. # An object A is called an ancestor of an object B if and only if B is a
if (!$node instanceof DocumentFragment && !$node instanceof \DOMDocumentType && !$node instanceof Element &&!$node instanceof Text && !$node instanceof ProcessingInstruction && !$node instanceof Comment) { # descendant of A.
throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR); // object A is $this, object B is $node
} $tree = $this->walk(function($n) use($node) {
if ($n->isSameNode($node)) {
$result = parent::insertBefore($node, $child); return true;
if ($result !== false) {
if ($result instanceof TemplateElement) {
ElementRegistry::set($result);
} }
if ($child instanceof TemplateElement) { });
ElementRegistry::delete($child);
}
}
return $result;
}
// Removes $child from this element; a removed template element also has its
// registry entry deleted.
public function removeChild($child) {
    $detached = parent::removeChild($child);
    if (!$detached instanceof TemplateElement) {
        return $detached;
    }

    ElementRegistry::delete($child);
    return $detached;
}
public function replaceChild($node, $child) { return ($tree->current() !== null);
$result = parent::replaceChild($node, $child);
if ($result !== false) {
if ($result instanceof TemplateElement) {
ElementRegistry::set($child);
}
if ($child instanceof TemplateElement) {
ElementRegistry::delete($child);
}
}
return $result;
} }
public function setAttribute($name, $value) { public function setAttribute($name, $value) {

12
lib/DOM/ElementRegistry.php → lib/DOM/ElementMap.php

@ -9,8 +9,8 @@ namespace MensBeam\HTML;
// This is a write-only map of elements which need to be kept in memory; it // This is a write-only map of elements which need to be kept in memory; it
// exists because values of properties on derived DOM classes are lost unless at // exists because values of properties on derived DOM classes are lost unless at
// least one PHP reference is kept for the element somewhere in userspace. This // least one PHP reference is kept for the element somewhere in userspace. This
// is that somewhere. It is at present only used for TemplateElements. // is that somewhere. It is at present only used for template elements.
class ElementRegistry { class ElementMap {
public static $_storage = []; public static $_storage = [];
public static function delete(Element $element) { public static function delete(Element $element) {
@ -24,6 +24,14 @@ class ElementRegistry {
return false; return false;
} }
// Forgets every stored element whose owner document is $document, leaving the
// entries (and keys) belonging to other documents untouched.
public static function destroy(Document $document) {
    self::$_storage = array_filter(self::$_storage, function($element) use ($document) {
        // Keep only the elements which belong to some other document
        return !$element->ownerDocument->isSameNode($document);
    });
}
public static function has(Element $element) { public static function has(Element $element) {
foreach (self::$_storage as $v) { foreach (self::$_storage as $v) {
if ($v->isSameNode($element)) { if ($v->isSameNode($element)) {

2
lib/DOM/ProcessingInstruction.php

@ -7,7 +7,7 @@ declare(strict_types=1);
namespace MensBeam\HTML; namespace MensBeam\HTML;
class ProcessingInstruction extends \DOMProcessingInstruction { class ProcessingInstruction extends \DOMProcessingInstruction {
use C14N, Moonwalk; use Moonwalk, Node;
public function __toString(): string { public function __toString(): string {
# Append the literal string "<?" (U+003C LESS-THAN SIGN, U+003F QUESTION MARK), # Append the literal string "<?" (U+003C LESS-THAN SIGN, U+003F QUESTION MARK),

4
lib/DOM/TemplateElement.php

@ -22,4 +22,8 @@ class TemplateElement extends Element {
$frag->removeChild($this); $frag->removeChild($this);
unset($frag); unset($frag);
} }
// When this template element object is destroyed, remove its entry from
// ElementMap.
public function __destruct() {
ElementMap::delete($this);
}
} }

2
lib/DOM/Text.php

@ -7,7 +7,7 @@ declare(strict_types=1);
namespace MensBeam\HTML; namespace MensBeam\HTML;
class Text extends \DOMText { class Text extends \DOMText {
use C14N, EscapeString, Moonwalk; use EscapeString, Moonwalk, Node;
function __toString(): string { function __toString(): string {
# If the parent of current node is a style, script, xmp, iframe, noembed, # If the parent of current node is a style, script, xmp, iframe, noembed,

18
lib/DOM/traits/C14N.php

@ -1,18 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
// Disables C14N in extended DOM classes
// Mixed into the extended DOM classes so that canonicalization is unavailable
// on them: both methods ignore their arguments and report failure.
trait C14N {
// Stub for C14N(); performs no canonicalization and always returns false.
// NOTE(review): the parameter list presumably mirrors \DOMNode::C14N — confirm.
public function C14N($exclusive = null, $with_comments = null, ?array $xpath = null, ?array $ns_prefixes = null): bool {
return false;
}
// Stub for C14NFile(); writes nothing to $uri and always returns false.
public function C14NFile($uri, $exclusive = null, $with_comments = null, ?array $xpath = null, ?array $ns_prefixes = null): bool {
return false;
}
}

191
lib/DOM/traits/Node.php

@ -0,0 +1,191 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
// Extensions to PHP's DOM cannot inherit from an extended Node parent, so a
// trait is the next best thing...
trait Node {
    // Checks whether $node may be inserted into $this ("parent") before $child,
    // following the DOM specification's pre-insertion validity steps quoted
    // below. Returns nothing when the insertion is acceptable; otherwise throws
    // a DOMException with code HIERARCHY_REQUEST_ERROR or NOT_FOUND.
    protected function preInsertionValidity(\DOMNode $node, ?\DOMNode $child = null) {
        // "parent" is $this
        # 1. If parent is not a Document, DocumentFragment, or Element node,
        # then throw a "HierarchyRequestError" DOMException.
        if (!$this instanceof Document && !$this instanceof DocumentFragment && !$this instanceof Element) {
            throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
        }

        # 2. If node is a host-including inclusive ancestor of parent, then
        # throw a "HierarchyRequestError" DOMException.
        #
        # An object A is a host-including inclusive ancestor of an object B, if
        # either A is an inclusive ancestor of B, or if B’s root has a non-null
        # host and A is a host-including inclusive ancestor of B’s root’s host.
        // DEVIATION: The baseline for this library is PHP 7.1, and without
        // WeakReferences we cannot add a host property to DocumentFragment to
        // check against.
        if ($node instanceof Element && $node->isAncestorOf($this)) {
            throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
        }

        # 3. If child is non-null and its parent is not parent, then throw a
        # "NotFoundError" DOMException.
        // A detached child has a null parentNode; that also means its parent is
        // not $this, and it must not be dereferenced.
        if ($child !== null && ($child->parentNode === null || !$child->parentNode->isSameNode($this))) {
            throw new DOMException(DOMException::NOT_FOUND);
        }

        # 4. If node is not a DocumentFragment, DocumentType, Element,
        # Text, ProcessingInstruction, or Comment node, then throw a
        # "HierarchyRequestError" DOMException.
        if (!$node instanceof DocumentFragment && !$node instanceof \DOMDocumentType && !$node instanceof Element && !$node instanceof Text && !$node instanceof ProcessingInstruction && !$node instanceof Comment) {
            throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
        }

        # 5. If either node is a Text node and parent is a document, or
        # node is a doctype and parent is not a document, then throw a
        # "HierarchyRequestError" DOMException.
        if (($node instanceof Text && $this instanceof Document) || ($node instanceof \DOMDocumentType && !$this instanceof Document)) {
            throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
        }

        # 6. If parent is a document, and any of the statements below, switched
        # on node, are true, then throw a "HierarchyRequestError" DOMException.
        if (!$this instanceof Document) {
            return;
        }

        # DocumentFragment node
        #    If node has more than one element child or has a Text node child.
        #    Otherwise, if node has one element child and either parent has an
        #    element child, child is a doctype, or child is non-null and a
        #    doctype is following child.
        if ($node instanceof DocumentFragment) {
            $elementCount = 0;
            foreach ($node->childNodes as $c) {
                if ($c instanceof Text) {
                    throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
                }
                if ($c instanceof Element) {
                    $elementCount++;
                }
            }

            if ($elementCount > 1) {
                throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
            }
            // Fragments without any element (comments, processing
            // instructions) place no further constraint on the document.
            if ($elementCount === 1 && $this->documentRejectsElementBefore($child)) {
                throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
            }
        }
        # element
        #    parent has an element child, child is a doctype, or child is
        #    non-null and a doctype is following child.
        elseif ($node instanceof Element) {
            if ($this->documentRejectsElementBefore($child)) {
                throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
            }
        }
        # doctype
        #    parent has a doctype child, child is non-null and an element
        #    is preceding child, or child is null and parent has an element
        #    child.
        elseif ($node instanceof \DOMDocumentType) {
            if ($this->hasChildInstanceOf(\DOMDocumentType::class)) {
                throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
            }

            if ($child !== null) {
                if ($this->elementPrecedes($child)) {
                    throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
                }
            } elseif ($this->hasChildInstanceOf(Element::class)) {
                throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
            }
        }
    }

    // True when an element may NOT be inserted into this document before
    // $child: the document already has an element child, $child is a doctype,
    // or a doctype follows $child.
    private function documentRejectsElementBefore(?\DOMNode $child): bool {
        if ($child instanceof \DOMDocumentType || $this->hasChildInstanceOf(Element::class)) {
            return true;
        }

        if ($child !== null) {
            for ($n = $child->nextSibling; $n !== null; $n = $n->nextSibling) {
                if ($n instanceof \DOMDocumentType) {
                    return true;
                }
            }
        }

        return false;
    }

    // True when any sibling before $child is an element.
    private function elementPrecedes(\DOMNode $child): bool {
        // The DOM property is "previousSibling"; PHP's DOM has no "prevSibling".
        for ($n = $child->previousSibling; $n !== null; $n = $n->previousSibling) {
            if ($n instanceof Element) {
                return true;
            }
        }

        return false;
    }

    // True when at least one direct child of $this is an instance of $class.
    private function hasChildInstanceOf(string $class): bool {
        foreach ($this->childNodes as $c) {
            if ($c instanceof $class) {
                return true;
            }
        }

        return false;
    }


    // Validates and appends $node as the last child; an appended template
    // element is registered in ElementMap so a userland reference is kept.
    public function appendChild($node) {
        $this->preInsertionValidity($node);

        $result = parent::appendChild($node);
        if ($result instanceof TemplateElement) {
            ElementMap::set($result);
        }
        return $result;
    }

    // Disable C14N
    public function C14N($exclusive = null, $with_comments = null, ?array $xpath = null, ?array $ns_prefixes = null): bool {
        return false;
    }

    // Disable C14NFile
    public function C14NFile($uri, $exclusive = null, $with_comments = null, ?array $xpath = null, ?array $ns_prefixes = null): bool {
        return false;
    }

    // Validates and inserts $node before the reference node $child (or at the
    // end when $child is null); an inserted template element is registered in
    // ElementMap.
    public function insertBefore($node, $child = null) {
        $this->preInsertionValidity($node, $child);

        $result = parent::insertBefore($node, $child);
        // The reference node $child stays in the tree after an insertion, so
        // its ElementMap entry is deliberately left untouched.
        if ($result instanceof TemplateElement) {
            ElementMap::set($result);
        }
        return $result;
    }

    // Removes $child; a removed template element is dropped from ElementMap.
    public function removeChild($child) {
        $result = parent::removeChild($child);
        if ($result !== false && $result instanceof TemplateElement) {
            ElementMap::delete($child);
        }
        return $result;
    }

    // Replaces $child with $node and keeps ElementMap in step with the tree:
    // the incoming template gains a reference, the outgoing one loses it.
    // NOTE(review): unlike appendChild/insertBefore no validity check is run
    // here; the specification's replace algorithm has its own rules.
    public function replaceChild($node, $child) {
        $result = parent::replaceChild($node, $child);
        if ($result !== false) {
            // The parent returns the node which was replaced, so the node that
            // needs registering is $node, not the return value.
            if ($node instanceof TemplateElement) {
                ElementMap::set($node);
            }
            // Replacing a node with itself must not drop the reference just set.
            if ($child instanceof TemplateElement && !$child->isSameNode($node)) {
                ElementMap::delete($child);
            }
        }
        return $result;
    }
}
Loading…
Cancel
Save