Browse Source

Starting to move all actual DOM serialization to Document::serialize

element-classes
Dustin Wilson 3 years ago
parent
commit
70f8769c93
  1. 2
      lib/DOM/AbstractDocument.php
  2. 10
      lib/DOM/Comment.php
  3. 365
      lib/DOM/Document.php
  4. 6
      lib/DOM/DocumentFragment.php
  5. 118
      lib/DOM/Element.php
  6. 10
      lib/DOM/ProcessingInstruction.php
  7. 16
      lib/DOM/Text.php
  8. 85
      lib/DOM/traits/Serialize.php
  9. 13
      lib/DOM/traits/ToString.php

2
lib/DOM/AbstractDocument.php

@ -8,5 +8,5 @@ namespace MensBeam\HTML;
// Exists so Document can extend a method gained from the ContainerNode trait.
abstract class AbstractDocument extends \DOMDocument {
use ContainerNode, EscapeString, Serialize, Walk;
use ContainerNode, EscapeString, Walk;
}

10
lib/DOM/Comment.php

@ -7,13 +7,5 @@ declare(strict_types=1);
namespace MensBeam\HTML;
class Comment extends \DOMComment {
use LeafNode, Moonwalk;
public function __toString(): string {
    // HTML fragment serialization of a comment: the literal "<!--", then the
    // node's data exactly as stored (no escaping is applied), then the
    // literal "-->".
    return '<!--' . $this->data . '-->';
}
use LeafNode, Moonwalk, ToString;
}

365
lib/DOM/Document.php

@ -18,6 +18,12 @@ class Document extends AbstractDocument {
public $quirksMode = self::NO_QUIRKS_MODE;
protected $_body = null;
// List of elements that are treated as block elements when pretty printing
protected static $blockElements = [ 'address', 'article', 'aside', 'blockquote', 'body', 'details', 'dialog', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html', 'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'script', 'source', 'style', 'table', 'template', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul' ];
// List of elements where content is ignored when pretty printing
protected static $ignoredContentElements = [ 'pre', 'title' ];
// List of elements which are self-closing; used when serializing
protected static $voidElements = [ 'area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' ];
public function __construct() {
@ -30,6 +36,7 @@ class Document extends AbstractDocument {
$this->registerNodeClass('DOMText', '\MensBeam\HTML\Text');
}
public function createAttribute($name) {
    // Delegates to createAttributeNS() with a null namespace so both creation
    // paths share the same handling.
    $attribute = $this->createAttributeNS(null, $name);
    return $attribute;
}
@ -127,13 +134,7 @@ class Document extends AbstractDocument {
}
/**
 * Serializes the document, or a single node belonging to it, to an HTML string.
 *
 * @param \DOMNode|null $node Node to serialize; null serializes the whole document.
 * @return string The HTML serialization.
 * @throws DOMException WRONG_DOCUMENT when $node does not belong to this document.
 */
public function saveHTML(\DOMNode $node = null): string {
    $node = $node ?? $this;

    if ($node !== $this) {
        // ownerDocument is null for Document nodes, so guard before calling
        // isSameNode() on it; a foreign document is a wrong-document error too.
        $owner = $node->ownerDocument;
        if ($owner === null || !$owner->isSameNode($this)) {
            throw new DOMException(DOMException::WRONG_DOCUMENT);
        }
    }

    // Serialization lives on the document: leaf nodes (text, comments, ...)
    // have no serialize() method of their own, so never call it on $node.
    return $this->serialize($node);
}
public function saveHTMLFile($filename): int {
@ -144,87 +145,34 @@ class Document extends AbstractDocument {
return false;
}
public function validate(): bool {
return true;
}
public function xinclude($options = null): bool {
return false;
}
public function serialize(\DOMNode $node = null): string {
$node = $node ?? $this;
public function __get(string $prop) {
if ($prop === 'body') {
if ($this->documentElement === null || $this->documentElement->childNodes->length === 0) {
return null;
if ($node !== $this) {
if (!$node->ownerDocument->isSameNode($this)) {
throw new DOMException(DOMException::WRONG_DOCUMENT);
}
$body = null;
# The body element of a document is the first of the html element's children
# that is either a body element or a frameset element, or null if there is no
# such element.
$n = $this->documentElement->firstChild;
do {
if ($n instanceof Element && $n->namespaceURI === null && ($n->nodeName === 'body' || $n->nodeName === 'frameset')) {
$body = $n;
break;
}
} while ($n = $n->nextSibling);
if ($body !== null) {
// References are handled weirdly by PHP's DOM. Return a stored body element
// unless it is changed so operations (like classList) can be done without
// losing the reference.
if ($body !== $this->_body) {
$this->_body = $body;
}
return $this->_body;
// This implementation uses the specification's fragment serializing algorithm to
// serialize everything to eliminate duplicate code as the specification
// for innerHTML and outerHTML are nearly identical. If not a Document or a
// DocumentFragment clone the node in a fragment and serialize that.
// (`instanceof` is an operator, not a member: `$node->instanceof X` does not parse.)
if (!$node instanceof Document && !$node instanceof DocumentFragment) {
    $frag = $this->createDocumentFragment();
    // Deep-clone so that wrapping the node does not detach it from its real parent.
    $frag->appendChild($node->cloneNode(true));
    $node = $frag;
}
$this->_body = null;
return null;
}
}
public function __set(string $prop, $value) {
if ($prop === 'body') {
# On setting, the following algorithm must be run:
#
# 1. If the new value is not a body or frameset element, then throw a
# "HierarchyRequestError" DOMException.
if (!$value instanceof Element || $value->namespaceURI !== null) {
throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
}
if ($value->nodeName !== 'body' && $value->nodeName !== 'frameset') {
throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
}
if ($this->_body !== null) {
# 2. Otherwise, if the new value is the same as the body element, return.
if ($value->isSameNode($this->_body)) {
return;
}
# 3. Otherwise, if the body element is not null, then replace the body element
# with the new value within the body element's parent and return.
$this->documentElement->replaceChild($value, $this->_body);
$this->_body = $value;
return;
}
return $this->serializeFragment($node);
}
# 4. Otherwise, if there is no document element, throw a "HierarchyRequestError"
# DOMException.
if ($this->documentElement === null) {
throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
}
public function validate(): bool {
return true;
}
# 5. Otherwise, the body element is null, but there's a document element. Append
# the new value to the document element.
$this->documentElement->appendChild($value);
$this->_body = $value;
}
public function xinclude($options = null): bool {
return false;
}
@ -314,11 +262,268 @@ class Document extends AbstractDocument {
}
}
/**
 * Runs the HTML fragment serialization algorithm on the children of $node.
 *
 * The output covers the CHILDREN of $node only; $node's own start and end
 * tags are never printed. When $node is a Document whose first child is a
 * document type, that doctype is printed first.
 *
 * @param \DOMNode $node The node whose children are serialized.
 * @return string The serialized markup; the empty string for void elements
 *                and for nodes without children.
 */
protected function serializeFragment(\DOMNode $node): string {
    # 13.3. Serializing HTML fragments
    #
    # 1. If the node serializes as void, then return the empty string.
    if (in_array($node->nodeName, self::$voidElements, true)) {
        return '';
    }

    # 2. Let s be a string, and initialize it to the empty string.
    $s = '';

    # 3. If the node is a template element, then let the node instead be the
    #    template element’s template contents (a DocumentFragment node).
    if ($node instanceof TemplateElement) {
        $node = $node->content;
    }

    $nodesLength = $node->childNodes->length;
    if ($nodesLength === 0) {
        # 5. Return s.
        return $s;
    }

    // If the provided node is a document node and the first element in
    // the tree is a document type then print the document type. There's
    // no sense in checking for this on every single element in the tree.
    // If the document type is present it will always be the first node
    // because of how PHP's XML DOM works.
    $start = 0;
    if ($node instanceof Document && $node->childNodes->item(0)->nodeType === XML_DOCUMENT_TYPE_NODE) {
        # Append the literal string "<!DOCTYPE" (U+003C LESS-THAN SIGN, U+0021
        # EXCLAMATION MARK, U+0044 LATIN CAPITAL LETTER D, U+004F LATIN CAPITAL LETTER
        # O, U+0043 LATIN CAPITAL LETTER C, U+0054 LATIN CAPITAL LETTER T, U+0059
        # LATIN CAPITAL LETTER Y, U+0050 LATIN CAPITAL LETTER P, U+0045 LATIN CAPITAL
        # LETTER E), followed by a space (U+0020 SPACE), followed by the value of
        # current node's name IDL attribute, followed by the literal string ">" (U+003E
        # GREATER-THAN SIGN).
        // DEVIATION: The name is trimmed because PHP's DOM does not
        // accept the empty string as a DOCTYPE name
        $name = trim($node->childNodes->item(0)->name, ' ');
        $s .= "<!DOCTYPE $name>";
        $start++;
    }

    # 4. For each child node of the node, in tree order, run the following steps:
    for ($i = $start; $i < $nodesLength; $i++) {
        # 1. Let current node be the child node being processed.
        $currentNode = $node->childNodes->item($i);

        # 2. Append the appropriate string from the following list to s:
        // Every test below is made against the CHILD being processed, never
        // against $node (its parent) or $this (the document).

        # If current node is an Element
        if ($currentNode instanceof Element) {
            # If current node is an element in the HTML namespace, the MathML namespace, or
            # the SVG namespace, then let tagname be current node's local name. Otherwise,
            # let tagname be current node's qualified name.
            $tagName = ($currentNode->namespaceURI === null || $currentNode->namespaceURI === Parser::MATHML_NAMESPACE || $currentNode->namespaceURI === Parser::SVG_NAMESPACE) ? $currentNode->localName : $currentNode->nodeName;

            // Since tag names can contain characters that are invalid in PHP's XML DOM
            // uncoerce the name when printing if necessary.
            if (strpos($tagName, 'U') !== false) {
                $tagName = $currentNode->uncoerceName($tagName);
            }

            # Append a U+003C LESS-THAN SIGN character (<), followed by tagname.
            // Must APPEND: plain assignment would discard the doctype and every
            // previously serialized sibling.
            $s .= "<$tagName";

            # If current node's is value is not null, and the element does not have an is
            # attribute in its attribute list, then append the string " is="", followed by
            # current node's is value escaped as described below in attribute mode, followed
            # by a U+0022 QUOTATION MARK character (").
            // DEVIATION: There is no scripting support in this implementation.

            # For each attribute that the element has, append a U+0020 SPACE character,
            # the attribute’s serialized name as described below, a U+003D EQUALS SIGN
            # character (=), a U+0022 QUOTATION MARK character ("), the attribute’s value,
            # escaped as described below in attribute mode, and a second U+0022 QUOTATION
            # MARK character (").
            for ($j = 0; $j < $currentNode->attributes->length; $j++) {
                $attr = $currentNode->attributes->item($j);

                # An attribute’s serialized name for the purposes of the previous paragraph
                # must be determined as follows:
                switch ($attr->namespaceURI) {
                    # If the attribute has no namespace
                    case null:
                        # The attribute’s serialized name is the attribute’s local name.
                        $name = $attr->localName;
                        break;
                    # If the attribute is in the XML namespace
                    case Parser::XML_NAMESPACE:
                        # The attribute’s serialized name is the string "xml:" followed by the
                        # attribute’s local name.
                        $name = 'xml:' . $attr->localName;
                        break;
                    # If the attribute is in the XMLNS namespace...
                    case Parser::XMLNS_NAMESPACE:
                        # ...and the attribute’s local name is xmlns
                        if ($attr->localName === 'xmlns') {
                            # The attribute’s serialized name is the string "xmlns".
                            $name = 'xmlns';
                        }
                        # ... and the attribute’s local name is not xmlns
                        else {
                            # The attribute’s serialized name is the string "xmlns:" followed by the
                            # attribute’s local name.
                            $name = 'xmlns:' . $attr->localName;
                        }
                        break;
                    # If the attribute is in the XLink namespace
                    case Parser::XLINK_NAMESPACE:
                        # The attribute’s serialized name is the string "xlink:" followed by the
                        # attribute’s local name.
                        $name = 'xlink:' . $attr->localName;
                        break;
                    # If the attribute is in some other namespace
                    default:
                        # The attribute’s serialized name is the attribute’s qualified name.
                        $name = $attr->nodeName;
                }

                // undo any name mangling
                if (strpos($name, 'U') !== false) {
                    $name = $currentNode->uncoerceName($name);
                }

                // The document gets escapeString() from the EscapeString trait, so
                // call it on $this rather than reaching into the element.
                $value = $this->escapeString($attr->value, true);
                $s .= " $name=\"$value\"";
            }

            # While the exact order of attributes is UA-defined, and may depend on factors
            # such as the order that the attributes were given in the original markup, the
            # sort order must be stable, such that consecutive invocations of this
            # algorithm serialize an element’s attributes in the same order.
            // Okay.

            # Append a U+003E GREATER-THAN SIGN character (>).
            $s .= '>';

            # If current node serializes as void, then continue on to the next child node at
            # this point.
            if (in_array($currentNode->nodeName, self::$voidElements, true)) {
                continue;
            }

            # Append the value of running the HTML fragment serialization algorithm on the
            # current node element (thus recursing into this algorithm for that element),
            # followed by a U+003C LESS-THAN SIGN character (<), a U+002F SOLIDUS character (/),
            # tagname again, and finally a U+003E GREATER-THAN SIGN character (>).
            $s .= $this->serializeFragment($currentNode);
            $s .= "</$tagName>";
        }
        # If current node is a Text node
        elseif ($currentNode instanceof Text) {
            # If the parent of current node is a style, script, xmp, iframe, noembed,
            # noframes, or plaintext element, or if the parent of current node is a noscript
            # element and scripting is enabled for the node, then append the value of
            # current node’s data IDL attribute literally.
            // DEVIATION: No scripting, so <noscript> is not included
            $parent = $currentNode->parentNode;
            if ($parent->namespaceURI === null && in_array($parent->nodeName, [ 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'plaintext' ], true)) {
                $s .= $currentNode->data;
            }
            # Otherwise, append the value of current node’s data IDL attribute, escaped as
            # described below.
            else {
                $s .= $this->escapeString($currentNode->data);
            }
        }
        # If current node is a Comment
        elseif ($currentNode instanceof Comment) {
            # Append the literal string "<!--" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION
            # MARK, U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS), followed by the value of
            # current node’s data IDL attribute, followed by the literal string "-->"
            # (U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN).
            $s .= "<!--{$currentNode->data}-->";
        }
        # If current node is a ProcessingInstruction
        elseif ($currentNode instanceof ProcessingInstruction) {
            # Append the literal string "<?" (U+003C LESS-THAN SIGN, U+003F QUESTION MARK),
            # followed by the value of current node’s target IDL attribute, followed by a
            # single U+0020 SPACE character, followed by the value of current node’s data
            # IDL attribute, followed by a single U+003E GREATER-THAN SIGN character (>).
            $s .= "<?{$currentNode->target} {$currentNode->data}>";
        }
    }

    # 5. Return s.
    return $s;
}
// Runs when the document object is destroyed and hands the document to
// ElementMap::destroy().
// NOTE(review): presumably this drops element references ElementMap holds for
// this document; confirm against ElementMap, which is not visible here.
public function __destruct() {
ElementMap::destroy($this);
}
/**
 * Magic getter; only the virtual "body" property is computed here.
 *
 * @param string $prop Name of the property being read.
 * @return Element|null The document's body or frameset element, or null when
 *                      there is none (and for any other property name).
 */
public function __get(string $prop) {
    // Any property other than "body" yields null, as before.
    if ($prop !== 'body') {
        return;
    }

    $root = $this->documentElement;
    if ($root === null || $root->childNodes->length === 0) {
        return null;
    }

    # The body element of a document is the first of the html element's children
    # that is either a body element or a frameset element, or null if there is no
    # such element.
    for ($child = $root->firstChild; $child; $child = $child->nextSibling) {
        if (!$child instanceof Element || $child->namespaceURI !== null) {
            continue;
        }
        if ($child->nodeName !== 'body' && $child->nodeName !== 'frameset') {
            continue;
        }

        // References are handled weirdly by PHP's DOM: keep handing out the
        // stored object while it is still the body, so state attached to it
        // (such as classList) is not lost; replace it only when it changed.
        if ($child !== $this->_body) {
            $this->_body = $child;
        }
        return $this->_body;
    }

    // No body or frameset child: forget any previously cached element.
    $this->_body = null;
    return null;
}
/**
 * Magic setter; only the virtual "body" property is handled, and assignments
 * to any other name are ignored.
 *
 * @param string $prop  Name of the property being written.
 * @param mixed  $value The new body: an HTML-namespace body or frameset Element.
 * @throws DOMException HIERARCHY_REQUEST_ERROR when $value is not a body or
 *                      frameset element, or when the document has neither a
 *                      body nor a document element.
 */
public function __set(string $prop, $value) {
    if ($prop !== 'body') {
        return;
    }

    # On setting, the following algorithm must be run:
    #
    # 1. If the new value is not a body or frameset element, then throw a
    #    "HierarchyRequestError" DOMException.
    if (!$value instanceof Element || $value->namespaceURI !== null) {
        throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
    }
    if ($value->nodeName !== 'body' && $value->nodeName !== 'frameset') {
        throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
    }

    // Look the current body up in the tree instead of trusting the cached
    // reference: the cache is only filled once "body" has been read, so a
    // document whose body was never read would otherwise gain a second body.
    $current = $this->__get('body');

    if ($current !== null) {
        # 2. Otherwise, if the new value is the same as the body element, return.
        if ($value->isSameNode($current)) {
            return;
        }

        # 3. Otherwise, if the body element is not null, then replace the body element
        #    with the new value within the body element's parent and return.
        $current->parentNode->replaceChild($value, $current);
        $this->_body = $value;
        return;
    }

    # 4. Otherwise, if there is no document element, throw a "HierarchyRequestError"
    #    DOMException.
    if ($this->documentElement === null) {
        throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
    }

    # 5. Otherwise, the body element is null, but there's a document element. Append
    #    the new value to the document element.
    $this->documentElement->appendChild($value);
    $this->_body = $value;
}
public function __toString() {
    // Casting the document to a string serializes the whole document;
    // serialize() falls back to the document itself when given no node.
    $markup = $this->serialize();
    return $markup;
}

6
lib/DOM/DocumentFragment.php

@ -7,9 +7,5 @@ declare(strict_types=1);
namespace MensBeam\HTML;
class DocumentFragment extends \DOMDocumentFragment {
use ContainerNode, Moonwalk, Serialize, Walk;
public function __toString() {
    // Casting the fragment to a string yields the result of serialize()
    // called without arguments.
    $markup = $this->serialize();
    return $markup;
}
use ContainerNode, Moonwalk, ToString, Walk;
}

118
lib/DOM/Element.php

@ -7,7 +7,7 @@ declare(strict_types=1);
namespace MensBeam\HTML;
class Element extends \DOMElement {
use ContainerNode, EscapeString, Moonwalk, Serialize, Walk;
use ContainerNode, EscapeString, Moonwalk, ToString, Walk;
protected $_classList;
@ -142,6 +142,11 @@ class Element extends \DOMElement {
return $result;
}
// Serializes this element by delegating to the owning document's serialize().
// Document::serialize() clones any node that is not a Document or
// DocumentFragment into a fragment and serializes that fragment's children,
// so the result includes this element's own tags (outer markup).
// NOTE(review): the innerHTML accessor calls $this->serialize($this); this
// method takes no parameters, so that extra argument is ignored and innerHTML
// would return the outer markup — confirm the intended behaviour.
// NOTE(review): ownerDocument is assumed to be non-null here; verify for
// elements that can exist without a document.
public function serialize(): string {
return $this->ownerDocument->serialize($this);
}
public function __get(string $prop) {
switch ($prop) {
case 'classList':
@ -163,7 +168,7 @@ class Element extends \DOMElement {
# might throw an exception instead of returning a string).
// DEVIATION: Parsing of XML documents will not be handled by this
// implementation, so there's no need for the well-formed flag.
case 'innerHTML':
case 'innerHTML':
return $this->serialize($this);
### DOM Parsing Specification ###
# 2.4 Extensions to the Element interface
@ -178,9 +183,9 @@ class Element extends \DOMElement {
// OPTIMIZATION: When following the instructions above the fragment serializing
// algorithm (Element::serialize) would invoke Element::__toString, so just
// doing that instead of multiple function calls.
case 'outerHTML':
case 'outerHTML':
return $this->__toString();
default:
default:
return null;
}
}
@ -284,109 +289,4 @@ class Element extends \DOMElement {
break;
}
}
/**
 * Serializes this element to HTML: start tag with attributes, then (unless
 * the element serializes as void) its serialized contents and end tag.
 *
 * @return string The element's outer markup.
 */
public function __toString(): string {
    # If current node is an element in the HTML namespace, the MathML namespace,
    # or the SVG namespace, then let tagname be current node’s local name.
    # Otherwise, let tagname be current node’s qualified name.
    $printsLocalName = $this->namespaceURI === null
        || $this->namespaceURI === Parser::MATHML_NAMESPACE
        || $this->namespaceURI === Parser::SVG_NAMESPACE;
    $tagName = $printsLocalName ? $this->localName : $this->nodeName;

    // Names that PHP's XML DOM cannot hold are stored in a coerced form;
    // restore the original spelling when one may be present.
    if (strpos($tagName, 'U') !== false) {
        $tagName = $this->uncoerceName($tagName);
    }

    # Append a U+003C LESS-THAN SIGN character (<), followed by tagname.
    $markup = '<' . $tagName;

    # If current node's is value is not null, and the element does not have an is
    # attribute in its attribute list, then append the string " is="", followed by
    # current node's is value escaped as described below in attribute mode, followed
    # by a U+0022 QUOTATION MARK character (").
    // DEVIATION: There is no scripting support in this implementation.

    # For each attribute that the element has, append a U+0020 SPACE character,
    # the attribute’s serialized name, a U+003D EQUALS SIGN character (=), a
    # U+0022 QUOTATION MARK character ("), the attribute’s value escaped in
    # attribute mode, and a second U+0022 QUOTATION MARK character (").
    // Attributes are visited by index, which keeps the order stable between
    // invocations as the specification requires.
    $index = 0;
    while ($index < $this->attributes->length) {
        $attribute = $this->attributes->item($index);
        $index++;

        # An attribute’s serialized name must be determined as follows:
        switch ($attribute->namespaceURI) {
            case null:
                # No namespace: the attribute’s local name.
                $serializedName = $attribute->localName;
                break;
            case Parser::XML_NAMESPACE:
                # XML namespace: "xml:" followed by the local name.
                $serializedName = 'xml:' . $attribute->localName;
                break;
            case Parser::XMLNS_NAMESPACE:
                # XMLNS namespace: "xmlns" when that is the local name,
                # otherwise "xmlns:" followed by the local name.
                $serializedName = ($attribute->localName === 'xmlns')
                    ? 'xmlns'
                    : 'xmlns:' . $attribute->localName;
                break;
            case Parser::XLINK_NAMESPACE:
                # XLink namespace: "xlink:" followed by the local name.
                $serializedName = 'xlink:' . $attribute->localName;
                break;
            default:
                # Any other namespace: the attribute’s qualified name.
                $serializedName = $attribute->nodeName;
        }

        // undo any name mangling
        if (strpos($serializedName, 'U') !== false) {
            $serializedName = $this->uncoerceName($serializedName);
        }

        $escapedValue = $this->escapeString($attribute->value, true);
        $markup .= " $serializedName=\"$escapedValue\"";
    }

    # Append a U+003E GREATER-THAN SIGN character (>).
    $markup .= '>';

    # If current node serializes as void, then continue on to the next child node at
    # this point.
    // Void elements have neither content nor an end tag.
    if ($this->serializesAsVoid()) {
        return $markup;
    }

    # Append the value of running the HTML fragment serialization algorithm on the
    # current node element (thus recursing into this algorithm for that element),
    # followed by a U+003C LESS-THAN SIGN character (<), a U+002F SOLIDUS character (/),
    # tagname again, and finally a U+003E GREATER-THAN SIGN character (>).
    $markup .= $this->serialize($this);
    $markup .= "</$tagName>";

    return $markup;
}
}

10
lib/DOM/ProcessingInstruction.php

@ -7,13 +7,5 @@ declare(strict_types=1);
namespace MensBeam\HTML;
class ProcessingInstruction extends \DOMProcessingInstruction {
use LeafNode, Moonwalk;
public function __toString(): string {
    // HTML fragment serialization of a processing instruction: the literal
    // "<?", the target, one space, the data, and a closing ">". Neither the
    // target nor the data is escaped.
    return '<?' . $this->target . ' ' . $this->data . '>';
}
use LeafNode, Moonwalk, ToString;
}

16
lib/DOM/Text.php

@ -7,19 +7,5 @@ declare(strict_types=1);
namespace MensBeam\HTML;
class Text extends \DOMText {
use EscapeString, LeafNode, Moonwalk;
function __toString(): string {
    # If the parent of current node is a style, script, xmp, iframe, noembed,
    # noframes, or plaintext element, or if the parent of current node is a noscript
    # element and scripting is enabled for the node, then append the value of
    # current node’s data IDL attribute literally.
    // DEVIATION: No scripting, so <noscript> is not included
    $rawTextParents = ['style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'plaintext'];
    $parent = $this->parentNode;
    $isRawText = $parent->namespaceURI === null && in_array($parent->nodeName, $rawTextParents);

    # Otherwise, append the value of current node’s data IDL attribute, escaped as
    # described below.
    return $isRawText ? $this->data : $this->escapeString($this->data);
}
use EscapeString, LeafNode, Moonwalk, ToString;
}

85
lib/DOM/traits/Serialize.php

@ -1,85 +0,0 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
/**
 * Shared HTML fragment serialization for documents, fragments and elements.
 *
 * serialize() prints the CHILDREN of a node by string-casting each child, so
 * it relies on every child class providing __toString().
 */
trait Serialize {
    // List of elements that are treated as block elements when pretty printing
    protected static $blockElements = [ 'address', 'article', 'aside', 'blockquote', 'body', 'details', 'dialog', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html', 'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'script', 'source', 'style', 'table', 'template', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul' ];
    // List of elements where content is ignored when pretty printing
    protected static $ignoredContentElements = [ 'pre', 'title' ];
    // List of elements which are self-closing
    protected static $voidElements = [ 'area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' ];

    /**
     * Tells whether the node using this trait is one of the void elements.
     *
     * @return bool True when this node's name is in the void element list.
     */
    protected function serializesAsVoid(): bool {
        return in_array($this->nodeName, self::$voidElements, true);
    }

    /**
     * Serializes the children of a node per the HTML fragment serialization
     * algorithm.
     *
     * @param \DOMNode|null $node Node to serialize; defaults to the node using this trait.
     * @return string The serialized children; the empty string for void elements.
     * @throws DOMException DOCUMENT_ELEMENT_DOCUMENTFRAG_EXPECTED when $node is
     *                      not an Element, Document or DocumentFragment.
     */
    protected function serialize(\DOMNode $node = null): string {
        $node = $node ?? $this;

        if (!$node instanceof Element && !$node instanceof Document && !$node instanceof DocumentFragment) {
            throw new DOMException(DOMException::DOCUMENT_ELEMENT_DOCUMENTFRAG_EXPECTED, gettype($node));
        }

        # 13.3. Serializing HTML fragments
        #
        # 1. If the node serializes as void, then return the empty string.
        // The check must look at the node being serialized, which is not
        // necessarily $this when a node is passed in explicitly.
        if (in_array($node->nodeName, self::$voidElements, true)) {
            return '';
        }

        # 2. Let s be a string, and initialize it to the empty string.
        $s = '';

        # 3. If the node is a template element, then let the node instead be the
        #    template element’s template contents (a DocumentFragment node).
        if ($node instanceof TemplateElement) {
            $node = $node->content;
        }

        $nodesLength = $node->childNodes->length;
        if ($nodesLength > 0) {
            // If the provided node is a document node and the first element in
            // the tree is a document type then print the document type. There's
            // no sense in checking for this on every single element in the tree.
            // If the document type is present it will always be the first node
            // because of how PHP's XML DOM works.
            $start = 0;
            if ($node->nodeType === XML_DOCUMENT_NODE && $node->childNodes->item(0)->nodeType === XML_DOCUMENT_TYPE_NODE) {
                # Append the literal string "<!DOCTYPE" (U+003C LESS-THAN SIGN, U+0021
                # EXCLAMATION MARK, U+0044 LATIN CAPITAL LETTER D, U+004F LATIN CAPITAL LETTER
                # O, U+0043 LATIN CAPITAL LETTER C, U+0054 LATIN CAPITAL LETTER T, U+0059
                # LATIN CAPITAL LETTER Y, U+0050 LATIN CAPITAL LETTER P, U+0045 LATIN CAPITAL
                # LETTER E), followed by a space (U+0020 SPACE), followed by the value of
                # current node's name IDL attribute, followed by the literal string ">" (U+003E
                # GREATER-THAN SIGN).
                // DEVIATION: The name is trimmed because PHP's DOM does not
                // accept the empty string as a DOCTYPE name
                $name = trim($node->childNodes->item(0)->name, " ");
                $s .= "<!DOCTYPE $name>";
                $start = 1;
            }

            # 4. For each child node of the node, in tree order, run the following steps:
            for ($i = $start; $i < $nodesLength; $i++) {
                # 1. Let current node be the child node being processed.
                # 2. Append the appropriate string from the following list to s:
                // Each child class renders itself through its own __toString().
                $s .= $node->childNodes->item($i);
            }
        }

        # 5. Return s.
        return $s;
    }
}

13
lib/DOM/traits/ToString.php

@ -0,0 +1,13 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
// Gives a node class a string cast that defers to its owning document's
// serializer, so the serialization logic lives in one place.
trait ToString {
    public function __toString(): string {
        // NOTE(review): assumes the node has an owner document; a node without
        // one would fail here — confirm whether such nodes can exist.
        $document = $this->ownerDocument;
        return $document->serialize($this);
    }
}
Loading…
Cancel
Save