Start moving all actual DOM serialization into Document::serialize
This commit is contained in:
parent
8be1364737
commit
70f8769c93
9 changed files with 314 additions and 315 deletions
|
@ -8,5 +8,5 @@ namespace MensBeam\HTML;
|
|||
|
||||
// Exists so Document can extend a method gained from the ContainerNode trait.
|
||||
abstract class AbstractDocument extends \DOMDocument {
|
||||
use ContainerNode, EscapeString, Serialize, Walk;
|
||||
use ContainerNode, EscapeString, Walk;
|
||||
}
|
||||
|
|
|
@ -7,13 +7,5 @@ declare(strict_types=1);
|
|||
namespace MensBeam\HTML;
|
||||
|
||||
class Comment extends \DOMComment {
|
||||
use LeafNode, Moonwalk;
|
||||
|
||||
public function __toString(): string {
|
||||
# Append the literal string "<!--" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION
|
||||
# MARK, U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS), followed by the value of
|
||||
# current node’s data IDL attribute, followed by the literal string "-->"
|
||||
# (U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN).
|
||||
return "<!--{$this->data}-->";
|
||||
}
|
||||
use LeafNode, Moonwalk, ToString;
|
||||
}
|
||||
|
|
|
@ -18,6 +18,12 @@ class Document extends AbstractDocument {
|
|||
public $quirksMode = self::NO_QUIRKS_MODE;
|
||||
|
||||
protected $_body = null;
|
||||
// List of elements that are treated as block elements when pretty printing
|
||||
protected static $blockElements = [ 'address', 'article', 'aside', 'blockquote', 'body', 'details', 'dialog', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html', 'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'script', 'source', 'style', 'table', 'template', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul' ];
|
||||
// List of elements where content is ignored when pretty printing
|
||||
protected static $ignoredContentElements = [ 'pre', 'title' ];
|
||||
// List of elements which are self-closing; used when serializing
|
||||
protected static $voidElements = [ 'area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' ];
|
||||
|
||||
|
||||
public function __construct() {
|
||||
|
@ -30,6 +36,7 @@ class Document extends AbstractDocument {
|
|||
$this->registerNodeClass('DOMText', '\MensBeam\HTML\Text');
|
||||
}
|
||||
|
||||
|
||||
public function createAttribute($name) {
|
||||
return $this->createAttributeNS(null, $name);
|
||||
}
|
||||
|
@ -127,13 +134,7 @@ class Document extends AbstractDocument {
|
|||
}
|
||||
|
||||
/**
 * Serializes this document, or one of its nodes, to an HTML string.
 *
 * The work is delegated to Document::serialize(), which substitutes this
 * document when no node is given and throws a DOMException (WRONG_DOCUMENT)
 * when the node belongs to another document.
 *
 * @param \DOMNode|null $node The node to serialize; null serializes the whole document
 * @return string The HTML serialization
 */
public function saveHTML(?\DOMNode $node = null): string {
    // Always go through the document's own serializer: leaf nodes such as
    // text nodes and comments have no serialize() method of their own, so
    // calling serialize() on the supplied node is not safe.
    return $this->serialize($node);
}
|
||||
|
||||
public function saveHTMLFile($filename): int {
|
||||
|
@ -144,6 +145,28 @@ class Document extends AbstractDocument {
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
 * Serializes this document, or a node belonging to it, to an HTML string.
 *
 * Documents and document fragments are passed straight to the fragment
 * serializer. Any other node is first deep-cloned into a temporary document
 * fragment so that the node itself, and not just its children, is printed.
 *
 * @param \DOMNode|null $node The node to serialize; null serializes the whole document
 * @return string The HTML serialization
 * @throws DOMException (WRONG_DOCUMENT) if the node does not belong to this document
 */
public function serialize(?\DOMNode $node = null): string {
    $node = $node ?? $this;

    if ($node !== $this) {
        // Document nodes report a null owner document, so a null owner here
        // can only mean that a different document was passed in.
        $owner = $node->ownerDocument;
        if ($owner === null || !$owner->isSameNode($this)) {
            throw new DOMException(DOMException::WRONG_DOCUMENT);
        }

        // This implementation uses the specification's fragment serializing algorithm to
        // serialize everything to eliminate duplicate code as the specifications
        // for innerHTML and outerHTML are nearly identical. If not a Document or a
        // DocumentFragment clone the node in a fragment and serialize that.
        if (!$node instanceof Document && !$node instanceof DocumentFragment) {
            $frag = $this->createDocumentFragment();
            $frag->appendChild($node->cloneNode(true));
            $node = $frag;
        }
    }

    return $this->serializeFragment($node);
}
|
||||
|
||||
public function validate(): bool {
|
||||
return true;
|
||||
}
|
||||
|
@ -153,81 +176,6 @@ class Document extends AbstractDocument {
|
|||
}
|
||||
|
||||
|
||||
public function __get(string $prop) {
|
||||
if ($prop === 'body') {
|
||||
if ($this->documentElement === null || $this->documentElement->childNodes->length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$body = null;
|
||||
|
||||
# The body element of a document is the first of the html element's children
|
||||
# that is either a body element or a frameset element, or null if there is no
|
||||
# such element.
|
||||
$n = $this->documentElement->firstChild;
|
||||
do {
|
||||
if ($n instanceof Element && $n->namespaceURI === null && ($n->nodeName === 'body' || $n->nodeName === 'frameset')) {
|
||||
$body = $n;
|
||||
break;
|
||||
}
|
||||
} while ($n = $n->nextSibling);
|
||||
|
||||
if ($body !== null) {
|
||||
// References are handled weirdly by PHP's DOM. Return a stored body element
|
||||
// unless it is changed so operations (like classList) can be done without
|
||||
// losing the reference.
|
||||
if ($body !== $this->_body) {
|
||||
$this->_body = $body;
|
||||
}
|
||||
|
||||
return $this->_body;
|
||||
}
|
||||
|
||||
$this->_body = null;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public function __set(string $prop, $value) {
|
||||
if ($prop === 'body') {
|
||||
# On setting, the following algorithm must be run:
|
||||
#
|
||||
# 1. If the new value is not a body or frameset element, then throw a
|
||||
# "HierarchyRequestError" DOMException.
|
||||
if (!$value instanceof Element || $value->namespaceURI !== null) {
|
||||
throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
|
||||
}
|
||||
if ($value->nodeName !== 'body' && $value->nodeName !== 'frameset') {
|
||||
throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
|
||||
}
|
||||
|
||||
if ($this->_body !== null) {
|
||||
# 2. Otherwise, if the new value is the same as the body element, return.
|
||||
if ($value->isSameNode($this->_body)) {
|
||||
return;
|
||||
}
|
||||
|
||||
# 3. Otherwise, if the body element is not null, then replace the body element
|
||||
# with the new value within the body element's parent and return.
|
||||
$this->documentElement->replaceChild($value, $this->_body);
|
||||
$this->_body = $value;
|
||||
return;
|
||||
}
|
||||
|
||||
# 4. Otherwise, if there is no document element, throw a "HierarchyRequestError"
|
||||
# DOMException.
|
||||
if ($this->documentElement === null) {
|
||||
throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
|
||||
}
|
||||
|
||||
# 5. Otherwise, the body element is null, but there's a document element. Append
|
||||
# the new value to the document element.
|
||||
$this->documentElement->appendChild($value);
|
||||
$this->_body = $value;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected function preInsertionValidity(\DOMNode $node, ?\DOMNode $child = null) {
|
||||
parent::preInsertionValidity($node, $child);
|
||||
|
||||
|
@ -314,11 +262,268 @@ class Document extends AbstractDocument {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Serializes the contents of a node following the HTML Standard's
 * "Serializing HTML fragments" algorithm.
 *
 * Only the children of the node are printed, never the node itself. When the
 * node is a Document whose first child is a document type, a DOCTYPE
 * declaration is printed first.
 *
 * @param \DOMNode $node The node whose contents are to be serialized
 * @return string The HTML serialization of the node's contents
 */
protected function serializeFragment(\DOMNode $node): string {
    # 13.3. Serializing HTML fragments
    #
    # 1. If the node serializes as void, then return the empty string.
    if (in_array($node->nodeName, self::$voidElements, true)) {
        return '';
    }

    # 2. Let s be a string, and initialize it to the empty string.
    $s = '';

    # 3. If the node is a template element, then let the node instead be the
    #    template element’s template contents (a DocumentFragment node).
    if ($node instanceof TemplateElement) {
        $node = $node->content;
    }

    // Every child processed below has $node as its parent, so whether text is
    // printed literally or escaped can be decided once instead of per child.
    // DEVIATION: No scripting, so <noscript> is not included
    $rawText = $node->namespaceURI === null
        && in_array($node->nodeName, [ 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'plaintext' ], true);

    $currentNode = $node->firstChild;

    // If the provided node is a document node and the first node in the tree
    // is a document type then print the document type. There's no sense in
    // checking for this on every single node in the tree. If the document
    // type is present it will always be the first node because of how PHP's
    // XML DOM works.
    if ($node instanceof Document && $currentNode !== null && $currentNode->nodeType === XML_DOCUMENT_TYPE_NODE) {
        # Append the literal string "<!DOCTYPE", followed by a space (U+0020 SPACE),
        # followed by the value of current node's name IDL attribute, followed by
        # the literal string ">" (U+003E GREATER-THAN SIGN).
        // DEVIATION: The name is trimmed because PHP's DOM does not
        // accept the empty string as a DOCTYPE name
        $name = trim($currentNode->name, ' ');
        $s .= "<!DOCTYPE $name>";
        $currentNode = $currentNode->nextSibling;
    }

    # 4. For each child node of the node, in tree order, run the following steps:
    for (; $currentNode !== null; $currentNode = $currentNode->nextSibling) {
        # 1. Let current node be the child node being processed.
        # 2. Append the appropriate string from the following list to s:
        # If current node is an Element
        if ($currentNode instanceof Element) {
            # If current node is an element in the HTML namespace, the MathML namespace, or
            # the SVG namespace, then let tagname be current node's local name. Otherwise,
            # let tagname be current node's qualified name.
            $tagName = ($currentNode->namespaceURI === null || $currentNode->namespaceURI === Parser::MATHML_NAMESPACE || $currentNode->namespaceURI === Parser::SVG_NAMESPACE) ? $currentNode->localName : $currentNode->nodeName;

            // Since tag names can contain characters that are invalid in PHP's XML DOM
            // uncoerce the name when printing if necessary.
            if (strpos($tagName, 'U') !== false) {
                $tagName = $currentNode->uncoerceName($tagName);
            }

            # Append a U+003C LESS-THAN SIGN character (<), followed by tagname.
            // This must append: assigning here would throw away everything
            // serialized for the preceding siblings.
            $s .= "<$tagName";

            # If current node's is value is not null, and the element does not have an is
            # attribute in its attribute list, then append the string " is="", followed by
            # current node's is value escaped as described below in attribute mode, followed
            # by a U+0022 QUOTATION MARK character (").
            // DEVIATION: There is no scripting support in this implementation.

            # For each attribute that the element has, append a U+0020 SPACE character,
            # the attribute’s serialized name as described below, a U+003D EQUALS SIGN
            # character (=), a U+0022 QUOTATION MARK character ("), the attribute’s value,
            # escaped as described below in attribute mode, and a second U+0022 QUOTATION
            # MARK character (").
            foreach ($currentNode->attributes as $attr) {
                $name = $this->serializeAttributeName($attr);
                // undo any name mangling
                if (strpos($name, 'U') !== false) {
                    $name = $currentNode->uncoerceName($name);
                }
                $value = $this->escapeString($attr->value, true);
                $s .= " $name=\"$value\"";
            }

            # While the exact order of attributes is UA-defined, and may depend on factors
            # such as the order that the attributes were given in the original markup, the
            # sort order must be stable, such that consecutive invocations of this
            # algorithm serialize an element’s attributes in the same order.
            // Okay.

            # Append a U+003E GREATER-THAN SIGN character (>).
            $s .= '>';

            # If current node serializes as void, then continue on to the next child node at
            # this point.
            if (in_array($currentNode->nodeName, self::$voidElements, true)) {
                continue;
            }

            # Append the value of running the HTML fragment serialization algorithm on the
            # current node element (thus recursing into this algorithm for that element),
            # followed by a U+003C LESS-THAN SIGN character (<), a U+002F SOLIDUS character (/),
            # tagname again, and finally a U+003E GREATER-THAN SIGN character (>).
            $s .= $this->serializeFragment($currentNode);
            $s .= "</$tagName>";
        }
        # If current node is a Text node
        elseif ($currentNode instanceof Text) {
            # If the parent of current node is a style, script, xmp, iframe, noembed,
            # noframes, or plaintext element, or if the parent of current node is a noscript
            # element and scripting is enabled for the node, then append the value of
            # current node’s data IDL attribute literally.
            # Otherwise, append the value of current node’s data IDL attribute, escaped as
            # described below.
            $s .= $rawText ? $currentNode->data : $this->escapeString($currentNode->data);
        }
        # If current node is a Comment
        elseif ($currentNode instanceof Comment) {
            # Append the literal string "<!--" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION
            # MARK, U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS), followed by the value of
            # current node’s data IDL attribute, followed by the literal string "-->"
            # (U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN).
            $s .= "<!--{$currentNode->data}-->";
        }
        # If current node is a ProcessingInstruction
        elseif ($currentNode instanceof ProcessingInstruction) {
            # Append the literal string "<?" (U+003C LESS-THAN SIGN, U+003F QUESTION MARK),
            # followed by the value of current node’s target IDL attribute, followed by a
            # single U+0020 SPACE character, followed by the value of current node’s data
            # IDL attribute, followed by a single U+003E GREATER-THAN SIGN character (>).
            $s .= "<?{$currentNode->target} {$currentNode->data}>";
        }
    }

    # 5. Return s.
    return $s;
}

/**
 * Determines an attribute's serialized name as required by the fragment
 * serialization algorithm. Name uncoercion is left to the caller.
 *
 * @param \DOMAttr $attr The attribute being serialized
 * @return string The name to print for the attribute
 */
protected function serializeAttributeName(\DOMAttr $attr): string {
    # An attribute’s serialized name for the purposes of the previous paragraph
    # must be determined as follows:
    switch ($attr->namespaceURI) {
        # If the attribute has no namespace
        case null:
            # The attribute’s serialized name is the attribute’s local name.
            return $attr->localName;
        # If the attribute is in the XML namespace
        case Parser::XML_NAMESPACE:
            # The attribute’s serialized name is the string "xml:" followed by the
            # attribute’s local name.
            return 'xml:' . $attr->localName;
        # If the attribute is in the XMLNS namespace...
        case Parser::XMLNS_NAMESPACE:
            # ...and the attribute’s local name is xmlns, the serialized name is the
            # string "xmlns"; otherwise it is the string "xmlns:" followed by the
            # attribute’s local name.
            return ($attr->localName === 'xmlns') ? 'xmlns' : 'xmlns:' . $attr->localName;
        # If the attribute is in the XLink namespace
        case Parser::XLINK_NAMESPACE:
            # The attribute’s serialized name is the string "xlink:" followed by the
            # attribute’s local name.
            return 'xlink:' . $attr->localName;
        # If the attribute is in some other namespace
        default:
            # The attribute’s serialized name is the attribute’s qualified name.
            return $attr->nodeName;
    }
}
|
||||
|
||||
|
||||
public function __destruct() {
|
||||
ElementMap::destroy($this);
|
||||
}
|
||||
|
||||
/**
 * Magic getter providing the virtual "body" property.
 *
 * Any other property name yields null.
 *
 * @param string $prop The name of the property being read
 * @return Element|null The body or frameset element, or null if there is none
 */
public function __get(string $prop) {
    if ($prop !== 'body') {
        return null;
    }

    $root = $this->documentElement;
    if ($root === null || $root->childNodes->length === 0) {
        return null;
    }

    # The body element of a document is the first of the html element's children
    # that is either a body element or a frameset element, or null if there is no
    # such element.
    for ($child = $root->firstChild; $child !== null; $child = $child->nextSibling) {
        $matches = $child instanceof Element
            && $child->namespaceURI === null
            && in_array($child->nodeName, [ 'body', 'frameset' ], true);
        if (!$matches) {
            continue;
        }

        // References are handled weirdly by PHP's DOM. Keep handing out the
        // stored body element until it actually changes so that operations on
        // it (like classList) can be done without losing the reference.
        if ($child !== $this->_body) {
            $this->_body = $child;
        }

        return $this->_body;
    }

    // No body or frameset element was found; forget any stored one.
    $this->_body = null;
    return null;
}
|
||||
|
||||
/**
 * Magic setter providing the virtual "body" property.
 *
 * Assignments to any other property name are ignored.
 *
 * @param string $prop The name of the property being written
 * @param mixed $value The new body; must be a body or frameset element with no namespace
 * @throws DOMException (HIERARCHY_REQUEST_ERROR) if the value is not a body or
 *         frameset element, or if the document has no document element to
 *         receive it
 */
public function __set(string $prop, $value) {
    if ($prop !== 'body') {
        return;
    }

    # On setting, the following algorithm must be run:
    #
    # 1. If the new value is not a body or frameset element, then throw a
    #    "HierarchyRequestError" DOMException.
    if (!$value instanceof Element || $value->namespaceURI !== null) {
        throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
    }
    if ($value->nodeName !== 'body' && $value->nodeName !== 'frameset') {
        throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
    }

    // Look the body element up in the tree rather than trusting the stored
    // reference: the stored reference is only refreshed when the property is
    // read, so it may be unset or stale at this point.
    $body = $this->__get('body');

    if ($body !== null) {
        # 2. Otherwise, if the new value is the same as the body element, return.
        if ($value->isSameNode($body)) {
            return;
        }

        # 3. Otherwise, if the body element is not null, then replace the body element
        #    with the new value within the body element's parent and return.
        $this->documentElement->replaceChild($value, $body);
        $this->_body = $value;
        return;
    }

    # 4. Otherwise, if there is no document element, throw a "HierarchyRequestError"
    #    DOMException.
    if ($this->documentElement === null) {
        throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
    }

    # 5. Otherwise, the body element is null, but there's a document element. Append
    #    the new value to the document element.
    $this->documentElement->appendChild($value);
    $this->_body = $value;
}
|
||||
|
||||
/**
 * Converts the document to its HTML serialization.
 *
 * @return string The result of serializing the whole document
 */
public function __toString(): string {
    return $this->serialize();
}
|
||||
|
|
|
@ -7,9 +7,5 @@ declare(strict_types=1);
|
|||
namespace MensBeam\HTML;
|
||||
|
||||
class DocumentFragment extends \DOMDocumentFragment {
|
||||
use ContainerNode, Moonwalk, Serialize, Walk;
|
||||
|
||||
public function __toString() {
|
||||
return $this->serialize();
|
||||
}
|
||||
use ContainerNode, Moonwalk, ToString, Walk;
|
||||
}
|
||||
|
|
|
@ -7,7 +7,7 @@ declare(strict_types=1);
|
|||
namespace MensBeam\HTML;
|
||||
|
||||
class Element extends \DOMElement {
|
||||
use ContainerNode, EscapeString, Moonwalk, Serialize, Walk;
|
||||
use ContainerNode, EscapeString, Moonwalk, ToString, Walk;
|
||||
|
||||
protected $_classList;
|
||||
|
||||
|
@ -142,6 +142,11 @@ class Element extends \DOMElement {
|
|||
return $result;
|
||||
}
|
||||
|
||||
/**
 * Serializes this element by handing it to its owner document's serializer.
 *
 * @return string The string returned by the owner document's serialize() for this element
 */
public function serialize(): string {
    $document = $this->ownerDocument;

    return $document->serialize($this);
}
|
||||
|
||||
|
||||
public function __get(string $prop) {
|
||||
switch ($prop) {
|
||||
case 'classList':
|
||||
|
@ -163,7 +168,7 @@ class Element extends \DOMElement {
|
|||
# might throw an exception instead of returning a string).
|
||||
// DEVIATION: Parsing of XML documents will not be handled by this
|
||||
// implementation, so there's no need for the well-formed flag.
|
||||
case 'innerHTML':
|
||||
case 'innerHTML':
|
||||
return $this->serialize($this);
|
||||
### DOM Parsing Specification ###
|
||||
# 2.4 Extensions to the Element interface
|
||||
|
@ -178,9 +183,9 @@ class Element extends \DOMElement {
|
|||
// OPTIMIZATION: When following the instructions above the fragment serializing
|
||||
// algorithm (Element::serialize) would invoke Element::__toString, so just
|
||||
// doing that instead of multiple function calls.
|
||||
case 'outerHTML':
|
||||
case 'outerHTML':
|
||||
return $this->__toString();
|
||||
default:
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
@ -284,109 +289,4 @@ class Element extends \DOMElement {
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
public function __toString(): string {
|
||||
# If current node is an element in the HTML namespace, the MathML namespace,
|
||||
# or the SVG namespace, then let tagname be current node’s local name.
|
||||
# Otherwise, let tagname be current node’s qualified name.
|
||||
if ($this->namespaceURI === null || $this->namespaceURI === Parser::MATHML_NAMESPACE || $this->namespaceURI === Parser::SVG_NAMESPACE) {
|
||||
$tagName = $this->localName;
|
||||
} else {
|
||||
$tagName = $this->nodeName;
|
||||
}
|
||||
|
||||
// Since tag names can contain characters that are invalid in PHP's XML DOM
|
||||
// uncoerce the name when printing if necessary.
|
||||
if (strpos($tagName, 'U') !== false) {
|
||||
$tagName = $this->uncoerceName($tagName);
|
||||
}
|
||||
|
||||
# Append a U+003C LESS-THAN SIGN character (<), followed by tagname.
|
||||
$s = "<$tagName";
|
||||
|
||||
# If current node's is value is not null, and the element does not have an is
|
||||
# attribute in its attribute list, then append the string " is="", followed by
|
||||
# current node's is value escaped as described below in attribute mode, followed
|
||||
# by a U+0022 QUOTATION MARK character (").
|
||||
// DEVIATION: There is no scripting support in this implementation.
|
||||
|
||||
# For each attribute that the element has, append a U+0020 SPACE character,
|
||||
# the attribute’s serialized name as described below, a U+003D EQUALS SIGN
|
||||
# character (=), a U+0022 QUOTATION MARK character ("), the attribute’s value,
|
||||
# escaped as described below in attribute mode, and a second U+0022 QUOTATION
|
||||
# MARK character (").
|
||||
for ($j = 0; $j < $this->attributes->length; $j++) {
|
||||
$attr = $this->attributes->item($j);
|
||||
|
||||
# An attribute’s serialized name for the purposes of the previous paragraph
|
||||
# must be determined as follows:
|
||||
switch ($attr->namespaceURI) {
|
||||
# If the attribute has no namespace
|
||||
case null:
|
||||
# The attribute’s serialized name is the attribute’s local name.
|
||||
$name = $attr->localName;
|
||||
break;
|
||||
# If the attribute is in the XML namespace
|
||||
case Parser::XML_NAMESPACE:
|
||||
# The attribute’s serialized name is the string "xml:" followed by the
|
||||
# attribute’s local name.
|
||||
$name = 'xml:' . $attr->localName;
|
||||
break;
|
||||
# If the attribute is in the XMLNS namespace...
|
||||
case Parser::XMLNS_NAMESPACE:
|
||||
# ...and the attribute’s local name is xmlns
|
||||
if ($attr->localName === 'xmlns') {
|
||||
# The attribute’s serialized name is the string "xmlns".
|
||||
$name = 'xmlns';
|
||||
}
|
||||
# ... and the attribute’s local name is not xmlns
|
||||
else {
|
||||
# The attribute’s serialized name is the string "xmlns:" followed by the
|
||||
# attribute’s local name.
|
||||
$name = 'xmlns:' . $attr->localName;
|
||||
}
|
||||
break;
|
||||
# If the attribute is in the XLink namespace
|
||||
case Parser::XLINK_NAMESPACE:
|
||||
# The attribute’s serialized name is the string "xlink:" followed by the
|
||||
# attribute’s local name.
|
||||
$name = 'xlink:' . $attr->localName;
|
||||
break;
|
||||
# If the attribute is in some other namespace
|
||||
default:
|
||||
# The attribute’s serialized name is the attribute’s qualified name.
|
||||
$name = $attr->nodeName;
|
||||
}
|
||||
// undo any name mangling
|
||||
if (strpos($name, 'U') !== false) {
|
||||
$name = $this->uncoerceName($name);
|
||||
}
|
||||
$value = $this->escapeString($attr->value, true);
|
||||
$s .= " $name=\"$value\"";
|
||||
}
|
||||
|
||||
# While the exact order of attributes is UA-defined, and may depend on factors
|
||||
# such as the order that the attributes were given in the original markup, the
|
||||
# sort order must be stable, such that consecutive invocations of this
|
||||
# algorithm serialize an element’s attributes in the same order.
|
||||
// Okay.
|
||||
|
||||
# Append a U+003E GREATER-THAN SIGN character (>).
|
||||
$s .= '>';
|
||||
|
||||
# If current node serializes as void, then continue on to the next child node at
|
||||
# this point.
|
||||
if ($this->serializesAsVoid()) {
|
||||
return $s;
|
||||
}
|
||||
|
||||
# Append the value of running the HTML fragment serialization algorithm on the
|
||||
# current node element (thus recursing into this algorithm for that element),
|
||||
# followed by a U+003C LESS-THAN SIGN character (<), a U+002F SOLIDUS character (/),
|
||||
# tagname again, and finally a U+003E GREATER-THAN SIGN character (>).
|
||||
$s .= $this->serialize($this);
|
||||
$s .= "</$tagName>";
|
||||
|
||||
return $s;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,13 +7,5 @@ declare(strict_types=1);
|
|||
namespace MensBeam\HTML;
|
||||
|
||||
class ProcessingInstruction extends \DOMProcessingInstruction {
|
||||
use LeafNode, Moonwalk;
|
||||
|
||||
public function __toString(): string {
|
||||
# Append the literal string "<?" (U+003C LESS-THAN SIGN, U+003F QUESTION MARK),
|
||||
# followed by the value of current node’s target IDL attribute, followed by a
|
||||
# single U+0020 SPACE character, followed by the value of current node’s data
|
||||
# IDL attribute, followed by a single U+003E GREATER-THAN SIGN character (>).
|
||||
return "<?{$this->target} {$this->data}>";
|
||||
}
|
||||
use LeafNode, Moonwalk, ToString;
|
||||
}
|
||||
|
|
|
@ -7,19 +7,5 @@ declare(strict_types=1);
|
|||
namespace MensBeam\HTML;
|
||||
|
||||
class Text extends \DOMText {
|
||||
use EscapeString, LeafNode, Moonwalk;
|
||||
|
||||
function __toString(): string {
|
||||
# If the parent of current node is a style, script, xmp, iframe, noembed,
|
||||
# noframes, or plaintext element, or if the parent of current node is a noscript
|
||||
# element and scripting is enabled for the node, then append the value of
|
||||
# current node’s data IDL attribute literally.
|
||||
// DEVIATION: No scripting, so <noscript> is not included
|
||||
if ($this->parentNode->namespaceURI === null && in_array($this->parentNode->nodeName, ['style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'plaintext'])) {
|
||||
return $this->data;
|
||||
}
|
||||
# Otherwise, append the value of current node’s data IDL attribute, escaped as
|
||||
# described below.
|
||||
return $this->escapeString($this->data);
|
||||
}
|
||||
use EscapeString, LeafNode, Moonwalk, ToString;
|
||||
}
|
||||
|
|
|
@ -1,85 +0,0 @@
|
|||
<?php
|
||||
/** @license MIT
|
||||
* Copyright 2017 , Dustin Wilson, J. King et al.
|
||||
* See LICENSE and AUTHORS files for details */
|
||||
|
||||
declare(strict_types=1);
|
||||
namespace MensBeam\HTML;
|
||||
|
||||
trait Serialize {
|
||||
// List of elements that are treated as block elements when pretty printing
|
||||
protected static $blockElements = [ 'address', 'article', 'aside', 'blockquote', 'body', 'details', 'dialog', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html', 'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'script', 'source', 'style', 'table', 'template', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul' ];
|
||||
// List of elements where content is ignored when pretty printing
|
||||
protected static $ignoredContentElements = [ 'pre', 'title' ];
|
||||
// List of elements which are self-closing
|
||||
protected static $voidElements = [ 'area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' ];
|
||||
|
||||
|
||||
|
||||
protected function serializesAsVoid(): bool {
|
||||
$name = $this->nodeName;
|
||||
if (in_array($name, self::$voidElements)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
protected function serialize(\DOMNode $node = null): string {
|
||||
$node = $node ?? $this;
|
||||
|
||||
if (!$node instanceof Element && !$node instanceof Document && !$node instanceof DocumentFragment) {
|
||||
throw new DOMException(DOMException::DOCUMENT_ELEMENT_DOCUMENTFRAG_EXPECTED, gettype($node));
|
||||
}
|
||||
|
||||
# 13.3. Serializing HTML fragments
|
||||
#
|
||||
# 1. If the node serializes as void, then return the empty string.
|
||||
if ($this->serializesAsVoid()) {
|
||||
return '';
|
||||
}
|
||||
|
||||
# 2. Let s be a string, and initialize it to the empty string.
|
||||
$s = '';
|
||||
|
||||
# 3. If the node is a template element, then let the node instead be the
|
||||
# template element’s template contents (a DocumentFragment node).
|
||||
if ($node instanceof TemplateElement) {
|
||||
$node = $node->content;
|
||||
}
|
||||
|
||||
$nodesLength = $node->childNodes->length;
|
||||
if ($nodesLength > 0) {
|
||||
// If the provided node is a document node and the first element in
|
||||
// the tree is a document type then print the document type. There's
|
||||
// no sense in checking for this on every single element in the tree.
|
||||
// If the document type is present it will always be the first node
|
||||
// because of how PHP's XML DOM works.
|
||||
$start = 0;
|
||||
if ($node->nodeType === XML_DOCUMENT_NODE && $node->childNodes->item(0)->nodeType === XML_DOCUMENT_TYPE_NODE) {
|
||||
# Append the literal string "<!DOCTYPE" (U+003C LESS-THAN SIGN, U+0021
|
||||
# EXCLAMATION MARK, U+0044 LATIN CAPITAL LETTER D, U+004F LATIN CAPITAL LETTER
|
||||
# O, U+0043 LATIN CAPITAL LETTER C, U+0054 LATIN CAPITAL LETTER T, U+0059
|
||||
# LATIN CAPITAL LETTER Y, U+0050 LATIN CAPITAL LETTER P, U+0045 LATIN CAPITAL
|
||||
# LETTER E), followed by a space (U+0020 SPACE), followed by the value of
|
||||
# current node's name IDL attribute, followed by the literal string ">" (U+003E
|
||||
# GREATER-THAN SIGN).
|
||||
// DEVIATION: The name is trimmed because PHP's DOM does not
|
||||
// accept the empty string as a DOCTYPE name
|
||||
$name = trim($node->childNodes->item(0)->name, " ");
|
||||
$s .= "<!DOCTYPE $name>";
|
||||
$start = 1;
|
||||
}
|
||||
|
||||
# 4. For each child node of the node, in tree order, run the following steps:
|
||||
for ($i = $start; $i < $nodesLength; $i++) {
|
||||
# 1. Let current node be the child node being processed.
|
||||
# 2. Append the appropriate string from the following list to s:
|
||||
$s .= $node->childNodes->item($i);
|
||||
}
|
||||
}
|
||||
|
||||
# 5. Return s.
|
||||
return $s;
|
||||
}
|
||||
}
|
13
lib/DOM/traits/ToString.php
Normal file
13
lib/DOM/traits/ToString.php
Normal file
|
@ -0,0 +1,13 @@
|
|||
<?php
|
||||
/** @license MIT
|
||||
* Copyright 2017 , Dustin Wilson, J. King et al.
|
||||
* See LICENSE and AUTHORS files for details */
|
||||
|
||||
declare(strict_types=1);
|
||||
namespace MensBeam\HTML;
|
||||
|
||||
/**
 * Gives a node class a string conversion that defers to the serializer of the
 * document which owns the node.
 */
trait ToString {
    /**
     * Converts the node to a string by passing it to its owner document's
     * serialize() method.
     *
     * @return string The string returned by the owner document for this node
     */
    public function __toString(): string {
        $document = $this->ownerDocument;

        return $document->serialize($this);
    }
}
|
Loading…
Reference in a new issue