A modern, accurate HTML parser and serializer for PHP
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

755 lines
36 KiB

<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
class Document extends AbstractDocument {
// Cached body/frameset element handed out by the body getter, so repeated
// reads return the same object.
protected ?Element $_body = null;
/** Nonstandard: lazily created XPath evaluator for this document. */
protected ?\DOMXPath $_xpath = null;
// List of elements that are treated as block elements for the purposes of
// output formatting when serializing
protected const BLOCK_ELEMENTS = [ 'address', 'article', 'aside', 'blockquote', 'base', 'body', 'details', 'dialog', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'isindex', 'li', 'link', 'main', 'meta', 'nav', 'ol', 'p', 'picture', 'pre', 'section', 'script', 'source', 'style', 'table', 'template', 'td', 'tfoot', 'th', 'thead', 'title', 'tr', 'ul' ];
// List of heading elements (h1-h6); consecutive headings are not separated by
// the extra blank line that formatted output otherwise inserts between
// differently-named siblings
protected const H_ELEMENTS = [ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' ];
// List of preformatted elements whose content is left untouched by output
// formatting when serializing
protected const PREFORMATTED_ELEMENTS = [ 'iframe', 'listing', 'noembed', 'noframes', 'noscript', 'plaintext', 'pre', 'style', 'script', 'textarea', 'title', 'xmp' ];
// List of void elements, which are serialized as a start tag only, with no
// content and no end tag
protected const VOID_ELEMENTS = [ 'area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' ];
/**
 * Getter for the document's body element.
 *
 * @return \DOMNode|null The first child of the document element that is an
 *                       HTML body or frameset element, or null when the
 *                       document has no document element or no such child.
 */
public function __get_body(): ?\DOMNode {
    // The declared return type has to be nullable: a document without a
    // document element (or without a body) legitimately has no body.
    if ($this->documentElement === null) {
        $this->_body = null;
        return null;
    }

    # The body element of a document is the first of the html element's children
    # that is either a body element or a frameset element, or null if there is no
    # such element.
    $body = null;
    for ($n = $this->documentElement->firstChild; $n !== null; $n = $n->nextSibling) {
        if ($n instanceof Element && $n->namespaceURI === null && ($n->nodeName === 'body' || $n->nodeName === 'frameset')) {
            $body = $n;
            break;
        }
    }

    if ($body === null) {
        $this->_body = null;
        return null;
    }

    // References are handled weirdly by PHP's DOM. Return a stored body element
    // unless it is changed so operations (like classList) can be done without
    // losing the reference.
    if ($body !== $this->_body) {
        $this->_body = $body;
    }

    return $this->_body;
}
/**
 * Setter for the document's body element.
 *
 * @param mixed $value The new body; must be an HTML body or frameset element.
 *
 * @throws DOMException HIERARCHY_REQUEST_ERROR when $value is not an HTML
 *                      body/frameset element, or when there is neither an
 *                      existing body nor a document element to attach it to.
 */
public function __set_body($value) {
    # On setting, the following algorithm must be run:
    #
    # 1. If the new value is not a body or frameset element, then throw a
    #    "HierarchyRequestError" DOMException.
    if (!$value instanceof Element || $value->namespaceURI !== null || ($value->nodeName !== 'body' && $value->nodeName !== 'frameset')) {
        throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
    }

    // Look the current body up in the tree instead of trusting the cached
    // $_body: the cache is only filled when the getter has run, so relying on
    // it would append a second body to a document whose body was never read.
    $root = $this->documentElement;
    $current = null;
    if ($root !== null) {
        for ($n = $root->firstChild; $n !== null; $n = $n->nextSibling) {
            if ($n instanceof Element && $n->namespaceURI === null && ($n->nodeName === 'body' || $n->nodeName === 'frameset')) {
                $current = $n;
                break;
            }
        }
    }

    if ($current !== null) {
        # 2. Otherwise, if the new value is the same as the body element, return.
        if ($value->isSameNode($current)) {
            return;
        }

        # 3. Otherwise, if the body element is not null, then replace the body element
        #    with the new value within the body element's parent and return.
        // The body element is by definition a child of the document element.
        $root->replaceChild($value, $current);
        $this->_body = $value;
        return;
    }

    # 4. Otherwise, if there is no document element, throw a "HierarchyRequestError"
    #    DOMException.
    if ($root === null) {
        throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
    }

    # 5. Otherwise, the body element is null, but there's a document element. Append
    #    the new value to the document element.
    $root->appendChild($value);
    $this->_body = $value;
}
/**
 * Getter for the (nonstandard) xpath property.
 *
 * The evaluator is built on first access and the same instance is returned on
 * every later access.
 */
public function __get_xpath(): \DOMXPath {
    return $this->_xpath ??= new \DOMXPath($this);
}
/**
 * Creates a document, optionally populated from markup or another document.
 *
 * @param string|\DOMDocument|null $source Markup to parse, or a document whose
 *                                         children are copied into this one.
 * @param string|null $encodingOrContentType Passed to the parser when $source
 *                                           is a string.
 *
 * @throws DOMException ARGUMENT_TYPE_ERROR when $source is neither null, a
 *                      string, nor a \DOMDocument.
 */
public function __construct($source = null, ?string $encodingOrContentType = null) {
    // Because we cannot have union types until php 8... :)
    if ($source !== null && !$source instanceof \DOMDocument && !is_string($source)) {
        throw new DOMException(DOMException::ARGUMENT_TYPE_ERROR, 1, 'source', 'string|\DOMDocument', gettype($source));
    }

    // A constructor cannot substitute another object for the one being built,
    // so a source that is already one of our documents is copied like any other
    // \DOMDocument rather than leaving this instance uninitialized.
    parent::__construct();

    $this->registerNodeClass('DOMDocument', '\MensBeam\HTML\Document');
    $this->registerNodeClass('DOMComment', '\MensBeam\HTML\Comment');
    $this->registerNodeClass('DOMDocumentFragment', '\MensBeam\HTML\DocumentFragment');
    $this->registerNodeClass('DOMElement', '\MensBeam\HTML\Element');
    $this->registerNodeClass('DOMProcessingInstruction', '\MensBeam\HTML\ProcessingInstruction');
    $this->registerNodeClass('DOMText', '\MensBeam\HTML\Text');

    if ($source === null) {
        return;
    }

    if (is_string($source)) {
        $source = Parser::parse($source, null, $encodingOrContentType);
    }

    foreach ($source->childNodes as $child) {
        if (!$child instanceof \DOMDocumentType) {
            $this->appendChild($this->importNode($child, true));
        } else {
            // A doctype is recreated here rather than passed through importNode().
            // The identifiers live in publicId/systemId; reading any other property
            // name silently yields an empty string and loses them.
            $this->appendChild($this->implementation->createDocumentType($child->name ?? ' ', $child->publicId, $child->systemId));
        }
    }
}
/**
 * Creates an attribute without a namespace.
 *
 * Thin wrapper: all name handling is done by createAttributeNS().
 */
public function createAttribute($name) {
    $attribute = $this->createAttributeNS(null, $name);
    return $attribute;
}
/**
 * Creates an attribute in the given namespace.
 *
 * Both the namespace URI and the qualified name are trimmed first. When the
 * underlying DOM rejects the name, the name is coerced into one it accepts,
 * the document is flagged as containing mangled attributes, and creation is
 * retried.
 */
public function createAttributeNS($namespaceURI, $qualifiedName) {
    // Normalize the attribute name and namespace URI per modern DOM specifications.
    $ns = ($namespaceURI === null) ? null : trim($namespaceURI);
    $qname = trim($qualifiedName);

    try {
        return parent::createAttributeNS($ns, $qname);
    } catch (\DOMException $e) {
        // The attribute name is invalid for XML
        // Replace any offending characters with "UHHHHHH" where H are the
        // uppercase hexadecimal digits of the character's code point
        $this->mangledAttributes = true;

        // With a namespace the prefix and the local part are coerced separately
        // so that the separating colon survives.
        $coerced = ($ns !== null)
            ? implode(":", array_map([$this, "coerceName"], explode(":", $qname, 2)))
            : $this->coerceName($qname);

        return parent::createAttributeNS($ns, $coerced);
    }
}
/**
 * Creates an element without an explicit namespace.
 *
 * Thin wrapper: all name handling is done by createElementNS().
 */
public function createElement($name, $value = "") {
    $element = $this->createElementNS(null, $name, $value);
    return $element;
}
/**
 * Creates an element in the given namespace.
 *
 * The HTML namespace is represented internally as null, and names of elements
 * without a namespace are lower-cased. HTML template elements are created as
 * TemplateElement instances with their own content fragment. When the
 * underlying DOM rejects the name, the name is coerced into one it accepts,
 * the document is flagged as containing mangled elements, and creation is
 * retried.
 */
public function createElementNS($namespaceURI, $qualifiedName, $value = "") {
    // Normalize the element name and namespace URI per modern DOM specifications.
    $ns = null;
    if ($namespaceURI !== null) {
        $trimmed = trim($namespaceURI);
        if ($trimmed !== Parser::HTML_NAMESPACE) {
            $ns = $trimmed;
        }
    }

    $qname = trim($qualifiedName);
    if ($ns === null) {
        $qname = strtolower($qname);
    }

    try {
        if ($ns === null && $qname === 'template') {
            $element = new TemplateElement($this, $qname, $value);
            // Template elements need to have a reference kept in userland
            ElementMap::set($element);
            $element->content = $this->createDocumentFragment();
        } else {
            $element = parent::createElementNS($ns, $qname, $value);
        }

        return $element;
    } catch (\DOMException $e) {
        // The element name is invalid for XML
        // Replace any offending characters with "UHHHHHH" where H are the
        // uppercase hexadecimal digits of the character's code point
        $this->mangledElements = true;

        // With a namespace the prefix and the local part are coerced separately
        // so that the separating colon survives.
        $coerced = ($ns !== null)
            ? implode(":", array_map([$this, "coerceName"], explode(":", $qname, 2)))
            : $this->coerceName($qname);

        return parent::createElementNS($ns, $coerced, $value);
    }
}
/**
 * Stub: never creates an entity reference.
 *
 * @return bool Always false, whatever $name is.
 */
public function createEntityReference($name): bool {
    return false;
}
/**
 * Fetches a file and parses its contents into this document.
 *
 * @param mixed $filename Location handed to Parser::fetchFile().
 * @param mixed $options Accepted but not used by this method.
 * @param string|null $encodingOrContentType Hint handed to the fetcher; the
 *                                           value the fetcher returns is the
 *                                           one given to the parser.
 *
 * @return bool False when the fetch yields nothing usable, true once parsed.
 */
public function load($filename, $options = null, ?string $encodingOrContentType = null): bool {
    $fetched = Parser::fetchFile($filename, $encodingOrContentType);
    if (!$fetched) {
        return false;
    }

    // The fetcher returns the data together with the encoding/content type to use.
    [$markup, $encodingOrContentType] = $fetched;
    Parser::parse($markup, $this, $encodingOrContentType, null, (string)$filename);

    return true;
}
/**
 * Parses a string of HTML into this document.
 *
 * @param string $source The markup to parse.
 * @param mixed $options Accepted but not used by this method.
 * @param string|null $encodingOrContentType Passed through to the parser.
 *
 * @return bool Always true; invalid input is reported by exception.
 *
 * @throws DOMException ARGUMENT_TYPE_ERROR when $source is not a string.
 */
public function loadHTML($source, $options = null, ?string $encodingOrContentType = null): bool {
    if (!is_string($source)) {
        throw new DOMException(DOMException::ARGUMENT_TYPE_ERROR, 1, 'source', 'string', gettype($source));
    }

    // Parse exactly once, directly into this document, the same way load()
    // does. Parsing into a temporary document, importing its children and then
    // parsing again would hand the parser a Document where it expects a string.
    Parser::parse($source, $this, $encodingOrContentType);

    return true;
}
/**
 * Alias of load(): fetches a file and parses it into this document.
 *
 * @return bool Whatever load() returns for the same arguments.
 */
public function loadHTMLFile($filename, $options = null, ?string $encodingOrContentType = null): bool {
    $loaded = $this->load($filename, $options, $encodingOrContentType);
    return $loaded;
}
/**
 * Stub: never loads anything.
 *
 * @return bool Always false; both arguments are ignored.
 */
public function loadXML($source, $options = null): bool {
    return false;
}
/**
 * Serializes the whole document and writes the result to a file.
 *
 * @param mixed $filename Destination handed to file_put_contents().
 * @param mixed $options Accepted but not used by this method.
 *
 * @return int|false The result of file_put_contents().
 */
public function save($filename, $options = null) {
    $markup = $this->serialize();
    return file_put_contents($filename, $markup);
}
/**
 * Serializes a node, or the whole document when no node is given, to HTML.
 *
 * @param \DOMNode|null $node The node to serialize; null means this document.
 *
 * @return string The serialized markup.
 */
public function saveHTML(?\DOMNode $node = null): string {
    // Serialization is implemented by the document, not by the node, and the
    // node is optional, so the call must be made on $this rather than on $node.
    return $this->serialize($node);
}
/**
 * Alias of save(): serializes the document and writes it to a file.
 *
 * @return int Whatever save() returns for the same file name.
 */
public function saveHTMLFile($filename): int {
    $written = $this->save($filename);
    return $written;
}
/**
 * Stub: never produces XML.
 *
 * @return bool Always false; both arguments are ignored.
 */
public function saveXML(?\DOMNode $node = null, $options = null): bool {
    return false;
}
/**
 * Serializes a node belonging to this document, or the document itself.
 *
 * @param \DOMNode|null $node The node to serialize; null means this document.
 *
 * @return string The serialized markup, formatted when the document's
 *                formatOutput flag is set and the node is suitable for it.
 *
 * @throws DOMException WRONG_DOCUMENT when $node does not belong to this
 *                      document.
 */
public function serialize(?\DOMNode $node = null): string {
    $node = $node ?? $this;
    $formatOutput = $this->formatOutput;

    if ($node !== $this) {
        // A node with no owner document (for instance another document) cannot
        // belong to this one either.
        $owner = $node->ownerDocument;
        if ($owner === null || !$owner->isSameNode($this)) {
            throw new DOMException(DOMException::WRONG_DOCUMENT);
        }

        // The fragment serializer emits the children of the node it is given, so
        // anything that is not already a container is wrapped in one first.
        if ($node instanceof DocumentFragment) {
            // A document fragment is serialized as is; output formatting is only
            // kept when the fragment has at least one element child.
            if ($formatOutput) {
                $formatOutput = ($node->childElementCount > 0);
            }
        } elseif (!$node instanceof Document) {
            // If the node isn't an element disable output formatting
            if ($formatOutput && !$node instanceof Element) {
                $formatOutput = false;
            }

            if (!$node instanceof \DOMDocumentType) {
                // Any other node is cloned into a fresh document fragment.
                $frag = $this->createDocumentFragment();
                $frag->appendChild($node->cloneNode(true));
                $node = $frag;
            } else {
                // A doctype is recreated inside a new document, which is then
                // serialized in its place.
                $newDoc = new self();
                $newDoc->appendChild($newDoc->implementation->createDocumentType($node->name, $node->publicId, $node->systemId));
                $node = $newDoc;
            }
        }
    }

    return $this->serializeFragment($node, $formatOutput);
}
/**
 * Stub: performs no validation.
 *
 * @return bool Always true.
 */
public function validate(): bool {
    return true;
}
/**
 * Stub: performs no XInclude substitution.
 *
 * @return bool Always false; $options is ignored.
 */
public function xinclude($options = null): bool {
    return false;
}
/**
 * Applies the document-specific part of the DOM "ensure pre-insertion
 * validity" check, after the generic checks of the parent class.
 *
 * @param \DOMNode $node The node about to be inserted into this document.
 * @param \DOMNode|null $child The node before which $node is to be inserted,
 *                             or null when it is being appended.
 *
 * @throws DOMException HIERARCHY_REQUEST_ERROR when the insertion would give
 *                      the document an invalid child structure.
 */
protected function preInsertionValidity(\DOMNode $node, ?\DOMNode $child = null) {
    parent::preInsertionValidity($node, $child);

    // Whether the parent (this document) already has an element child.
    $parentHasElementChild = function(): bool {
        foreach ($this->childNodes as $c) {
            if ($c instanceof Element) {
                return true;
            }
        }
        return false;
    };

    // Whether child is non-null and a doctype is following child.
    $doctypeFollowsChild = function() use ($child): bool {
        if ($child === null) {
            return false;
        }
        for ($n = $child->nextSibling; $n !== null; $n = $n->nextSibling) {
            if ($n instanceof \DOMDocumentType) {
                return true;
            }
        }
        return false;
    };

    # 6. If parent is a document, and any of the statements below, switched on node,
    #    are true, then throw a "HierarchyRequestError" DOMException.
    #
    # DocumentFragment node
    #    If node has more than one element child or has a Text node child.
    #    Otherwise, if node has one element child and either parent has an element
    #    child, child is a doctype, or child is non-null and a doctype is following
    #    child.
    if ($node instanceof DocumentFragment) {
        $elementChildren = 0;
        foreach ($node->childNodes as $c) {
            if ($c instanceof Text) {
                throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
            }
            if ($c instanceof Element) {
                $elementChildren++;
            }
        }

        if ($elementChildren > 1) {
            throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
        }

        if ($elementChildren === 1 && ($parentHasElementChild() || $child instanceof \DOMDocumentType || $doctypeFollowsChild())) {
            throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
        }
    }
    # element
    #    parent has an element child, child is a doctype, or child is non-null and a
    #    doctype is following child.
    elseif ($node instanceof Element) {
        if ($child instanceof \DOMDocumentType || $doctypeFollowsChild() || $parentHasElementChild()) {
            throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
        }
    }
    # doctype
    #    parent has a doctype child, child is non-null and an element is preceding
    #    child, or child is null and parent has an element child.
    elseif ($node instanceof \DOMDocumentType) {
        foreach ($this->childNodes as $c) {
            if ($c instanceof \DOMDocumentType) {
                throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
            }
        }

        if ($child !== null) {
            // The DOM property is previousSibling; there is no prevSibling.
            for ($n = $child->previousSibling; $n !== null; $n = $n->previousSibling) {
                if ($n instanceof Element) {
                    throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
                }
            }
        } elseif ($parentHasElementChild()) {
            throw new DOMException(DOMException::HIERARCHY_REQUEST_ERROR);
        }
    }
}
/**
 * Builds the filter used with the DOM walker methods while formatting output.
 *
 * The returned closure yields true for a node other than $ignoredNode that is
 * an HTML element and either is a block element itself or has a block element
 * among the nodes its walk() visits. In every other case it yields null.
 */
protected function serializeBlockElementFilter(\DOMNode $ignoredNode): \Closure {
    return function($n) use ($ignoredNode) {
        if ($n->isSameNode($ignoredNode)) {
            return null;
        }
        if (!($n instanceof Element) || $n->namespaceURI !== null) {
            return null;
        }

        // A block element matches outright.
        if (in_array($n->nodeName, self::BLOCK_ELEMENTS)) {
            return true;
        }

        // Otherwise the element matches only when walking it turns up an HTML
        // block element.
        $firstBlock = $n->walk(function($nn) {
            if ($nn instanceof Element && $nn->namespaceURI === null && in_array($nn->nodeName, self::BLOCK_ELEMENTS)) {
                return true;
            }
        })->current();

        return ($firstBlock !== null) ? true : null;
    };
}
/**
 * Serializes the children of a node following the HTML fragment serialization
 * algorithm, optionally pretty-printing the result.
 *
 * Formatting state (indention level, enclosing preformatted or foreign
 * element, name of the previous non-text sibling) is kept in static variables
 * because the method recurses into itself for every element with children.
 *
 * @param \DOMNode $node The node whose children are serialized.
 * @param bool $formatOutput Whether to add newlines and indention.
 *
 * @return string The serialized markup; empty when $node serializes as void.
 */
protected function serializeFragment(\DOMNode $node, bool $formatOutput = false): string {
    if ($formatOutput) {
        // Stores the root foreign element when parsing its descendants
        static $foreignElement = null;
        // Flag used if the root foreign element above has block element siblings
        static $foreignElementWithBlockElementSiblings = false;
        // Stores the indention level
        static $indent = 0;
        // Stores the root preformatted element when parsing its descendants
        static $preformattedElement = null;
        // Stores the previous non text node name so it can be used to check for adding
        // additional space.
        static $previousNonTextNodeSiblingName = null;
    }

    # 13.3. Serializing HTML fragments
    #
    # 1. If the node serializes as void, then return the empty string.
    if (in_array($node->nodeName, self::VOID_ELEMENTS)) {
        return '';
    }

    # 2. Let s be a string, and initialize it to the empty string.
    $s = '';

    # 3. If the node is a template element, then let the node instead be the
    #    template element's template contents (a DocumentFragment node).
    if ($node instanceof TemplateElement) {
        $node = $node->content;
    }

    # 4. For each child node of the node, in tree order, run the following steps:
    ## 1. Let current node be the child node being processed.
    foreach ($node->childNodes as $currentNode) {
        $foreign = ($currentNode->namespaceURI !== null);

        // Keyed on the argument rather than on the document's formatOutput flag:
        // the branches below consult the filter whenever $formatOutput is true.
        if ($formatOutput) {
            // Filter meant to be used with DOM walker generator methods which checks if
            // elements are block or if elements are inline with block descendants
            $blockElementFilter = self::serializeBlockElementFilter($currentNode->parentNode);
        }

        # 2. Append the appropriate string from the following list to s:
        # If current node is an Element
        if ($currentNode instanceof Element) {
            # If current node is an element in the HTML namespace, the MathML namespace, or
            # the SVG namespace, then let tagname be current node's local name. Otherwise,
            # let tagname be current node's qualified name.
            $tagName = (!$foreign || $currentNode->namespaceURI === Parser::MATHML_NAMESPACE || $currentNode->namespaceURI === Parser::SVG_NAMESPACE) ? $currentNode->localName : $currentNode->nodeName;

            // Since tag names can contain characters that are invalid in PHP's XML DOM
            // uncoerce the name when printing if necessary.
            if (strpos($tagName, 'U') !== false) {
                $tagName = $this->uncoerceName($tagName);
            }

            $hasChildNodes = $currentNode->hasChildNodes();

            if ($formatOutput) {
                // From here on the filter ignores the current node itself, so that
                // walking its parent only reports its siblings.
                $blockElementFilter = self::serializeBlockElementFilter($currentNode);
                $modify = false;

                if (!$foreign) {
                    if ($hasChildNodes && $preformattedElement === null && in_array($tagName, self::PREFORMATTED_ELEMENTS)) {
                        $preformattedElement = $currentNode;
                    }

                    // If a block element, an inline element with block element siblings, or an
                    // inline element with block element descendants...
                    if (in_array($tagName, self::BLOCK_ELEMENTS) || $currentNode->parentNode->walkShallow($blockElementFilter)->current() !== null || ($hasChildNodes && $currentNode->walk($blockElementFilter)->current() !== null)) {
                        $modify = true;
                    }
                } else {
                    // If a foreign element with block element siblings
                    if ($hasChildNodes && $foreignElement === null) {
                        $foreignElement = $currentNode;
                        if ($currentNode->parentNode->walkShallow($blockElementFilter)->current() !== null) {
                            $foreignElementWithBlockElementSiblings = true;
                            $modify = true;
                        }
                    }
                    // If a foreign element with a foreign element ancestor with block element
                    // siblings
                    elseif ($foreignElement !== null && $foreignElementWithBlockElementSiblings) {
                        $modify = true;
                    }
                }

                if ($modify) {
                    // If the previous non text node sibling doesn't have the same name as the
                    // current node and neither are h1-h6 elements then add an additional newline.
                    if ($previousNonTextNodeSiblingName !== null && $previousNonTextNodeSiblingName !== $tagName && !(in_array($previousNonTextNodeSiblingName, self::H_ELEMENTS) && in_array($tagName, self::H_ELEMENTS))) {
                        $s .= "\n";
                    }

                    $s .= "\n" . str_repeat(' ', $indent);
                }
            }

            # Append a U+003C LESS-THAN SIGN character (<), followed by tagname.
            $s .= "<$tagName";

            # If current node's is value is not null, and the element does not have an is
            # attribute in its attribute list, then append the string " is="", followed by
            # current node's is value escaped as described below in attribute mode, followed
            # by a U+0022 QUOTATION MARK character (").
            // DEVIATION: There is no scripting support in this implementation.

            # For each attribute that the element has, append a U+0020 SPACE character,
            # the attribute's serialized name as described below, a U+003D EQUALS SIGN
            # character (=), a U+0022 QUOTATION MARK character ("), the attribute's value,
            # escaped as described below in attribute mode, and a second U+0022 QUOTATION
            # MARK character (").
            foreach ($currentNode->attributes as $attr) {
                # An attribute's serialized name for the purposes of the previous paragraph
                # must be determined as follows:
                switch ($attr->namespaceURI) {
                    # If the attribute has no namespace
                    case null:
                        # The attribute's serialized name is the attribute's local name.
                        $name = $attr->localName;
                    break;
                    # If the attribute is in the XML namespace
                    case Parser::XML_NAMESPACE:
                        # The attribute's serialized name is the string "xml:" followed by the
                        # attribute's local name.
                        $name = 'xml:' . $attr->localName;
                    break;
                    # If the attribute is in the XMLNS namespace...
                    case Parser::XMLNS_NAMESPACE:
                        # ...and the attribute's local name is xmlns
                        if ($attr->localName === 'xmlns') {
                            # The attribute's serialized name is the string "xmlns".
                            $name = 'xmlns';
                        }
                        # ... and the attribute's local name is not xmlns
                        else {
                            # The attribute's serialized name is the string "xmlns:" followed by the
                            # attribute's local name.
                            $name = 'xmlns:' . $attr->localName;
                        }
                    break;
                    # If the attribute is in the XLink namespace
                    case Parser::XLINK_NAMESPACE:
                        # The attribute's serialized name is the string "xlink:" followed by the
                        # attribute's local name.
                        $name = 'xlink:' . $attr->localName;
                    break;
                    # If the attribute is in some other namespace
                    default:
                        # The attribute's serialized name is the attribute's qualified name.
                        $name = $attr->nodeName;
                }

                // undo any name mangling
                if (strpos($name, 'U') !== false) {
                    $name = $this->uncoerceName($name);
                }

                $value = $this->escapeString($attr->value, true);
                $s .= " $name=\"$value\"";
            }

            # While the exact order of attributes is UA-defined, and may depend on factors
            # such as the order that the attributes were given in the original markup, the
            # sort order must be stable, such that consecutive invocations of this
            # algorithm serialize an element's attributes in the same order.
            // Okay.

            // When formatting output set the previous non text node sibling name to the
            // current node name so void elements and empty foreign elements will be
            // recognized by their next sibling.
            if ($formatOutput) {
                $previousNonTextNodeSiblingName = $tagName;
            }

            # Append a U+003E GREATER-THAN SIGN character (>).
            // DEVIATION: Printing XML-based content such as SVG as if it's HTML might be
            // practical when a browser is serializing, but it's not in this library's
            // usage. So, if the element is foreign and doesn't contain any children close
            // the element instead and continue on to the next child node.
            if (!$foreign || $hasChildNodes) {
                $s .= '>';
            } else {
                $s .= '/>';
                continue;
            }

            # If current node serializes as void, then continue on to the next child node at
            # this point.
            if (in_array($currentNode->nodeName, self::VOID_ELEMENTS)) {
                continue;
            }

            if ($formatOutput) {
                // If formatting output set the previous non text node sibling to null before
                // serializing children.
                $previousNonTextNodeSiblingName = null;

                // If formatting output and the element has already been modified increment the
                // indention level
                if ($modify) {
                    $indent++;
                }
            }

            # Append the value of running the HTML fragment serialization algorithm on the
            # current node element (thus recursing into this algorithm for that element),
            # followed by a U+003C LESS-THAN SIGN character (<), a U+002F SOLIDUS character (/),
            # tagname again, and finally a U+003E GREATER-THAN SIGN character (>).
            $s .= $this->serializeFragment($currentNode, $formatOutput);

            if ($formatOutput) {
                if ($modify) {
                    // Decrement the indention level.
                    $indent--;

                    if ($preformattedElement === null) {
                        // If a foreign element with a foreign element ancestor with block element
                        // siblings and has at least one element child or any element with a block
                        // element descendant...
                        if (($foreign && $foreignElementWithBlockElementSiblings && $currentNode->firstElementChild !== null) || ($currentNode->walk($blockElementFilter)->current() !== null)) {
                            $s .= "\n" . str_repeat(' ', $indent);
                        }
                    }
                }

                if ($foreignElement !== null && $currentNode->isSameNode($foreignElement)) {
                    $foreignElement = null;
                    $foreignElementWithBlockElementSiblings = false;
                } elseif ($preformattedElement !== null && $currentNode->isSameNode($preformattedElement)) {
                    $preformattedElement = null;
                }

                // Set the previous text node sibling name to the current node's name so it may
                // be recognized by the following sibling.
                $previousNonTextNodeSiblingName = $tagName;
            }

            $s .= "</$tagName>";
        }
        # If current node is a Text node
        elseif ($currentNode instanceof Text) {
            $text = $currentNode->data;

            # If the parent of current node is a style, script, xmp, iframe, noembed,
            # noframes, or plaintext element, or if the parent of current node is a noscript
            # element and scripting is enabled for the node, then append the value of
            # current node's data IDL attribute literally.
            if ($currentNode->parentNode->namespaceURI === null && in_array($currentNode->parentNode->nodeName, [ 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'plaintext' ])) {
                $s .= $text;
            }
            # Otherwise, append the value of current node's data IDL attribute, escaped as
            # described below.
            else {
                if ($formatOutput) {
                    if ($preformattedElement === null) {
                        // Condense spaces and tabs into a single space.
                        $text = preg_replace('/ +/', ' ', str_replace("\t", ' ', $text));

                        if ($foreignElementWithBlockElementSiblings || $currentNode->parentNode->walk($blockElementFilter)->current() !== null) {
                            // If the text node's data is made up of only whitespace characters continue
                            // onto the next node
                            if (strspn($text, Data::WHITESPACE) === strlen($text)) {
                                continue;
                            }

                            // Otherwise, remove newlines from the text node's data; if that causes the data
                            // to be empty then continue onto the next node.
                            $text = preg_replace('/[\n\x0C\x0D]+/', '', $text);
                            if ($text === '') {
                                continue;
                            }
                        }
                    }
                }

                $s .= $this->escapeString($text);
            }
        }
        # If current node is a Comment
        elseif ($currentNode instanceof Comment) {
            if ($formatOutput) {
                // Parenthesized so that nothing is added inside a preformatted element;
                // without the parentheses && would bind tighter than || and the
                // preformatted check would only guard the first alternative.
                if ($preformattedElement === null && ($foreignElementWithBlockElementSiblings || $currentNode->parentNode->walk($blockElementFilter)->current() !== null)) {
                    // Add an additional newline if the previous sibling wasn't a comment.
                    if ($previousNonTextNodeSiblingName !== null && $previousNonTextNodeSiblingName !== $currentNode->nodeName) {
                        $s .= "\n";
                    }

                    $s .= "\n" . str_repeat(' ', $indent);
                }

                // Record the comment's own node name, not the document's.
                $previousNonTextNodeSiblingName = $currentNode->nodeName;
            }

            # Append the literal string "<!--" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION
            # MARK, U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS), followed by the value of
            # current node's data IDL attribute, followed by the literal string "-->"
            # (U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN).
            $s .= "<!--{$currentNode->data}-->";
        }
        # If current node is a ProcessingInstruction
        elseif ($currentNode instanceof ProcessingInstruction) {
            if ($formatOutput) {
                // A processing instruction's node name is its target, so a fixed marker
                // is used to recognize "the previous sibling was a processing
                // instruction" regardless of target. The leading # keeps it distinct
                // from the tag names elements record.
                $piMarker = '#processing-instruction';

                if ($preformattedElement === null && ($foreignElementWithBlockElementSiblings || $currentNode->parentNode->walk($blockElementFilter)->current() !== null)) {
                    // Add an additional newline if the previous sibling wasn't a processing
                    // instruction.
                    if ($previousNonTextNodeSiblingName !== null && $previousNonTextNodeSiblingName !== $piMarker) {
                        $s .= "\n";
                    }

                    $s .= "\n" . str_repeat(' ', $indent);
                }

                $previousNonTextNodeSiblingName = $piMarker;
            }

            # Append the literal string "<?" (U+003C LESS-THAN SIGN, U+003F QUESTION MARK),
            # followed by the value of current node's target IDL attribute, followed by a
            # single U+0020 SPACE character, followed by the value of current node's data
            # IDL attribute, followed by a single U+003E GREATER-THAN SIGN character (>).
            $s .= "<?{$currentNode->target} {$currentNode->data}>";
        }
        # If current node is a DocumentType
        elseif ($currentNode instanceof \DOMDocumentType) {
            # Append the literal string "<!DOCTYPE" (U+003C LESS-THAN SIGN, U+0021
            # EXCLAMATION MARK, U+0044 LATIN CAPITAL LETTER D, U+004F LATIN CAPITAL LETTER
            # O, U+0043 LATIN CAPITAL LETTER C, U+0054 LATIN CAPITAL LETTER T, U+0059
            # LATIN CAPITAL LETTER Y, U+0050 LATIN CAPITAL LETTER P, U+0045 LATIN CAPITAL
            # LETTER E), followed by a space (U+0020 SPACE), followed by the value of
            # current node's name IDL attribute, followed by the literal string ">" (U+003E
            # GREATER-THAN SIGN).
            // DEVIATION: The name is trimmed because PHP's DOM does not
            // accept the empty string as a DOCTYPE name
            // The name is read from the doctype being visited; the doctype is not
            // necessarily the first child of the node being serialized.
            $name = trim($currentNode->name, ' ');
            $s .= "<!DOCTYPE $name>";
        }
    }

    # 5. Return s.
    return $s;
}
/**
 * Converting the document to a string yields its serialization.
 */
public function __toString() {
    $markup = $this->serialize();
    return $markup;
}
}