Dustin Wilson
3 years ago
3 changed files with 253 additions and 1710 deletions
@ -1,616 +0,0 @@ |
|||
<?php |
|||
/** @license MIT |
|||
* Copyright 2017 , Dustin Wilson, J. King et al. |
|||
* See LICENSE and AUTHORS files for details */ |
|||
|
|||
declare(strict_types=1); |
|||
namespace MensBeam\HTML\Parser; |
|||
|
|||
use MensBeam\HTML\Parser; |
|||
|
|||
abstract class Serializer { |
|||
use NameCoercion; |
|||
|
|||
// Elements treated as block elements when reformatting whitespace |
|||
protected const BLOCK_ELEMENTS = [ 'address', 'article', 'aside', 'blockquote', 'base', 'body', 'details', 'dialog', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'isindex', 'li', 'link', 'main', 'meta', 'nav', 'ol', 'p', 'picture', 'pre', 'section', 'script', 'source', 'style', 'table', 'td', 'tfoot', 'th', 'thead', 'title', 'tr', 'ul' ]; |
|||
// List of h-elements which are used to determine element grouping for the |
|||
// purposes of reformatting whitespace |
|||
protected const H_ELEMENTS = [ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' ]; |
|||
// List of preformatted elements where content is ignored for the purposes of |
|||
// reformatting whitespace |
|||
protected const PREFORMATTED_ELEMENTS = [ 'iframe', 'listing', 'noembed', 'noframes', 'noscript', 'plaintext', 'pre', 'style', 'script', 'textarea', 'title', 'xmp' ]; |
|||
protected const VOID_ELEMENTS = ["basefont", "bgsound", "frame", "keygen", "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source", "track", "wbr"]; |
|||
protected const RAWTEXT_ELEMENTS = ["style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext"]; |
|||
protected const BOOLEAN_ATTRIBUTES = [ |
|||
'allowfullscreen' => ["iframe"], |
|||
'async' => ["script"], |
|||
'autofocus' => true, |
|||
'autoplay' => ["audio", "video"], |
|||
'checked' => ["input"], |
|||
'compact' => ["dir", "dl", "menu", "ol", "ul"], |
|||
'controls' => ["audio", "video"], |
|||
'declare' => ["object"], |
|||
'default' => ["track"], |
|||
'defer' => ["script"], |
|||
'disabled' => ["button", "fieldset", "input", "link", "optgroup", "option", "select", "textarea"], |
|||
'formnovalidate' => ["button", "input"], |
|||
'hidden' => true, |
|||
'ismap' => ["img"], |
|||
'itemscope' => true, |
|||
'loop' => ["audio", "video"], |
|||
'multiple' => ["input", "select"], |
|||
'muted' => ["audio", "video"], |
|||
'nohref' => ["area"], |
|||
'nomodule' => ["script"], |
|||
'noresize' => ["frame"], |
|||
'noshade' => ["hr"], |
|||
'novalidate' => ["form"], |
|||
'nowrap' => ["td", "th"], |
|||
'open' => ["details", "dialog"], |
|||
'playsinline' => ["video"], |
|||
'readonly' => ["input", "textarea"], |
|||
'required' => ["input", "select", "textarea"], |
|||
'reversed' => ["ol"], |
|||
'selected' => ["option"], |
|||
]; |
|||
|
|||
// Used when reformatting whitespace when nodes are checked for being treated as block. |
|||
|
|||
protected const BLOCK_QUERY = 'count(.//*[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"][not(ancestor::iframe[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"] or ancestor::listing[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"] or ancestor::noembed[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"] or ancestor::noframes[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"] or ancestor::noscript[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"] or ancestor::plaintext[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"] or ancestor::pre[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"] or ancestor::style[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"] or ancestor::script[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"] or ancestor::textarea[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"] or ancestor::title[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"] or ancestor::xmp[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"])][name()="address" or name()="article" or name()="aside" or name()="blockquote" or name()="base" or name()="body" or name()="details" or name()="dialog" or name()="dd" or name()="div" or name()="dl" or name()="dt" or name()="fieldset" or name()="figcaption" or name()="figure" or name()="footer" or name()="form" or name()="frame" or name()="frameset" or name()="h1" or name()="h2" or name()="h3" or name()="h4" or name()="h5" or name()="h6" or name()="head" or name()="header" or name()="hr" or name()="html" or name()="isindex" or name()="li" or name()="link" or name()="main" or name()="meta" or name()="nav" or name()="ol" or name()="p" or name()="picture" or name()="pre" or name()="section" or name()="script" or name()="source" or name()="style" or name()="table" or name()="td" or name()="tfoot" or name()="th" or name()="thead" or name()="title" or name()="tr" or name()="ul"][1])'; |
|||
|
|||
/** Serializes an HTML DOM node to a string. This is equivalent to the outerHTML getter |
|||
* |
|||
* @param \DOMDocument|\DOMElement|\DOMText|\DOMComment|\DOMProcessingInstruction|\DOMDocumentFragment|\DOMDocumentType $node The node to serialize |
|||
* @param \MensBeam\HTML\Parser\Config|null $config The configuration parameters to use, if any |
|||
*/ |
|||
public static function serialize(\DOMNode $node, ?Config $config = null): string { |
|||
$config = $config ?? new Config; |
|||
$boolAttr = $config->serializeBooleanAttributeValues ?? true; |
|||
$endTags = $config->serializeForeignVoidEndTags ?? true; |
|||
$reformatWhitespace = $config->reformatWhitespace ?? false; |
|||
|
|||
if ($reformatWhitespace) { |
|||
$indentStep = $config->indentStep ?? 1; |
|||
$indentChar = ($config->indentWithSpaces ?? true) ? ' ' : "\t"; |
|||
} |
|||
|
|||
$s = ""; |
|||
$stack = []; |
|||
$n = $node; |
|||
|
|||
if ($reformatWhitespace) { |
|||
$first = true; |
|||
$indentionLevel = 0; |
|||
$modifyStack = []; |
|||
} |
|||
|
|||
do { |
|||
# If current node is an Element |
|||
if ($n instanceof \DOMElement) { |
|||
$htmlElement = ($n->namespaceURI ?? Parser::HTML_NAMESPACE) === Parser::HTML_NAMESPACE; |
|||
# If current node is an element in the HTML namespace, |
|||
# the MathML namespace, or the SVG namespace, then let |
|||
# tagname be current node's local name. Otherwise, let |
|||
# tagname be current node's qualified name. |
|||
if (in_array($n->namespaceURI ?? Parser::HTML_NAMESPACE, [Parser::HTML_NAMESPACE, Parser::SVG_NAMESPACE, Parser::MATHML_NAMESPACE])) { |
|||
$tagName = self::uncoerceName($n->localName); |
|||
} else { |
|||
$tagName = self::uncoerceName($n->tagName); |
|||
} |
|||
|
|||
if ($reformatWhitespace) { |
|||
$hasChildNodes = $n->hasChildNodes(); |
|||
$modify = false; |
|||
|
|||
// Start off by finding the first non-text node child in the document or fragment. |
|||
$firstNonTextNodeChild = null; |
|||
// If the parent node is null this means the element itself is being serialized. |
|||
// It is the first non-text node child. |
|||
if ($n->parentNode === null) { |
|||
$firstNonTextNodeChild = $n; |
|||
} |
|||
// Otherwise, if the node's parent node is a Document or a DocumentFragment then |
|||
// iterate through that parent node's children and get the first non-text node |
|||
// child. |
|||
elseif (($n->parentNode instanceof \DOMDocument || $n->parentNode instanceof \DOMDocumentFragment)) { |
|||
$t = $n->parentNode->firstChild; |
|||
do { |
|||
if (!$t instanceof \DOMText) { |
|||
$firstNonTextNodeChild = $t; |
|||
break; |
|||
} |
|||
} while ($t = $t->nextSibling); |
|||
} |
|||
|
|||
// If the node is an HTML element... |
|||
if ($htmlElement) { |
|||
// If the element is to be treated as block then we need to modify whitespace. |
|||
if (self::treatAsBlock($n->parentNode)) { |
|||
$modify = true; |
|||
} |
|||
} |
|||
// If the node is not an HTML element... |
|||
else { |
|||
// If the parent node is null then we need to modify whitespace. |
|||
if ($n->parentNode === null) { |
|||
$modify = true; |
|||
} |
|||
// If a foreign element with an html element parent |
|||
elseif (($n->parentNode->namespaceURI ?? Parser::HTML_NAMESPACE) === Parser::HTML_NAMESPACE) { |
|||
// If the foreign element should be treated as block then we need to modify |
|||
// whitespace |
|||
$modify = self::treatAsBlock($n->parentNode); |
|||
} |
|||
// Otherwise, walk up the DOM and find the root foreign ancestor. If that |
|||
// ancestor is to be treated as block then we need to modify whitespace. |
|||
else { |
|||
$modify = self::treatForeignRootAsBlock($n); |
|||
} |
|||
} |
|||
|
|||
// Only modify the whitespace here if the current node is not the first non-text |
|||
// node child. This is to prevent newlines from being printed when elements |
|||
// themsleves are serialized or if they're the first node in the tree when a |
|||
// Document or DocumentFragment. |
|||
if ($modify && $firstNonTextNodeChild !== $n) { |
|||
$previousNonTextNodeSiblingName = null; |
|||
$nn = $n; |
|||
while ($nn = $nn->previousSibling) { |
|||
if (!$nn instanceof \DOMText) { |
|||
$previousNonTextNodeSiblingName = $nn->nodeName; |
|||
break; |
|||
} |
|||
} |
|||
|
|||
// If the previous non text node sibling doesn't have the same name as the |
|||
// current node and neither are h1-h6 elements then add an additional newline. |
|||
if ($previousNonTextNodeSiblingName !== null && $previousNonTextNodeSiblingName !== $tagName && count(array_intersect([ $previousNonTextNodeSiblingName, $tagName ], self::H_ELEMENTS)) !== 2) { |
|||
$s .= "\n"; |
|||
} |
|||
|
|||
|
|||
$s .= "\n" . str_repeat($indentChar, $indentionLevel * $indentStep); |
|||
} |
|||
} |
|||
|
|||
# Append a U+003C LESS-THAN SIGN character (<), followed by tagname. |
|||
$s .= "<$tagName"; |
|||
# If current node's is value is not null, and the element does |
|||
# not have an is attribute in its attribute list, then |
|||
# append the string " is="", followed by current node's is |
|||
# value escaped as described below in attribute mode, |
|||
# followed by a U+0022 QUOTATION MARK character ("). |
|||
// DEVIATION: We don't support custom elements |
|||
# For each attribute that the element has, append a |
|||
# U+0020 SPACE character, the attribute's serialized name as |
|||
# described below, a U+003D EQUALS SIGN character (=), a |
|||
# U+0022 QUOTATION MARK character ("), the attribute's |
|||
# value, escaped as described below in attribute mode, and |
|||
# a second U+0022 QUOTATION MARK character ("). |
|||
foreach ($n->attributes as $a) { |
|||
# An attribute's serialized name for the purposes of the previous |
|||
# paragraph must be determined as follows: |
|||
|
|||
# If the attribute has no namespace |
|||
if ($a->namespaceURI === null) { |
|||
# The attribute's serialized name is the attribute's local name. |
|||
$name = self::uncoerceName($a->localName); |
|||
} |
|||
# If the attribute is in the XML namespace |
|||
elseif ($a->namespaceURI === Parser::XML_NAMESPACE) { |
|||
# The attribute's serialized name is the string "xml:" followed |
|||
# by the attribute's local name. |
|||
$name = "xml:".self::uncoerceName($a->localName); |
|||
} |
|||
# If the attribute is in the XMLNS namespace... |
|||
elseif ($a->namespaceURI === Parser::XMLNS_NAMESPACE) { |
|||
# ... and the attribute's local name is xmlns |
|||
if ($a->localName === "xmlns") { |
|||
# The attribute's serialized name is the string "xmlns". |
|||
$name = "xmlns"; |
|||
} |
|||
# ... and the attribute's local name is not xmlns |
|||
else { |
|||
# The attribute's serialized name is the string "xmlns:" |
|||
# followed by the attribute's local name. |
|||
$name = "xmlns:".self::uncoerceName($a->localName); |
|||
} |
|||
} |
|||
# If the attribute is in the XLink namespace |
|||
elseif ($a->namespaceURI === Parser::XLINK_NAMESPACE) { |
|||
# The attribute's serialized name is the string "xlink:" |
|||
# followed by the attribute's local name. |
|||
$name = "xlink:".self::uncoerceName($a->localName); |
|||
} |
|||
# If the attribute is in some other namespace |
|||
else { |
|||
# The attribute's serialized name is the attribute's qualified name. |
|||
$name = ($a->prefix !== "") ? $a->prefix.":".$a->name : $a->name; |
|||
} |
|||
// retrieve the attribute value |
|||
$value = self::escapeString((string) $a->value, true); |
|||
if ( |
|||
$boolAttr |
|||
|| !$htmlElement |
|||
|| !isset(self::BOOLEAN_ATTRIBUTES[$name]) |
|||
|| is_array(self::BOOLEAN_ATTRIBUTES[$name]) && !in_array($tagName, self::BOOLEAN_ATTRIBUTES[$name]) |
|||
|| (strlen($value) && strtolower($value) !== $name) |
|||
) { |
|||
// print the attribute value unless the stars align |
|||
$s .= " $name=\"$value\""; |
|||
} else { |
|||
// omit the value if the stars do align |
|||
$s .= " $name"; |
|||
} |
|||
} |
|||
|
|||
# Append a U+003E GREATER-THAN SIGN character (>). |
|||
// If we're minimizing void foreign elements, insert a slash first where appropriate |
|||
if (!$endTags && !$htmlElement && !$n->hasChildNodes()) { |
|||
$s .= "/>"; |
|||
} else { |
|||
$s .= ">"; |
|||
# If current node serializes as void, then continue on to the next child node at |
|||
# this point. |
|||
# Append the value of running the HTML fragment serialization algorithm on the |
|||
# current node element (thus recursing into this algorithm for that element), |
|||
# followed by a U+003C LESS-THAN SIGN character (<), a U+002F SOLIDUS character (/), |
|||
# tagname again, and finally a U+003E GREATER-THAN SIGN character (>). |
|||
if (($n->namespaceURI ?? Parser::HTML_NAMESPACE) !== Parser::HTML_NAMESPACE || !in_array($tagName, self::VOID_ELEMENTS)) { |
|||
# If the node is a template element, then let the node instead be the template |
|||
# element's template contents (a DocumentFragment node). |
|||
if ($htmlElement && $tagName === "template") { |
|||
// Disable pretty printing when serializing templates in preformatted content |
|||
$templateConfig = $config; |
|||
$isPreformattedContent = self::isPreformattedContent($n); |
|||
if ($reformatWhitespace && $isPreformattedContent) { |
|||
$templateConfig->reformatWhitespace = false; |
|||
} |
|||
|
|||
$nn = self::getTemplateContent($n); |
|||
$ss = ''; |
|||
|
|||
# For each child node of the node, in tree order, run the following steps: |
|||
foreach ($nn->childNodes as $nnn) { |
|||
$ss .= self::serialize($nnn, $config); |
|||
} |
|||
|
|||
if ($reformatWhitespace) { |
|||
if (!$isPreformattedContent && $indentionLevel > 0) { |
|||
// If the template's content is to be treated as block content then post-indent |
|||
// newlines at 1 + the current indention level in the serialized template |
|||
// contents. Then append a newline followed by another indention at the current |
|||
// indention level for the end tag. |
|||
if (self::treatAsBlock($n)) { |
|||
$ss = str_replace("\n", "\n" . str_repeat($indentChar, ($indentionLevel + 1) * $indentStep), $ss) . "\n" . str_repeat($indentChar, $indentionLevel * $indentStep); |
|||
} |
|||
} |
|||
} |
|||
|
|||
$s .= $ss; |
|||
} elseif ($n->hasChildNodes()) { |
|||
if ($reformatWhitespace) { |
|||
// If formatting output and the element's whitespace has already been modified |
|||
// increment the indention level |
|||
$indentionLevel++; |
|||
$prettyPrintStack[] = $n; |
|||
} |
|||
|
|||
// If the element has children, store its tag name and continue the loop with |
|||
// its first child; its end tag will be written out further down |
|||
$stack[] = $tagName; |
|||
$n = $n->firstChild; |
|||
continue; |
|||
} |
|||
|
|||
// Otherwise just append the end tag now |
|||
$s .= "</$tagName>"; |
|||
} |
|||
} |
|||
} |
|||
# If current node is a Text node |
|||
elseif ($n instanceof \DOMText) { |
|||
# If the parent of current node is a style, script, xmp, |
|||
# iframe, noembed, noframes, or plaintext element, or |
|||
# if the parent of current node is a noscript element |
|||
# and scripting is enabled for the node, then append |
|||
# the value of current node's data IDL attribute literally. |
|||
$p = $n->parentNode; |
|||
if ($p instanceof \DOMElement && ($p->namespaceURI ?? Parser::HTML_NAMESPACE) === Parser::HTML_NAMESPACE && in_array($p->tagName, self::RAWTEXT_ELEMENTS)) { |
|||
// NOTE: scripting is assumed not to be enabled |
|||
$s .= $n->data; |
|||
} |
|||
# Otherwise, append the value of current node's data IDL attribute, escaped as described below. |
|||
else { |
|||
$t = $n->data; |
|||
if ($reformatWhitespace && !self::isPreformattedContent($n)) { |
|||
// If the node's parent node is to be treated as block or if it is not an HTML |
|||
// element and its root foreign element is to be treated as block... |
|||
if (self::treatAsBlock($n->parentNode) || (($n->namespaceURI ?? Parser::HTML_NAMESPACE) !== Parser::HTML_NAMESPACE && self::treatForeignRootAsBlock($n))) { |
|||
// If the text node's data is made up of only whitespace characters continue |
|||
// onto the next node |
|||
if (strspn($t, Data::WHITESPACE) === strlen($t)) { |
|||
// FIXME: this is temporary |
|||
goto next; |
|||
} |
|||
} |
|||
|
|||
// Condense spaces and tabs into a single space. |
|||
$t = preg_replace('/ +/', ' ', str_replace("\t", ' ', $t)); |
|||
} |
|||
|
|||
$s .= self::escapeString($t); |
|||
} |
|||
} |
|||
# If current node is a Comment |
|||
elseif ($n instanceof \DOMComment) { |
|||
if ($reformatWhitespace && !self::isPreformattedContent($n)) { |
|||
$modify = false; |
|||
if (($n->parentNode->namespaceURI ?? Parser::HTML_NAMESPACE) !== Parser::HTML_NAMESPACE) { |
|||
if (self::treatAsBlock($n->parentNode)) { |
|||
$modify = true; |
|||
} |
|||
} else { |
|||
if ($n->parentNode->parentNode !== null && ($n->parentNode->parentNode->namespaceURI ?? Parser::HTML_NAMESPACE) === Parser::HTML_NAMESPACE) { |
|||
if (self::treatAsBlock($n->parentNode)) { |
|||
$modify = true; |
|||
} |
|||
} elseif (self::treatForeignRootAsBlock($n)) { |
|||
$modify = true; |
|||
} |
|||
} |
|||
|
|||
if ($modify) { |
|||
$previousNonTextNodeSiblingName = null; |
|||
$nn = $n; |
|||
while ($nn = $nn->previousSibling) { |
|||
if (!$nn instanceof \DOMText) { |
|||
$previousNonTextNodeSiblingName = $nn->nodeName; |
|||
break; |
|||
} |
|||
} |
|||
|
|||
// Add an additional newline if the previous sibling wasn't a comment. |
|||
if ($previousNonTextNodeSiblingName !== null && $previousNonTextNodeSiblingName !== $n->nodeName) { |
|||
$s .= "\n"; |
|||
} |
|||
|
|||
$s .= "\n" . str_repeat($indentChar, $indentionLevel * $indentStep); |
|||
} |
|||
} |
|||
|
|||
# Append the literal string "<!--" (U+003C LESS-THAN SIGN, |
|||
# U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, |
|||
# U+002D HYPHEN-MINUS), followed by the value of current |
|||
# node's data IDL attribute, followed by the literal |
|||
# string "-->" (U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS, |
|||
# U+003E GREATER-THAN SIGN). |
|||
$s .= "<!--".$n->data."-->"; |
|||
} |
|||
# If current node is a ProcessingInstruction |
|||
elseif ($n instanceof \DOMProcessingInstruction) { |
|||
if ($reformatWhitespace && !self::isPreformattedContent($n)) { |
|||
$modify = false; |
|||
if (($n->parentNode->namespaceURI ?? Parser::HTML_NAMESPACE) !== Parser::HTML_NAMESPACE) { |
|||
if (self::treatAsBlock($n->parentNode)) { |
|||
$modify = true; |
|||
} |
|||
} else { |
|||
if ($n->parentNode->parentNode !== null && ($n->parentNode->parentNode->namespaceURI ?? Parser::HTML_NAMESPACE) === Parser::HTML_NAMESPACE) { |
|||
if (self::treatAsBlock($n->parentNode)) { |
|||
$modify = true; |
|||
} |
|||
} elseif (self::treatForeignRootAsBlock($n)) { |
|||
$modify = true; |
|||
} |
|||
} |
|||
|
|||
if ($modify) { |
|||
$previousNonTextNodeSiblingName = null; |
|||
$nn = $n; |
|||
while ($nn = $nn->previousSibling) { |
|||
if (!$nn instanceof \DOMText) { |
|||
$previousNonTextNodeSiblingName = $nn->nodeName; |
|||
break; |
|||
} |
|||
} |
|||
|
|||
// Add an additional newline if the previous sibling wasn't a comment. |
|||
if ($previousNonTextNodeSiblingName !== null && $previousNonTextNodeSiblingName !== $n->nodeName) { |
|||
$s .= "\n"; |
|||
} |
|||
|
|||
$s .= "\n" . str_repeat($indentChar, $indentionLevel * $indentStep); |
|||
} |
|||
} |
|||
|
|||
# Append the literal string "<?" (U+003C LESS-THAN SIGN,
|
|||
# U+003F QUESTION MARK), followed by the value of |
|||
# current node's target IDL attribute, followed by a |
|||
# single U+0020 SPACE character, followed by the value |
|||
# of current node's data IDL attribute, followed by a |
|||
# single U+003E GREATER-THAN SIGN character (>). |
|||
$s .= "<?".self::uncoerceName($n->target)." ".$n->data.">";
|
|||
} |
|||
# If current node is a DocumentType |
|||
elseif ($n instanceof \DOMDocumentType) { |
|||
# Append the literal string "<!DOCTYPE" (U+003C LESS-THAN SIGN, |
|||
# U+0021 EXCLAMATION MARK, U+0044 LATIN CAPITAL LETTER D, |
|||
# U+004F LATIN CAPITAL LETTER O, U+0043 LATIN CAPITAL LETTER C, |
|||
# U+0054 LATIN CAPITAL LETTER T, U+0059 LATIN CAPITAL LETTER Y, |
|||
# U+0050 LATIN CAPITAL LETTER P, U+0045 LATIN CAPITAL LETTER E), |
|||
# followed by a space (U+0020 SPACE), followed by the value |
|||
# of current node's name IDL attribute, followed by the |
|||
# literal string ">" (U+003E GREATER-THAN SIGN). |
|||
$s .= "<!DOCTYPE ".trim($n->name).">"; |
|||
} |
|||
// NOTE: Documents and document fragments have no outer content, |
|||
// so we can just serialize the inner content |
|||
elseif ($n instanceof \DOMDocument || $n instanceof \DOMDocumentFragment) { |
|||
return self::serializeInner($n, $config); |
|||
} else { |
|||
throw new Exception(Exception::UNSUPPORTED_NODE_TYPE, [get_class($n)]); |
|||
} |
|||
|
|||
next: |
|||
// If the current node has no more siblings, go up the tree till a |
|||
// sibling is found or we've reached the original node |
|||
while (!$n->nextSibling && $stack) { |
|||
// Write out the stored end tag each time we go up the tree |
|||
$tagName = array_pop($stack); |
|||
|
|||
if ($reformatWhitespace) { |
|||
$indentionLevel--; |
|||
$tag = array_pop($prettyPrintStack); |
|||
$modify = false; |
|||
|
|||
// If the element popped off the stack isn't a preformatted element... |
|||
if (!self::isPreformattedContent($n)) { |
|||
// If it is in the HTML namespace and is to be treated as block then we need to |
|||
// modify whitespace. |
|||
if (($tag->namespaceURI ?? Parser::HTML_NAMESPACE) === Parser::HTML_NAMESPACE) { |
|||
if (self::treatAsBlock($tag)) { |
|||
$modify = true; |
|||
} |
|||
} else { |
|||
$firstElementChild = null; |
|||
if (property_exists($tag, 'firstElementChild')) { |
|||
$firstElementChild = $tag->firstElementChild; |
|||
} else { |
|||
$t = $tag->firstChild; |
|||
do { |
|||
if ($t instanceof \DOMElement) { |
|||
$firstElementChild = $t; |
|||
break; |
|||
} |
|||
} while ($t = $t->nextSibling); |
|||
} |
|||
|
|||
// Otherwise, if foreign and has a child element... |
|||
if ($firstElementChild !== null) { |
|||
// If the element popped off the stack has an HTML element parent and its parent |
|||
// is to be treated as block then we need to modify whitespace. |
|||
if ($tag->parentNode !== null && ($tag->parentNode->namespaceURI ?? Parser::HTML_NAMESPACE) === Parser::HTML_NAMESPACE) { |
|||
if (self::treatAsBlock($tag->parentNode)) { |
|||
$modify = true; |
|||
} |
|||
// Otherwise, if the element's foreign root is to be treated as block we need to |
|||
// modify whitespace, too. |
|||
} elseif ($tag->parentNode === null || self::treatForeignRootAsBlock($tag)) { |
|||
$modify = true; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
if ($modify) { |
|||
$s .= "\n" . str_repeat($indentChar, $indentionLevel * $indentStep); |
|||
} |
|||
} |
|||
|
|||
$s .= "</$tagName>"; |
|||
$n = $n->parentNode; |
|||
} |
|||
$n = $n->nextSibling; |
|||
} while ($stack); // Loop until we have traversed the subtree of the target node in full |
|||
return $s; |
|||
} |
|||
|
|||
/** Serializes the children of an HTML DOM node to a string. This is equivalent to the innerHTML getter |
|||
* |
|||
* @param \DOMDocument|\DOMElement|\DOMDocumentFragment $node The node to serialize |
|||
* @param \MensBeam\HTML\Parser\Config|null $config The configuration parameters to use, if any |
|||
*/ |
|||
public static function serializeInner(\DOMNode $node, ?Config $config = null): string { |
|||
$reformatWhitespace = $config->reformatWhitespace ?? false; |
|||
|
|||
# Let s be a string, and initialize it to the empty string. |
|||
$s = ""; |
|||
|
|||
if ($node instanceof \DOMElement && ($node->namespaceURI ?? Parser::HTML_NAMESPACE) === Parser::HTML_NAMESPACE) { |
|||
# If the node serializes as void, then return the empty string. |
|||
if (in_array($node->tagName, self::VOID_ELEMENTS)) { |
|||
return ""; |
|||
} |
|||
# If the node is a template element, then let the node instead be the template |
|||
# element's template contents (a DocumentFragment node). |
|||
elseif ($node->tagName === "template") { |
|||
$n = self::getTemplateContent($n); |
|||
|
|||
# For each child node of the node, in tree order, run the following steps: |
|||
// NOTE: the steps in question are implemented in the "serialize" routine |
|||
foreach ($n->childNodes as $nn) { |
|||
$s .= self::serialize($nn, $config); |
|||
} |
|||
|
|||
return $s; |
|||
} |
|||
} |
|||
if ($node instanceof \DOMElement || $node instanceof \DOMDocument || $node instanceof \DOMDocumentFragment) { |
|||
# For each child node of the node, in tree order, run the following steps: |
|||
// NOTE: the steps in question are implemented in the "serialize" routine |
|||
foreach ($node->childNodes as $n) { |
|||
$s .= self::serialize($n, $config); |
|||
} |
|||
} else { |
|||
throw new Exception(Exception::UNSUPPORTED_NODE_TYPE, [get_class($node)]); |
|||
} |
|||
|
|||
return $s; |
|||
} |
|||
|
|||
|
|||
protected static function getTemplateContent(\DOMElement $node, ?Config $config = null): \DOMNode { |
|||
// NOTE: PHP's DOM does not support the content property on template elements |
|||
// natively. This method exists purely so implementors of userland PHP DOM |
|||
// solutions may extend this method to get template contents how they need them. |
|||
return $node; |
|||
} |
|||
|
|||
protected static function isPreformattedContent(\DOMNode $node): bool { |
|||
// NOTE: This method is used only when pretty printing. Implementors of userland |
|||
// PHP DOM solutions with template contents will need to extend this method to |
|||
// be able to moonwalk through document fragment hosts. |
|||
|
|||
$n = $node; |
|||
do { |
|||
if ($n instanceof \DOMElement && ($n->namespaceURI ?? Parser::HTML_NAMESPACE) === Parser::HTML_NAMESPACE && in_array($n->tagName, self::PREFORMATTED_ELEMENTS)) { |
|||
return true; |
|||
} |
|||
} while ($n = $n->parentNode); |
|||
|
|||
return false; |
|||
} |
|||
|
|||
protected static function treatAsBlock(\DOMNode $node): bool { |
|||
// NOTE: This method is used only when pretty printing. Implementors of userland |
|||
// PHP DOM solutions with template contents will need to extend this method to |
|||
// check for any templates and look within their content fragments for "block" |
|||
// content. |
|||
if ($node instanceof \DOMDocument || $node instanceof \DOMDocumentFragment) { |
|||
return true; |
|||
} |
|||
|
|||
$xpath = new \DOMXPath($node->ownerDocument); |
|||
return ($xpath->evaluate(self::BLOCK_QUERY, $node) > 0); |
|||
} |
|||
|
|||
protected static function treatForeignRootAsBlock(\DOMNode $node): bool { |
|||
// NOTE: This method is used only when pretty printing. Implementors of userland |
|||
// PHP DOM solutions with template contents will need to extend this method to |
|||
// be able to moonwalk through document fragment hosts. |
|||
$n = $node; |
|||
while ($n = $n->parentNode) { |
|||
if ($n instanceof \DOMDocument || $n instanceof \DOMDocumentFragment || ($n instanceof \DOMElement && $n->parentNode === null)) { |
|||
return true; |
|||
} elseif (($n->parentNode->namespaceURI ?? Parser::HTML_NAMESPACE) === Parser::HTML_NAMESPACE) { |
|||
if (self::treatAsBlock($n->parentNode)) { |
|||
return true; |
|||
} |
|||
break; |
|||
} |
|||
} |
|||
|
|||
return false; |
|||
} |
|||
} |
File diff suppressed because it is too large
Loading…
Reference in new issue