diff --git a/lib/DOM/AbstractDocument.php b/lib/DOM/AbstractDocument.php index da474d1..f5fdadd 100644 --- a/lib/DOM/AbstractDocument.php +++ b/lib/DOM/AbstractDocument.php @@ -8,5 +8,5 @@ namespace MensBeam\HTML; // Exists so Document can extend methods from its traits. abstract class AbstractDocument extends \DOMDocument { - use ContainerNode, EscapeString, Walk; + use ContainerNode, EscapeString, MoonwalkShallow, Walk, WalkShallow; } diff --git a/lib/DOM/AbstractElement.php b/lib/DOM/AbstractElement.php index 8c46711..43f1e9a 100644 --- a/lib/DOM/AbstractElement.php +++ b/lib/DOM/AbstractElement.php @@ -8,5 +8,5 @@ namespace MensBeam\HTML; // Exists so Element can extend methods from its traits. abstract class AbstractElement extends \DOMElement { - use ContainerNode, EscapeString, Moonwalk, ToString, Walk; + use ContainerNode, EscapeString, Moonwalk, MoonwalkShallow, ToString, Walk, WalkShallow; } diff --git a/lib/DOM/Document.php b/lib/DOM/Document.php index ce87290..367db45 100644 --- a/lib/DOM/Document.php +++ b/lib/DOM/Document.php @@ -19,13 +19,13 @@ class Document extends AbstractDocument { protected $_body = null; // List of elements that are treated as block elements for the purposes of output formatting - protected static $blockElements = [ 'address', 'article', 'aside', 'blockquote', 'body', 'details', 'dialog', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html', 'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'script', 'source', 'style', 'table', 'template', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul' ]; + protected const BLOCK_ELEMENTS = [ 'address', 'article', 'aside', 'blockquote', 'base', 'body', 'details', 'dialog', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'isindex', 'li', 'link', 'main', 'meta', 
'nav', 'ol', 'p', 'pre', 'section', 'script', 'source', 'style', 'table', 'template', 'td', 'tfoot', 'th', 'thead', 'title', 'tr', 'ul' ]; // List of preformatted elements where content is ignored when output formatting - protected static $preformattedElements = [ 'iframe', 'listing', 'noembed', 'noframes', 'plaintext', 'pre', 'textarea', 'title', 'xmp' ]; + protected const PREFORMATTED_ELEMENTS = [ 'iframe', 'listing', 'noembed', 'noframes', 'plaintext', 'pre', 'textarea', 'title', 'xmp' ]; // List of elements where content is ignored except to indent - protected static $scriptElements = [ 'script', 'style' ]; + protected const SCRIPT_ELEMENTS = [ 'script', 'style' ]; // List of elements which are self-closing; used when serializing - protected static $voidElements = [ 'area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' ]; + protected const VOID_ELEMENTS = [ 'area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' ]; public function __construct() { @@ -282,23 +282,13 @@ class Document extends AbstractDocument { protected function serializeFragment(\DOMNode $node, bool $formatOutput = false): string { if ($formatOutput) { - static $foreignAncestorWithBlockElementSiblings = false; - static $foreignElement = null; static $indent = 0; - static $inlineWithBlockElementDescendants = false; - static $inlineWithBlockElementDescendantsNode = null; - static $inlineWithBlockElementSiblings = false; - static $inlineWithBlockElementSiblingsParent = null; - static $preformattedContent = false; - static $preformattedElement = null; - static $scriptContent = false; - static $scriptElement = null; } # 13.3. Serializing HTML fragments # # 1. If the node serializes as void, then return the empty string. 
- if (in_array($node->nodeName, self::$voidElements)) { + if (in_array($node->nodeName, self::VOID_ELEMENTS)) { return ''; } @@ -316,9 +306,7 @@ class Document extends AbstractDocument { ## 1. Let current node be the child node being processed. foreach ($node->childNodes as $currentNode) { if ($this->formatOutput) { - $blockElement = false; - $foreign = ($currentNode->namespaceURI !== null); - $modify = true; + $modify = false; } # 2. Append the appropriate string from the following list to s: @@ -336,93 +324,21 @@ class Document extends AbstractDocument { } if ($formatOutput) { - if ($foreign && $foreignElement === null) { - $foreignElement = $currentNode; - } - - if (!$preformattedContent) { - if (in_array($tagName, self::$preformattedElements)) { - $preformattedContent = true; - $preformattedElement = $currentNode; - // The element itself should be indented, but the content itself will be left - // alone when it is serialized. - $modify = true; - } elseif ($scriptContent) { - $modify = true; - } elseif (in_array($tagName, self::$scriptElements)) { - $scriptContent = true; - $scriptElement = $currentNode; - $modify = true; + if (in_array($tagName, self::BLOCK_ELEMENTS) && $currentNode->parentNode !== null && $currentNode->parentNode->walkShallow(function($n) use ($currentNode) { + if ($n->isSameNode($currentNode)) { + return false; } - if (!$foreignElement && !$blockElement && in_array($tagName, self::$blockElements)) { - $blockElement = true; - $modify = true; - } - - if (!$blockElement) { - if (!$inlineWithBlockElementSiblings) { - if ($currentNode->hasSiblingElementWithName(...self::$blockElements)) { - $inlineWithBlockElementSiblings = true; - $inlineWithBlockElementSiblingsParent = $currentNode->parentNode; - $modify = true; - } - } else { - if ($inlineWithBlockElementSiblingsParent !== null && $currentNode->parentNode->isSameNode($inlineWithBlockElementSiblingsParent)) { - $modify = true; - } elseif 
($currentNode->hasSiblingElementWithName(...self::$blockElements)) { - $inlineWithBlockElementSiblings = true; - $inlineWithBlockElementSiblingsParent = $currentNode->parentNode; - $modify = true; - } else { - $inlineWithBlockElementSiblings = false; - $inlineWithBlockElementSiblingsParent = null; - } - - if (!$inlineWithBlockElementDescendants && $currentNode->hasDescendantWithName(...self::$blockElements)) { - $inlineWithBlockElementDescendants = true; - $inlineWithBlockElementDescendantsNode = $currentNode; - $modify = true; - } - - if ($foreignAncestorWithBlockElementSiblings) { - $modify = true; - } elseif ($foreign && $currentNode->isSameNode($foreignElement)) { - if ($inlineWithBlockElementSiblings) { - $foreignAncestorWithBlockElementSiblings = true; - $modify = true; - } elseif (in_array($currentNode->parentNode->nodeName, static::$blockElements)) { - $firstNonWhitespaceNode = null; - foreach ($currentNode->parentNode->childNodes as $child) { - if (!$child instanceof Text || strspn($child->data, Data::WHITESPACE) !== strlen($child->data)) { - $firstNonWhitespaceNode = $child; - break; - } - } - - $lastNonWhitespaceNode = null; - for ($i = $currentNode->parentNode->childNodes->length - 1; $i >= 0; $i--) { - $child = $currentNode->parentNode->childNodes[$i]; - if (!$child instanceof Text || strspn($child->data, Data::WHITESPACE) !== strlen($child->data)) { - $lastNonWhitespaceNode = $child; - } - } - - if ($currentNode->isSameNode($firstNonWhitespaceNode) && $currentNode->isSameNode->lastNonWhitespaceNode) { - $foreignAncestorWithBlockElementSiblings = true; - $modify = true; - } - } - } - } + if ($n instanceof Element && !in_array($n->nodeName, self::BLOCK_ELEMENTS)) { + return true; } - } - - if ($modify) { + })->current() === null) { $s .= "\n" . str_repeat(' ', $indent); + $modify = true; } } + # Append a U+003C LESS-THAN SIGN character (<), followed by tagname. 
$s .= "<$tagName"; @@ -505,34 +421,47 @@ class Document extends AbstractDocument { # If current node serializes as void, then continue on to the next child node at # this point. - if (in_array($currentNode->nodeName, self::$voidElements)) { + if (in_array($currentNode->nodeName, self::VOID_ELEMENTS)) { continue; } + // If formatting output and the element has already been modified increment the + // indention level + if ($formatOutput && $modify) { + $indent++; + } + # Append the value of running the HTML fragment serialization algorithm on the # current node element (thus recursing into this algorithm for that element), # followed by a U+003C LESS-THAN SIGN character (<), a U+002F SOLIDUS character (/), # tagname again, and finally a U+003E GREATER-THAN SIGN character (>). $s .= $this->serializeFragment($currentNode, $formatOutput); + + if ($formatOutput && $modify) { + // Decrement the indention level. + $indent--; + + // If the current node has any block element children append a newline followed + // by a number of spaces equal to the indention level. + if ($currentNode->walkShallow(function($n) use($currentNode) { + if ($n->isSameNode($currentNode)) { + return false; + } + + if ($n instanceof Element && in_array($n->nodeName, self::BLOCK_ELEMENTS)) { + return true; + } + })->current() !== null) { + $s .= "\n" . str_repeat(' ', $indent); + } + } + $s .= ""; } # If current node is a Text node elseif ($currentNode instanceof Text) { $text = $currentNode->data; - if ($formatOutput && $preformattedElement !== null && $scriptElement !== null) { - if ($foreignElement !== null || (in_array($currentNode->parentNode->nodeName, self::$blockElements) && $currentNode->hasSiblingElementWithName(self::$blockElements) && strspn($text, Data::WHITESPACE) !== strlen($text))) { - continue; - } - - $normalized = preg_replace([ '/[\n\r]/', '/(){2,}/' ], [ '', '$1' ], str_replace("\t", ' ', $text)); - if ($text === '') { - continue; - } - - $text = ($normalized !== $text) ? 
$normalized : $text; - } - # If the parent of current node is a style, script, xmp, iframe, noembed, # noframes, or plaintext element, or if the parent of current node is a noscript # element and scripting is enabled for the node, then append the value of @@ -544,6 +473,46 @@ class Document extends AbstractDocument { # Otherwise, append the value of current node’s data IDL attribute, escaped as # described below. else { + // If formatting the output and the text node has neither a preformatted element + // ancestor nor a script element ancestor (both for the purposes of formatting + // serialized output) + if ($formatOutput && $currentNode->moonWalk(function($n) { + if (in_array($n->nodeName, self::PREFORMATTED_ELEMENTS) || in_array($n->nodeName, self::SCRIPT_ELEMENTS)) { + return true; + } + })->current() === null) { + // If the text node has a foreign element ancestor or the text node's parent is + // a block element (for the purposes of formatting serialized output), the text + // node has only block element siblings, and the text node's data itself is + // entirely made up of whitespace then move onto the next node. + if ($currentNode->moonWalk(function($n) { + if ($n->namespaceURI !== null) { + return true; + } + })->current() !== null || ($currentNode->parentNode !== null && in_array($currentNode->parentNode->nodeName, self::BLOCK_ELEMENTS) && $currentNode->parentNode->walkShallow(function($n) use($currentNode) { + if ($n->isSameNode($currentNode)) { + return false; + } + + if ($n instanceof Element && !in_array($n->nodeName, self::BLOCK_ELEMENTS)) { + return true; + } + })->current() === null && strspn($text, Data::WHITESPACE) === strlen($text))) { + continue; + } + + // Otherwise, if the text node's data normalizes into an empty string move onto + // the next node. + // Normalization here means that newlines are removed and simple spaces and tabs + // are condensed a single space. 
+ $normalized = preg_replace([ '/[\n\x0C\x0D]+/', '/[ \t]+/' ], [ '', ' ' ], $text); + if ($normalized === '') { + continue; + } + + $text = $normalized; + } + $s .= $this->escapeString($text); } } diff --git a/lib/DOM/DocumentFragment.php b/lib/DOM/DocumentFragment.php index 2822fa8..7c3d5df 100644 --- a/lib/DOM/DocumentFragment.php +++ b/lib/DOM/DocumentFragment.php @@ -7,5 +7,5 @@ declare(strict_types=1); namespace MensBeam\HTML; class DocumentFragment extends \DOMDocumentFragment { - use ContainerNode, Moonwalk, ToString, Walk; + use ContainerNode, MoonwalkShallow, ToString, Walk, WalkShallow; } diff --git a/lib/DOM/Element.php b/lib/DOM/Element.php index b0e8d1a..59f5251 100644 --- a/lib/DOM/Element.php +++ b/lib/DOM/Element.php @@ -36,20 +36,6 @@ class Element extends AbstractElement { return $value; } - /** Nonstandard */ - public function isAncestorOf(\DOMNode $node): bool { - # An object A is called an ancestor of an object B if and only if B is a - # descendant of A. - // object A is $this, object B is $node - $tree = $this->walk(function($n) use($node) { - if ($n->isSameNode($node)) { - return true; - } - }); - - return ($tree->current() !== null); - } - public function hasAttribute($name) { if (!parent::hasAttribute($name)) { foreach ($this->attributes as $a) { @@ -62,82 +48,6 @@ class Element extends AbstractElement { return true; } - /** Nonstandard */ - public function hasDescendant(...$nodes): bool { - if ($this->childNodes->length === 0) { - return false; - } - - $tree = $this->walk(function($descendant) use($nodes) { - foreach ($nodes as $n) { - if ($n->isSameNode($descendant)) { - return true; - } - } - }); - - return ($tree->current() !== null); - } - - /** Nonstandard */ - public function hasDescendantElementWithName(...$nodeNames): bool { - if ($this->childNodes->length === 0) { - return false; - } - - $tree = $this->walk(function($descendant) use($nodeNames) { - foreach ($nodeNames as $n) { - if ($n instanceof Element && $n->nodeName === 
$descendant->nodeName) { - return true; - } - } - }); - - return ($tree->current() !== null); - } - - /** Nonstandard */ - public function hasSibling(\DOMNode ...$nodes): bool { - if ($this->parentNode === null) { - return false; - } - - foreach ($this->parentNode->childNodes as $child) { - if ($child->isSameNode($this)) { - continue; - } - - foreach ($nodes as $n) { - if ($n->isSameNode($child)) { - return true; - } - } - } - - return false; - } - - /** Nonstandard */ - public function hasSiblingElementWithName(string ...$nodeNames): bool { - if ($this->parentNode === null) { - return false; - } - - foreach ($this->parentNode->childNodes as $child) { - if ($child->isSameNode($this)) { - continue; - } - - foreach ($nodeNames as $n) { - if ($n instanceof Element && $n->nodeName === $child->nodeName) { - return true; - } - } - } - - return false; - } - public function setAttribute($name, $value) { $this->setAttributeNS(null, $name, $value); } diff --git a/lib/DOM/Text.php b/lib/DOM/Text.php index f02789b..3736a8e 100644 --- a/lib/DOM/Text.php +++ b/lib/DOM/Text.php @@ -8,47 +8,4 @@ namespace MensBeam\HTML; class Text extends \DOMText { use LeafNode, Moonwalk, ToString; - - - /** Nonstandard */ - public function hasSibling(\DOMNode ...$nodes): bool { - if ($this->parentNode === null) { - return false; - } - - foreach ($this->parentNode->childNodes as $child) { - if ($child->isSameNode($this)) { - continue; - } - - foreach ($nodes as $n) { - if ($n->isSameNode($child)) { - return true; - } - } - } - - return false; - } - - /** Nonstandard */ - public function hasSiblingElementWithName(string ...$nodeNames): bool { - if ($this->parentNode === null) { - return false; - } - - foreach ($this->parentNode->childNodes as $child) { - if ($child->isSameNode($this)) { - continue; - } - - foreach ($nodeNames as $n) { - if ($n instanceof Element && $n->nodeName === $child->nodeName) { - return true; - } - } - } - - return false; - } } diff --git a/lib/DOM/traits/Moonwalk.php 
b/lib/DOM/traits/Moonwalk.php index 8771000..823fdfd 100644 --- a/lib/DOM/traits/Moonwalk.php +++ b/lib/DOM/traits/Moonwalk.php @@ -7,6 +7,7 @@ declare(strict_types=1); namespace MensBeam\HTML; trait Moonwalk { + /** Generator which walks up the DOM. Nonstandard. */ public function moonwalk(?\Closure $filter = null): \Generator { return $this->moonwalkGenerator($this, $filter); } diff --git a/lib/DOM/traits/MoonwalkShallow.php b/lib/DOM/traits/MoonwalkShallow.php new file mode 100644 index 0000000..8be4258 --- /dev/null +++ b/lib/DOM/traits/MoonwalkShallow.php @@ -0,0 +1,27 @@ +hasChildNodes()) { + $childNodesLength = $this->childNodes->length; + for ($childNodesLength = $this->childNodes->length, $i = $childNodesLength - 1; $i >= 0; $i--) { + $child = $this->childNodes[$i]; + if ($filter === null || $filter($child)) { + yield $child; + } + } + } + } +} diff --git a/lib/DOM/traits/Walk.php b/lib/DOM/traits/Walk.php index 8da3ef9..a67ea1c 100644 --- a/lib/DOM/traits/Walk.php +++ b/lib/DOM/traits/Walk.php @@ -7,6 +7,7 @@ declare(strict_types=1); namespace MensBeam\HTML; trait Walk { + /** Generator which walks down the DOM. Nonstandard. */ public function walk(?\Closure $filter = null): \Generator { return $this->walkGenerator($this, $filter); } diff --git a/lib/DOM/traits/WalkShallow.php b/lib/DOM/traits/WalkShallow.php new file mode 100644 index 0000000..fbb16a2 --- /dev/null +++ b/lib/DOM/traits/WalkShallow.php @@ -0,0 +1,22 @@ +childNodes as $child) { + if ($filter === null || $filter($child)) { + yield $child; + } + } + } +}