Browse Source

More pretty printing work, cleanup of DOM methods

ns
Dustin Wilson 3 years ago
parent
commit
82eb4828ed
  1. 2
      lib/DOM/AbstractDocument.php
  2. 2
      lib/DOM/AbstractElement.php
  3. 193
      lib/DOM/Document.php
  4. 2
      lib/DOM/DocumentFragment.php
  5. 90
      lib/DOM/Element.php
  6. 43
      lib/DOM/Text.php
  7. 1
      lib/DOM/traits/Moonwalk.php
  8. 27
      lib/DOM/traits/MoonwalkShallow.php
  9. 1
      lib/DOM/traits/Walk.php
  10. 22
      lib/DOM/traits/WalkShallow.php

2
lib/DOM/AbstractDocument.php

@ -8,5 +8,5 @@ namespace MensBeam\HTML;
// Exists so Document can extend methods from its traits.
abstract class AbstractDocument extends \DOMDocument {
use ContainerNode, EscapeString, Walk;
use ContainerNode, EscapeString, MoonwalkShallow, Walk, WalkShallow;
}

2
lib/DOM/AbstractElement.php

@ -8,5 +8,5 @@ namespace MensBeam\HTML;
// Exists so Element can extend methods from its traits.
abstract class AbstractElement extends \DOMElement {
use ContainerNode, EscapeString, Moonwalk, ToString, Walk;
use ContainerNode, EscapeString, Moonwalk, MoonwalkShallow, ToString, Walk, WalkShallow;
}

193
lib/DOM/Document.php

@ -19,13 +19,13 @@ class Document extends AbstractDocument {
protected $_body = null;
// List of elements that are treated as block elements for the purposes of output formatting
protected static $blockElements = [ 'address', 'article', 'aside', 'blockquote', 'body', 'details', 'dialog', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html', 'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'script', 'source', 'style', 'table', 'template', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul' ];
protected const BLOCK_ELEMENTS = [ 'address', 'article', 'aside', 'blockquote', 'base', 'body', 'details', 'dialog', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'isindex', 'li', 'link', 'main', 'meta', 'nav', 'ol', 'p', 'pre', 'section', 'script', 'source', 'style', 'table', 'template', 'td', 'tfoot', 'th', 'thead', 'title', 'tr', 'ul' ];
// List of preformatted elements where content is ignored when output formatting
protected static $preformattedElements = [ 'iframe', 'listing', 'noembed', 'noframes', 'plaintext', 'pre', 'textarea', 'title', 'xmp' ];
protected const PREFORMATTED_ELEMENTS = [ 'iframe', 'listing', 'noembed', 'noframes', 'plaintext', 'pre', 'textarea', 'title', 'xmp' ];
// List of elements where content is ignored except to indent
protected static $scriptElements = [ 'script', 'style' ];
protected const SCRIPT_ELEMENTS = [ 'script', 'style' ];
// List of elements which are self-closing; used when serializing
protected static $voidElements = [ 'area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' ];
protected const VOID_ELEMENTS = [ 'area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' ];
public function __construct() {
@ -282,23 +282,13 @@ class Document extends AbstractDocument {
protected function serializeFragment(\DOMNode $node, bool $formatOutput = false): string {
if ($formatOutput) {
static $foreignAncestorWithBlockElementSiblings = false;
static $foreignElement = null;
static $indent = 0;
static $inlineWithBlockElementDescendants = false;
static $inlineWithBlockElementDescendantsNode = null;
static $inlineWithBlockElementSiblings = false;
static $inlineWithBlockElementSiblingsParent = null;
static $preformattedContent = false;
static $preformattedElement = null;
static $scriptContent = false;
static $scriptElement = null;
}
# 13.3. Serializing HTML fragments
#
# 1. If the node serializes as void, then return the empty string.
if (in_array($node->nodeName, self::$voidElements)) {
if (in_array($node->nodeName, self::VOID_ELEMENTS)) {
return '';
}
@ -316,9 +306,7 @@ class Document extends AbstractDocument {
## 1. Let current node be the child node being processed.
foreach ($node->childNodes as $currentNode) {
if ($this->formatOutput) {
$blockElement = false;
$foreign = ($currentNode->namespaceURI !== null);
$modify = true;
$modify = false;
}
# 2. Append the appropriate string from the following list to s:
@ -336,93 +324,21 @@ class Document extends AbstractDocument {
}
if ($formatOutput) {
if ($foreign && $foreignElement === null) {
$foreignElement = $currentNode;
}
if (!$preformattedContent) {
if (in_array($tagName, self::$preformattedElements)) {
$preformattedContent = true;
$preformattedElement = $currentNode;
// The element itself should be indented, but the content itself will be left
// alone when it is serialized.
$modify = true;
} elseif ($scriptContent) {
$modify = true;
} elseif (in_array($tagName, self::$scriptElements)) {
$scriptContent = true;
$scriptElement = $currentNode;
$modify = true;
if (in_array($tagName, self::BLOCK_ELEMENTS) && $currentNode->parentNode !== null && $currentNode->parentNode->walkShallow(function($n) use ($currentNode) {
if ($n->isSameNode($currentNode)) {
return false;
}
if (!$foreignElement && !$blockElement && in_array($tagName, self::$blockElements)) {
$blockElement = true;
$modify = true;
}
if (!$blockElement) {
if (!$inlineWithBlockElementSiblings) {
if ($currentNode->hasSiblingElementWithName(...self::$blockElements)) {
$inlineWithBlockElementSiblings = true;
$inlineWithBlockElementSiblingsParent = $currentNode->parentNode;
$modify = true;
}
} else {
if ($inlineWithBlockElementSiblingsParent !== null && $currentNode->parentNode->isSameNode($inlineWithBlockElementSiblingsParent)) {
$modify = true;
} elseif ($currentNode->hasSiblingElementWithName(...self::$blockElements)) {
$inlineWithBlockElementSiblings = true;
$inlineWithBlockElementSiblingsParent = $currentNode->parentNode;
$modify = true;
} else {
$inlineWithBlockElementSiblings = false;
$inlineWithBlockElementSiblingsParent = null;
}
if (!$inlineWithBlockElementDescendants && $currentNode->hasDescendantWithName(...self::$blockElements)) {
$inlineWithBlockElementDescendants = true;
$inlineWithBlockElementDescendantsNode = $currentNode;
$modify = true;
}
if ($foreignAncestorWithBlockElementSiblings) {
$modify = true;
} elseif ($foreign && $currentNode->isSameNode($foreignElement)) {
if ($inlineWithBlockElementSiblings) {
$foreignAncestorWithBlockElementSiblings = true;
$modify = true;
} elseif (in_array($currentNode->parentNode->nodeName, static::$blockElements)) {
$firstNonWhitespaceNode = null;
foreach ($currentNode->parentNode->childNodes as $child) {
if (!$child instanceof Text || strspn($child->data, Data::WHITESPACE) !== strlen($child->data)) {
$firstNonWhitespaceNode = $child;
break;
}
}
$lastNonWhitespaceNode = null;
for ($i = $currentNode->parentNode->childNodes->length - 1; $i >= 0; $i--) {
$child = $currentNode->parentNode->childNodes[$i];
if (!$child instanceof Text || strspn($child->data, Data::WHITESPACE) !== strlen($child->data)) {
$lastNonWhitespaceNode = $child;
}
}
if ($currentNode->isSameNode($firstNonWhitespaceNode) && $currentNode->isSameNode->lastNonWhitespaceNode) {
$foreignAncestorWithBlockElementSiblings = true;
$modify = true;
}
}
}
}
if ($n instanceof Element && !in_array($n->nodeName, self::BLOCK_ELEMENTS)) {
return true;
}
}
if ($modify) {
})->current() === null) {
$s .= "\n" . str_repeat(' ', $indent);
$modify = true;
}
}
# Append a U+003C LESS-THAN SIGN character (<), followed by tagname.
$s .= "<$tagName";
@ -505,34 +421,47 @@ class Document extends AbstractDocument {
# If current node serializes as void, then continue on to the next child node at
# this point.
if (in_array($currentNode->nodeName, self::$voidElements)) {
if (in_array($currentNode->nodeName, self::VOID_ELEMENTS)) {
continue;
}
// If formatting output and the element has already been modified increment the
// indention level
if ($formatOutput && $modify) {
$indent++;
}
# Append the value of running the HTML fragment serialization algorithm on the
# current node element (thus recursing into this algorithm for that element),
# followed by a U+003C LESS-THAN SIGN character (<), a U+002F SOLIDUS character (/),
# tagname again, and finally a U+003E GREATER-THAN SIGN character (>).
$s .= $this->serializeFragment($currentNode, $formatOutput);
if ($formatOutput && $modify) {
// Decrement the indention level.
$indent--;
// If the current node has any block element children append a newline followed
// by a number of spaces equal to the indention level.
if ($currentNode->walkShallow(function($n) use($currentNode) {
if ($n->isSameNode($currentNode)) {
return false;
}
if ($n instanceof Element && in_array($n->nodeName, self::BLOCK_ELEMENTS)) {
return true;
}
})->current() !== null) {
$s .= "\n" . str_repeat(' ', $indent);
}
}
$s .= "</$tagName>";
}
# If current node is a Text node
elseif ($currentNode instanceof Text) {
$text = $currentNode->data;
if ($formatOutput && $preformattedElement !== null && $scriptElement !== null) {
if ($foreignElement !== null || (in_array($currentNode->parentNode->nodeName, self::$blockElements) && $currentNode->hasSiblingElementWithName(self::$blockElements) && strspn($text, Data::WHITESPACE) !== strlen($text))) {
continue;
}
$normalized = preg_replace([ '/[\n\r]/', '/(){2,}/' ], [ '', '$1' ], str_replace("\t", ' ', $text));
if ($text === '') {
continue;
}
$text = ($normalized !== $text) ? $normalized : $text;
}
# If the parent of current node is a style, script, xmp, iframe, noembed,
# noframes, or plaintext element, or if the parent of current node is a noscript
# element and scripting is enabled for the node, then append the value of
@ -544,6 +473,46 @@ class Document extends AbstractDocument {
# Otherwise, append the value of current node’s data IDL attribute, escaped as
# described below.
else {
// If formatting the output and the text node has neither a preformatted element
// ancestor nor a script element ancestor (both for the purposes of formatting
// serialized output)
if ($formatOutput && $currentNode->moonWalk(function($n) {
if (in_array($n->nodeName, self::PREFORMATTED_ELEMENTS) || in_array($n->nodeName, self::SCRIPT_ELEMENTS)) {
return true;
}
})->current() === null) {
// If the text node has a foreign element ancestor or the text node's parent is
// a block element (for the purposes of formatting serialized output), the text
// node has only block element siblings, and the text node's data itself is
// entirely made up of whitespace then move onto the next node.
if ($currentNode->moonWalk(function($n) {
if ($n->namespaceURI !== null) {
return true;
}
})->current() !== null || ($currentNode->parentNode !== null && in_array($currentNode->parentNode->nodeName, self::BLOCK_ELEMENTS) && $currentNode->parentNode->walkShallow(function($n) use($currentNode) {
if ($n->isSameNode($currentNode)) {
return false;
}
if ($n instanceof Element && !in_array($n->nodeName, self::BLOCK_ELEMENTS)) {
return true;
}
})->current() === null && strspn($text, Data::WHITESPACE) === strlen($text))) {
continue;
}
// Otherwise, if the text node's data normalizes into an empty string move onto
// the next node.
// Normalization here means that newlines are removed and runs of spaces and tabs
// are condensed into a single space.
$normalized = preg_replace([ '/[\n\x0C\x0D]+/', '/[ \t]+/' ], [ '', ' ' ], $text);
if ($text === '') {
continue;
}
$text = $normalized;
}
$s .= $this->escapeString($text);
}
}

2
lib/DOM/DocumentFragment.php

@ -7,5 +7,5 @@ declare(strict_types=1);
namespace MensBeam\HTML;
class DocumentFragment extends \DOMDocumentFragment {
use ContainerNode, Moonwalk, ToString, Walk;
use ContainerNode, MoonwalkShallow, ToString, Walk, WalkShallow;
}

90
lib/DOM/Element.php

@ -36,20 +36,6 @@ class Element extends AbstractElement {
return $value;
}
/**
 * Nonstandard. Reports whether a walk started from this element comes across
 * the supplied node.
 */
public function isAncestorOf(\DOMNode $node): bool {
    # An object A is called an ancestor of an object B if and only if B is a
    # descendant of A.
    // Here A is $this and B is $node.
    $matchesTarget = function($candidate) use($node) {
        if ($candidate->isSameNode($node)) {
            return true;
        }
    };

    // A non-null current() means the filtered walk produced a match.
    return ($this->walk($matchesTarget)->current() !== null);
}
public function hasAttribute($name) {
if (!parent::hasAttribute($name)) {
foreach ($this->attributes as $a) {
@ -62,82 +48,6 @@ class Element extends AbstractElement {
return true;
}
/**
 * Nonstandard. Reports whether a walk started from this element comes across
 * any one of the supplied nodes.
 */
public function hasDescendant(...$nodes): bool {
    // With no child nodes there is nothing to look through.
    if ($this->childNodes->length === 0) {
        return false;
    }

    $isWanted = function($descendant) use($nodes) {
        foreach ($nodes as $wanted) {
            if ($wanted->isSameNode($descendant)) {
                return true;
            }
        }
    };

    // A non-null current() means the filtered walk produced a match.
    return ($this->walk($isWanted)->current() !== null);
}
/**
 * Nonstandard. Reports whether a walk started from this element comes across
 * an Element whose node name is one of the supplied names.
 *
 * @param string ...$nodeNames - Node names to look for
 * @return bool True when a matching element is found, false otherwise
 */
public function hasDescendantElementWithName(...$nodeNames): bool {
    // With no child nodes there is nothing to look through.
    if ($this->childNodes->length === 0) {
        return false;
    }

    $tree = $this->walk(function($descendant) use($nodeNames) {
        // $nodeNames holds strings, so the Element check has to be made against
        // the walked node; checking the name itself could never succeed.
        if ($descendant instanceof Element && in_array($descendant->nodeName, $nodeNames, true)) {
            return true;
        }
    });

    // A non-null current() means the filtered walk produced a match.
    return ($tree->current() !== null);
}
/**
 * Nonstandard. Reports whether any of the supplied nodes shares this node's
 * parent (this node itself is never counted as its own sibling).
 */
public function hasSibling(\DOMNode ...$nodes): bool {
    $parent = $this->parentNode;
    // A node without a parent has no siblings.
    if ($parent === null) {
        return false;
    }

    foreach ($parent->childNodes as $sibling) {
        if (!$sibling->isSameNode($this)) {
            foreach ($nodes as $wanted) {
                if ($wanted->isSameNode($sibling)) {
                    return true;
                }
            }
        }
    }

    return false;
}
/**
 * Nonstandard. Reports whether this node has a sibling Element whose node name
 * is one of the supplied names.
 *
 * @param string ...$nodeNames - Node names to look for
 * @return bool True when a matching sibling element exists, false otherwise
 */
public function hasSiblingElementWithName(string ...$nodeNames): bool {
    // A node without a parent has no siblings.
    if ($this->parentNode === null) {
        return false;
    }

    foreach ($this->parentNode->childNodes as $child) {
        // This node is not its own sibling.
        if ($child->isSameNode($this)) {
            continue;
        }

        // $nodeNames holds strings, so the Element check has to be made against
        // the sibling; checking the name itself could never succeed.
        if ($child instanceof Element && in_array($child->nodeName, $nodeNames, true)) {
            return true;
        }
    }

    return false;
}
// Sets the attribute by handing it to setAttributeNS() with a null namespace.
public function setAttribute($name, $value) {
$this->setAttributeNS(null, $name, $value);
}

43
lib/DOM/Text.php

@ -8,47 +8,4 @@ namespace MensBeam\HTML;
/** Text node class which adds nonstandard sibling lookup helpers to \DOMText. */
class Text extends \DOMText {
    use LeafNode, Moonwalk, ToString;

    /**
     * Nonstandard. Reports whether any of the supplied nodes shares this node's
     * parent (this node itself is never counted as its own sibling).
     *
     * @param \DOMNode ...$nodes - Nodes to look for among the siblings
     * @return bool True when one of the nodes is a sibling, false otherwise
     */
    public function hasSibling(\DOMNode ...$nodes): bool {
        // A node without a parent has no siblings.
        if ($this->parentNode === null) {
            return false;
        }

        foreach ($this->parentNode->childNodes as $child) {
            // This node is not its own sibling.
            if ($child->isSameNode($this)) {
                continue;
            }

            foreach ($nodes as $n) {
                if ($n->isSameNode($child)) {
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Nonstandard. Reports whether this node has a sibling Element whose node
     * name is one of the supplied names.
     *
     * @param string ...$nodeNames - Node names to look for
     * @return bool True when a matching sibling element exists, false otherwise
     */
    public function hasSiblingElementWithName(string ...$nodeNames): bool {
        // A node without a parent has no siblings.
        if ($this->parentNode === null) {
            return false;
        }

        foreach ($this->parentNode->childNodes as $child) {
            // This node is not its own sibling.
            if ($child->isSameNode($this)) {
                continue;
            }

            // $nodeNames holds strings, so the Element check has to be made
            // against the sibling; checking the name itself could never succeed.
            if ($child instanceof Element && in_array($child->nodeName, $nodeNames, true)) {
                return true;
            }
        }

        return false;
    }
}

1
lib/DOM/traits/Moonwalk.php

@ -7,6 +7,7 @@ declare(strict_types=1);
namespace MensBeam\HTML;
trait Moonwalk {
/** Generator which walks up the DOM. Nonstandard. */
public function moonwalk(?\Closure $filter = null): \Generator {
return $this->moonwalkGenerator($this, $filter);
}

27
lib/DOM/traits/MoonwalkShallow.php

@ -0,0 +1,27 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
trait MoonwalkShallow {
    /**
     * Generator which just walks through a node's child nodes in reverse (last
     * child first), without descending into them. Nonstandard.
     *
     * @param ?\Closure $filter - An optional closure called with each child
     *   node; when supplied, only children for which it returns a truthy value
     *   are yielded
     */
    public function moonwalkShallow(?\Closure $filter = null): \Generator {
        // A node without children has a length of 0, so the loop body never
        // runs and no separate hasChildNodes() guard is needed.
        for ($i = $this->childNodes->length - 1; $i >= 0; $i--) {
            $child = $this->childNodes[$i];
            if ($filter === null || $filter($child)) {
                yield $child;
            }
        }
    }
}

1
lib/DOM/traits/Walk.php

@ -7,6 +7,7 @@ declare(strict_types=1);
namespace MensBeam\HTML;
trait Walk {
/** Generator which walks down the DOM. Nonstandard. */
public function walk(?\Closure $filter = null): \Generator {
return $this->walkGenerator($this, $filter);
}

22
lib/DOM/traits/WalkShallow.php

@ -0,0 +1,22 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML;
trait WalkShallow {
    /**
     * Nonstandard generator which visits only a node's immediate child nodes,
     * in the order childNodes provides them.
     *
     * @param ?\Closure $filter - An optional closure called with each child
     *   node; when supplied, only children for which it returns a truthy value
     *   are yielded
     */
    public function walkShallow(?\Closure $filter = null): \Generator {
        foreach ($this->childNodes as $candidate) {
            // With a filter present, pass over anything it does not accept.
            if ($filter !== null && !$filter($candidate)) {
                continue;
            }

            yield $candidate;
        }
    }
}
Loading…
Cancel
Save