Browse Source

100% coverage on pretty printing

pretty-print
Dustin Wilson 2 years ago
parent
commit
08ba468214
  1. 104
      lib/Parser/Serializer.php
  2. 329
      tests/cases/TestSerializer.php

104
lib/Parser/Serializer.php

@ -11,9 +11,6 @@ use MensBeam\HTML\Parser;
abstract class Serializer {
use NameCoercion;
protected const BLOCK_ELEMENTS = [ 'address', 'article', 'aside', 'blockquote', 'canvas', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hr', 'li', 'main', 'nav', 'noscript', 'ol', 'p', 'pre', 'section', 'table', 'tfoot', 'ul', 'video' ];
// Elements treated as block elements when reformatting whitespace
protected const PRINTING_BLOCK_ELEMENTS = [ 'address', 'article', 'aside', 'blockquote', 'base', 'body', 'canvas', 'details', 'dialog', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'isindex', 'li', 'link', 'main', 'meta', 'nav', 'noscript', 'ol', 'p', 'picture', 'pre', 'section', 'script', 'source', 'style', 'table', 'td', 'tfoot', 'th', 'thead', 'title', 'tr', 'ul', 'video' ];
// List of h-elements which are used to determine element grouping for the
// purposes of reformatting whitespace
protected const H_ELEMENTS = [ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' ];
@ -321,6 +318,7 @@ abstract class Serializer {
$firstElementChild = null;
if (property_exists($node, 'firstElementChild')) {
$firstElementChild = $node->firstElementChild;
// @codeCoverageIgnoreStart
} else {
$n = $node->firstChild;
do {
@ -330,6 +328,7 @@ abstract class Serializer {
}
} while ($n = $n->nextSibling);
}
// @codeCoverageIgnoreEnd
if ($firstElementChild !== null && ($foreignAsBlock || ($htmlElement && self::treatAsBlock($node)))) {
$s .= "\n" . str_repeat($indentChar, $indentionLevel * $indentStep);
@ -355,19 +354,21 @@ abstract class Serializer {
# Otherwise, append the value of current node's data IDL attribute, escaped as described below.
else {
$data = $node->data;
if ($serializerState['reformatWhitespace']) {
// The serializer should disable 'reformatWhitespace' on children of a
// preformatted element, but just in case check for it here.
$preformattedContent = $serializerState['preformattedContent'] ?: static::isPreformattedContent($node);
if (!$preformattedContent) {
$treatAsBlock = self::treatAsBlock($node);
$modify = false;
if (($serializerState['foreignAsBlock'] || $treatAsBlock || (self::treatAsBlock($node->parentNode) && count($node->parentNode->childNodes) === 1)) && strspn($data, Data::WHITESPACE) === strlen($data)) {
if (($serializerState['foreignAsBlock'] || $treatAsBlock || ($node->parentNode !== null && self::treatAsBlock($node->parentNode) && count($node->parentNode->childNodes) === 1)) && strspn($data, Data::WHITESPACE) === strlen($data)) {
return $s;
}
if ($treatAsBlock) {
// Block formatting context -- remove all whitespace
$data = preg_replace(Data::WHITESPACE_REGEX, '', $data);
// Block formatting context -- trim data and convert all whitespace to a single
// space
$data = preg_replace('/[\t\n\x0c\x0D ]+/', ' ', trim($data));
if ($data === '') {
return $s;
}
@ -386,34 +387,55 @@ abstract class Serializer {
' '
], $data);
// 4. Convert multiple spaces to a single space even across inline elements.
//
// This will be accomplished by looking backwards through siblings, checking
// if the previous text node had whitespace at the end and then lobbing off
// whitespace at the beginning of the current text node. This has the added
// benefit of doing part of the work of #5 as well -- if it matches.
// Moonwalk and find the closest block element (actual block element, not
// elements treated as block for the purposes of serializing) then grab all
// descendant text nodes that aren't descendants of templates.
$xpath = new \DOMXPath($node->ownerDocument);
$previousTextNode = $xpath->query('./preceding-sibling::text()[1] | ./preceding-sibling::*/descendant::text()[1]', $node);
$ltrimmed = false;
if ($previousTextNode->length > 0) {
$data2 = $previousTextNode->item(0)->data;
if (preg_match('/[\t\n\x0c\x0D ]+$/', $data2)) {
$data = ltrim($data);
$ltrimmed = true;
// Select the nearest real block-level HTML ancestor and collect every text
// node beneath it that is not inside a template. Each alternative in the
// name test must call name(): a bare `name` would instead test for a child
// element called <name>, which made <body> unmatched and caused the search
// to fall through to <html>.
$textNodes = $xpath->query('./ancestor::*[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"][name()="address" or name()="article" or name()="aside" or name()="blockquote" or name()="body" or name()="canvas" or name()="dd" or name()="div" or name()="dl" or name()="dt" or name()="fieldset" or name()="figcaption" or name()="figure" or name()="footer" or name()="form" or name()="h1" or name()="h2" or name()="h3" or name()="h4" or name()="h5" or name()="h6" or name()="head" or name()="header" or name()="hr" or name()="html" or name()="li" or name()="main" or name()="nav" or name()="ol" or name()="p" or name()="section" or name()="table" or name()="tfoot" or name()="ul" or name()="video"][1]/descendant::text()[not(ancestor::template[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"])]', $node);
// If nothing was matched then the text node is either disconnected from its
// document and being serialized alone or an inline descendant of a document
// fragment.
if ($textNodes->length > 0) {
$firstOfLine = ($node === $textNodes->item(0));
$lastOfLine = ($node === $textNodes->item($textNodes->length - 1));
} else {
// If the text node is disconnected from its document then both firstOfLine
// and lastOfLine are true.
if ($node->parentNode === null) {
$firstOfLine = $lastOfLine = true;
}
// Otherwise, it's an inline descendant of a document fragment. Find its root
// node and then grab all text node descendants of that fragment.
else {
$n = $node;
while ($n = $n->parentNode) {
$root = $n;
}
$textNodes = $xpath->query('.//text()[not(ancestor::template[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"])]', $root);
$firstOfLine = ($node === $textNodes->item(0));
$lastOfLine = ($node === $textNodes->item($textNodes->length - 1));
}
}
// 5. Spaces at the beginning and ending of a line (beginning and ending of
// inline content) are removed.
if (!$ltrimmed) {
$firstOfLine = $xpath->query('./ancestor::*[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"][name()="address" or name()="article" or name()="aside" or name()="blockquote" or name="body" or name()="canvas" or name()="dd" or name()="div" or name()="dl" or name()="dt" or name()="fieldset" or name()="figcaption" or name()="figure" or name()="footer" or name()="form" or name()="h1" or name()="h2" or name()="h3" or name()="h4" or name()="h5" or name()="h6" or name="head" or name="header" or name="hr" or name="html" or name="li" or name="main" or name="nav" or name="ol" or name="p" or name="section" or name="table" or name="tfoot" or name="ul" or name="video"][1]/descendant::text()[1]', $node);
if ($firstOfLine->length > 0 && $node === $firstOfLine->item(0)) {
$data = ltrim($data);
// 4. Convert multiple spaces to a single space even across inline elements.
$data = preg_replace('/ +/', ' ', $data);
if (!$firstOfLine) {
foreach ($textNodes as $key => $t) {
if ($t === $node && preg_match('/[\t\n\x0c\x0D ]+$/', $textNodes[$key - 1]->data)) {
$data = ltrim($data);
break;
}
}
}
$lastOfLine = $xpath->query('./ancestor::*[namespace-uri()="" or namespace-uri()="http://www.w3.org/1999/xhtml"][name()="address" or name()="article" or name()="aside" or name()="blockquote" or name="body" or name()="canvas" or name()="dd" or name()="div" or name()="dl" or name()="dt" or name()="fieldset" or name()="figcaption" or name()="figure" or name()="footer" or name()="form" or name()="h1" or name()="h2" or name()="h3" or name()="h4" or name()="h5" or name()="h6" or name="head" or name="header" or name="hr" or name="html" or name="li" or name="main" or name="nav" or name="ol" or name="p" or name="section" or name="table" or name="tfoot" or name="ul" or name="video"][1]/descendant::text()[last()]', $node);
if ($lastOfLine->length > 0 && $node === $lastOfLine->item(0)) {
// 5. Spaces at the beginning and ending of a line (beginning and ending of
// inline content) are removed.
if ($firstOfLine) {
$data = ltrim($data);
}
if ($lastOfLine) {
$data = rtrim($data);
}
}
@ -552,7 +574,7 @@ abstract class Serializer {
}
protected static function treatAsBlock(\DOMNode $node): bool {
if ($node instanceof \DOMDocument || $node instanceof \DOMDocumentFragment || ($node instanceof \DOMElement && in_array($node->tagName, self::PRINTING_BLOCK_ELEMENTS))) {
if ($node instanceof \DOMDocument || $node instanceof \DOMDocumentFragment) {
return true;
}
@ -565,11 +587,12 @@ abstract class Serializer {
}
$xpath = new \DOMXPath($node->ownerDocument);
if ($xpath->evaluate(self::BLOCK_QUERY, $node) === 0) {
$result = ($xpath->evaluate(self::BLOCK_QUERY, $node) > 0);
if (!$result) {
return static::treatAsBlockWithTemplates($node);
}
return true;
return $result;
}
protected static function treatAsBlockWithTemplates(\DOMNode $node): bool {
@ -577,7 +600,7 @@ abstract class Serializer {
// PHP DOM solutions with template contents will need to extend this method to
// check for any templates and look within their content fragments for "block"
// content.
return $result;
return false;
}
protected static function treatForeignRootAsBlock(\DOMNode $node): bool {
@ -585,16 +608,17 @@ abstract class Serializer {
// PHP DOM solutions with template contents will need to extend this method to
// be able to moonwalk through document fragment hosts.
$n = $node;
while ($n = $n->parentNode) {
if ($n instanceof \DOMDocument || $n instanceof \DOMDocumentFragment || ($n instanceof \DOMElement && $n->parentNode === null)) {
do {
if ($n->parentNode !== null && ($n->parentNode->namespaceURI ?? Parser::HTML_NAMESPACE) !== Parser::HTML_NAMESPACE) {
continue;
}
if (self::treatAsBlock($n->parentNode)) {
return true;
} elseif (($n->parentNode->namespaceURI ?? Parser::HTML_NAMESPACE) === Parser::HTML_NAMESPACE) {
if (self::treatAsBlock($n->parentNode)) {
return true;
}
break;
}
}
break;
} while ($n = $n->parentNode);
return false;
}

329
tests/cases/TestSerializer.php

@ -122,7 +122,7 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
}
/** @dataProvider provideCustomSerializations */
public function testSerializeWithOptions(int $indentStep, bool $indentWithSpaces, bool $processingInstructions, bool $reformatWhitespace, bool $boolAttr, bool $foreignVoid, string $in, string $exp): void {
public function testSerializeWithOptions(bool $fragment, ?string $fragmentContext, int $indentStep, bool $indentWithSpaces, bool $processingInstructions, bool $reformatWhitespace, bool $boolAttr, bool $foreignVoid, string $in, string $exp): void {
$config = new Config;
$config->indentStep = $indentStep;
$config->indentWithSpaces = $indentWithSpaces;
@ -130,15 +130,22 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
$config->reformatWhitespace = $reformatWhitespace;
$config->serializeBooleanAttributeValues = $boolAttr;
$config->serializeForeignVoidEndTags = $foreignVoid;
$d = Parser::parse($in, "UTF-8")->document;
$act = Parser::serialize($d, $config);
if (!$fragment) {
$d = Parser::parse($in, "UTF-8", $config)->document;
$act = Parser::serialize($d, $config);
} else {
$d = new \DOMDocument();
$act = Parser::serialize(Parser::parseFragment($d->createElement($fragmentContext), 0, $in, 'UTF-8', $config), $config);
}
$this->assertSame($exp, $act);
}
public function provideCustomSerializations(): iterable {
return [
// Boolean attribute values serialized
[0, false, false, false, true, true,
[false, null, 0, false, false, false, true, true,
<<<HTML
<a hidden="hidden"></a><b hidden=""></b><c hidden="HIDDEN"></c><d hidden="true"></d>
HTML,
@ -149,7 +156,7 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
],
// Boolean attribute values not serialized
[0, false, false, false, false, true,
[false, null, 0, false, false, false, false, true,
<<<HTML
<a hidden="hidden"></a><b hidden=""></b><c hidden="HIDDEN"></c><d hidden="true"></d>
HTML,
@ -160,7 +167,7 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
],
// Boolean attribute values serialized, foreign void end tags serialized
[0, false, false, false, true, true,
[false, null, 0, false, false, false, true, true,
<<<HTML
<br><svg/><svg>blah</svg><math/><math>blah</math><input>
HTML,
@ -171,7 +178,7 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
],
// Boolean attribute values serialized, foreign void end tags not serialized
[0, false, false, false, true, false,
[false, null, 0, false, false, false, true, false,
<<<HTML
<br><svg/><svg>blah</svg><math/><math>blah</math><input>
HTML,
@ -182,7 +189,7 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
],
// Neither attribute values nor foreign void end tags serialized
[0, false, false, false, false, false,
[false, null, 0, false, false, false, false, false,
<<<HTML
<audio loop hidden></audio><svg/>
HTML,
@ -193,7 +200,7 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
],
// Reformat whitespace, empty document
[1, true, false, true, false, false,
[false, null, 1, true, false, true, false, false,
<<<HTML
<html></html>
HTML,
@ -208,7 +215,7 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
],
// Reformat whitespace, comment before doctype
[1, true, false, true, false, false,
[false, null, 1, true, false, true, false, false,
<<<HTML
<!--data-->
<!DOCTYPE html>
@ -227,7 +234,7 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
],
// Reformat whitespace, preformatted element
[1, true, false, true, false, false,
[false, null, 1, true, false, true, false, false,
<<<HTML
<pre><code></code></pre>
HTML,
@ -245,7 +252,7 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
// Reformat whitespace, element grouping, foreign "block" content, & foreign
// void end tags not serialized
[1, true, false, true, false, false,
[false, null, 1, true, false, true, false, false,
<<<HTML
<div></div><svg><g id="ook"></g></svg>
HTML,
@ -265,8 +272,60 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
HTML
],
// Inline serialized comments and processing instructions, parsing of processing instructions off
[false, null, 1, true, false, true, false, false,
<<<HTML
<html>
<head></head>
<body>
<!--ook-->
<?ook eeeeek ?>
</body>
</html>
HTML,
<<<HTML
<html>
<head></head>
<body><!--ook--><!--?ook eeeeek ?--></body>
</html>
HTML
],
// Block serialized comments and processing instructions, parsing of processing instructions on
[false, null, 1, true, true, true, false, false,
<<<HTML
<html>
<head></head>
<body>
<div></div>
<!--ook-->
<?ook eeeeek ?>
<div></div>
</body>
</html>
HTML,
<<<HTML
<html>
<head></head>
<body>
<div></div>
<!--ook-->
<?ook eeeeek ?>
<div></div>
</body>
</html>
HTML
],
// Reformat whitespace, whitespace collapsing, custom indentions
[4, true, false, true, false, false,
[false, null, 4, true, false, true, false, false,
<<<HTML
<!DOCTYPE html>
<html>
@ -277,9 +336,15 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
</head>
<body>
ook
<div/>
ook eek
<pre>
This should be ignored
also this
</pre>
<div></div>
<p> Ook
<span> Eek!</span> </p>
</body>
</html>
HTML,
@ -289,15 +354,241 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
<html>
<head></head>
<body>
ook
<body>ook eek
<pre> This should be ignored
also this
</pre>
<div></div>
<p>Ook <span>Eek!</span></p>
</body>
</html>
HTML
],
// Fragment, html elements
[true, 'div', 1, true, false, true, false, false,
<<<HTML
<span> <span> Ook!</span></span>
HTML,
<<<HTML
<span><span>Ook!</span></span>
HTML
],
// Fragment, foreign elements
[true, 'div', 1, true, false, true, false, false,
<<<HTML
<svg> <g><path d=""/></g></svg>
HTML,
<<<HTML
<svg>
<g>
<path d=""/>
</g>
</svg>
HTML
],
// Fragment, foreign elements
[true, 'div', 1, true, false, true, false, false,
<<<HTML
<svg> <g><path d=""/></g></svg>
HTML,
<<<HTML
<svg>
<g>
<path d=""/>
</g>
</svg>
HTML
],
];
}
/**
 * Builds a Config from the supplied option values, passes it to the data
 * set's closure and asserts that the string the closure returns is identical
 * to the expected output.
 *
 * @dataProvider provideCustomSerializationsForNodes
 */
public function testSerializeNodesWithOptions(int $indentStep, bool $indentWithSpaces, bool $processingInstructions, bool $reformatWhitespace, bool $boolAttr, bool $foreignVoid, \Closure $in, string $exp): void {
    // Config property name => value, applied in the order listed.
    $options = [
        'indentStep'                      => $indentStep,
        'indentWithSpaces'                => $indentWithSpaces,
        'processingInstructions'          => $processingInstructions,
        'reformatWhitespace'              => $reformatWhitespace,
        'serializeBooleanAttributeValues' => $boolAttr,
        'serializeForeignVoidEndTags'     => $foreignVoid,
    ];
    $config = new Config;
    foreach ($options as $property => $value) {
        $config->$property = $value;
    }
    // The closure's return value is the actual serialization under test.
    $this->assertSame($exp, $in($config));
}
/**
 * Supplies the data sets for testSerializeNodesWithOptions().
 *
 * Every set lists, in order: indent step, indent-with-spaces flag,
 * processing-instructions flag, reformat-whitespace flag,
 * boolean-attribute-values flag, foreign-void-end-tags flag, a closure that
 * receives the Config and returns the serialized string, and the expected
 * string.
 */
public function provideCustomSerializationsForNodes(): iterable {
    // Source documents; the first two are each shared by a "with context" and
    // a "without context" data set.
    $paragraphDocument = <<<HTML
        <!DOCTYPE html>
        <html>
        <body>
        <p> Ook! </p>
        </body>
        </html>
        HTML;
    $titledSvgDocument = <<<HTML
        <!DOCTYPE html>
        <html>
        <body>
        <svg role="img" viewBox="0 0 26 26"><title>Ook</title>
        <rect id="eek--a" width="5" height="5"/></svg>
        </body>
        </html>
        HTML;
    $nestedSvgDocument = <<<HTML
        <!DOCTYPE html>
        <html>
        <body>
        <svg><g><g><rect id="eek--a" width="5" height="5"/></g></g></svg>
        <div></div>
        </body>
        </html>
        HTML;
    $textDocument = <<<HTML
        <!DOCTYPE html>
        <html>
        <body>
        OOK eeek ooooooook ook
        </body>
        </html>
        HTML;

    return [
        // Solo html element with context
        [1, true, false, true, false, false,
            static function (Config $config) use ($paragraphDocument): string {
                $document = Parser::parse($paragraphDocument, "UTF-8")->document;
                $paragraph = $document->getElementsByTagName('p')->item(0);
                return Parser::serialize($paragraph, $config);
            },
            '<p>Ook!</p>'
        ],
        // Solo html element without context
        [1, true, false, true, false, false,
            static function (Config $config) use ($paragraphDocument): string {
                $document = Parser::parse($paragraphDocument, "UTF-8")->document;
                $paragraph = $document->getElementsByTagName('p')->item(0);
                // Detach the element so it is serialized with no parent.
                $paragraph->parentNode->removeChild($paragraph);
                return Parser::serialize($paragraph, $config);
            },
            '<p>Ook!</p>'
        ],
        // Solo svg element serializing as inline with context
        [1, true, false, true, false, true,
            static function (Config $config) use ($titledSvgDocument): string {
                $document = Parser::parse($titledSvgDocument, "UTF-8")->document;
                $svgRoot = $document->getElementsByTagName('svg')->item(0);
                return Parser::serialize($svgRoot, $config);
            },
            '<svg role="img" viewBox="0 0 26 26"><title>Ook</title> <rect id="eek--a" width="5" height="5"></rect></svg>'
        ],
        // Solo svg element serializing as block with context
        [1, true, false, true, false, false,
            static function (Config $config) use ($nestedSvgDocument): string {
                $document = Parser::parse($nestedSvgDocument, "UTF-8")->document;
                $svgRoot = $document->getElementsByTagName('svg')->item(0);
                // The inner of the two nested <g> elements is the one serialized.
                $innerGroup = $svgRoot->firstChild->firstChild;
                return Parser::serialize($innerGroup, $config);
            },
            <<<HTML
            <g>
            <rect id="eek--a" width="5" height="5"/>
            </g>
            HTML
        ],
        // Solo svg element without context
        [1, true, false, true, false, true,
            static function (Config $config) use ($titledSvgDocument): string {
                $document = Parser::parse($titledSvgDocument, "UTF-8")->document;
                $svgRoot = $document->getElementsByTagName('svg')->item(0);
                // Detach the element so it is serialized with no parent.
                $svgRoot->parentNode->removeChild($svgRoot);
                return Parser::serialize($svgRoot, $config);
            },
            <<<HTML
            <svg role="img" viewBox="0 0 26 26">
            <title>Ook</title>
            <rect id="eek--a" width="5" height="5"></rect>
            </svg>
            HTML
        ],
        // Solo text node without context
        [1, true, false, true, false, true,
            static function (Config $config) use ($textDocument): string {
                $document = Parser::parse($textDocument, "UTF-8")->document;
                $textNode = $document->getElementsByTagName('body')->item(0)->firstChild;
                // Detach the text node so it is serialized with no parent.
                $textNode->parentNode->removeChild($textNode);
                return Parser::serialize($textNode, $config);
            },
            'OOK eeek ooooooook ook'
        ],
    ];
}

Loading…
Cancel
Save