Browse Source

Test harness built for formatted serializing, fixed serializer bugs

wrapper-classes
Dustin Wilson 3 years ago
parent
commit
d742a9e0d7
  1. 88
      lib/Document.php
  2. 15
      lib/traits/Moonwalk.php
  3. 3
      lib/traits/ParentNode.php
  4. 212
      tests/cases/serializer/TestSerializer.php
  5. 13
      tests/cases/serializer/formatted/mensbeam01.dat
  6. 0
      tests/cases/serializer/standard/mensbeam01.dat
  7. 0
      tests/cases/serializer/standard/mensbeam02.dat
  8. 0
      tests/cases/serializer/standard/wpt01.dat

88
lib/Document.php

@ -399,7 +399,7 @@ class Document extends \DOMDocument {
$formatOutput = ($node->childElementCount > 0);
}
return $this->serializeFragment($node, $formatOutput);
return $this->serializeNode($node, $formatOutput);
}
public function saveHTMLFile($filename): int {
@ -419,8 +419,8 @@ class Document extends \DOMDocument {
}
protected function serializeBlockElementFilter(\DOMNode $ignoredNode): \Closure {
$blockElementFilter = function($n) use ($ignoredNode) {
protected function blockElementFilterFactory(\DOMNode $ignoredNode): \Closure {
return function($n) use ($ignoredNode) {
if (!$n->isSameNode($ignoredNode) && $n instanceof Element && $this->isHTMLNamespace($n) && (in_array($n->nodeName, self::BLOCK_ELEMENTS) || $n->walk(function($nn) {
if ($nn instanceof Element && $this->isHTMLNamespace($nn) && in_array($nn->nodeName, self::BLOCK_ELEMENTS)) {
return true;
@ -429,25 +429,22 @@ class Document extends \DOMDocument {
return true;
}
};
return $blockElementFilter;
}
protected function serializeFragment(\DOMNode $node, bool $formatOutput = false): string {
if ($formatOutput) {
// Stores the root foreign element when parsing its descendants
static $foreignElement = null;
// Flag used if the root foreign element above has block element siblings
static $foreignElementWithBlockElementSiblings = false;
// Stores the indention level
static $indent = 0;
// Stores the root preformatted element when parsing its descendants
static $preformattedElement = null;
// Stores the previous non text node name so it can be used to check for adding
// additional space.
static $previousNonTextNodeSiblingName = null;
}
/**
* Recursively serializes nodes
*
* @param \DOMNode $node - The node to serialize
* @param bool $formatOutput - Flag for formatting output
* @param bool $first - True if the first run
* @param ?Element $foreignElement - Stores the root foreign element when parsing its descendants
* @param bool $foreignElementWithBlockElementSiblings - Flag used if the root foreign element above has block element siblings
* @param int $indent - Stores the indention level
* @param ?Element $preformattedElement - Stores the root preformatted element when parsing its descendants
* @param ?string $previousNonTextNodeSiblingName - Stores the previous non text node name so it can be used to check for adding
* additional space.
*/
protected function serializeNode(\DOMNode $node, bool $formatOutput = false, bool $first = true, ?Element $foreignElement = null, bool $foreignElementWithBlockElementSiblings = false, int $indent = 0, ?Element $preformattedElement = null, ?string $previousNonTextNodeSiblingName = null): string {
# 13.3. Serializing HTML fragments
#
# 1. If the node serializes as void, then return the empty string.
@ -474,7 +471,7 @@ class Document extends \DOMDocument {
if ($this->formatOutput) {
// Filter meant to be used with DOM walker generator methods which checks if
// elements are block or if elements are inline with block descendants
$blockElementFilter = self::serializeBlockElementFilter($currentNode->parentNode);
$blockElementFilter = self::blockElementFilterFactory($currentNode->parentNode);
}
# 2. Append the appropriate string from the following list to s:
@ -492,7 +489,7 @@ class Document extends \DOMDocument {
}
if ($formatOutput) {
$blockElementFilter = self::serializeBlockElementFilter($currentNode);
$blockElementFilter = self::blockElementFilterFactory($currentNode);
$hasChildNodes = ($currentNode->hasChildNodes());
$modify = false;
@ -529,7 +526,9 @@ class Document extends \DOMDocument {
$s .= "\n";
}
$s .= "\n" . str_repeat(' ', $indent);
if (!$first) {
$s .= "\n" . str_repeat(' ', $indent);
}
}
}
@ -649,14 +648,14 @@ class Document extends \DOMDocument {
# current node element (thus recursing into this algorithm for that element),
# followed by a U+003C LESS-THAN SIGN character (<), a U+002F SOLIDUS character (/),
# tagname again, and finally a U+003E GREATER-THAN SIGN character (>).
$s .= $this->serializeFragment($currentNode, $formatOutput);
$s .= $this->serializeNode($currentNode, $formatOutput, false, $foreignElement, $foreignElementWithBlockElementSiblings, $indent, $preformattedElement, $previousNonTextNodeSiblingName);
if ($formatOutput) {
if ($modify) {
// Decrement the indention level.
$indent--;
if ($preformattedElement === null) {
if (!$first && $preformattedElement === null) {
// If a foreign element with a foreign element ancestor with block element
// siblings and has at least one element child or any element with a block
// element descendant...
@ -722,15 +721,17 @@ class Document extends \DOMDocument {
elseif ($currentNode instanceof Comment) {
if ($formatOutput) {
if ($preformattedElement === null && $foreignElementWithBlockElementSiblings || $currentNode->parentNode->walk($blockElementFilter)->current() !== null) {
// Add an additional newline if the previous sibling wasn't a comment.
if ($previousNonTextNodeSiblingName !== null && $previousNonTextNodeSiblingName !== $this->nodeName) {
$s .= "\n";
}
if (!$first && $previousNonTextNodeSiblingName !== null) {
// Add an additional newline if the previous sibling wasn't a comment.
if ($previousNonTextNodeSiblingName !== $this->nodeName) {
$s .= "\n";
}
$s .= "\n" . str_repeat(' ', $indent);
$s .= "\n" . str_repeat(' ', $indent);
}
}
$previousNonTextNodeSiblingName = $this->nodeName;
$previousNonTextNodeSiblingName = $currentNode->nodeName;
}
# Append the literal string "<!--" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION
@ -742,17 +743,19 @@ class Document extends \DOMDocument {
# If current node is a ProcessingInstruction
elseif ($currentNode instanceof ProcessingInstruction) {
if ($formatOutput) {
if ($preformattedElement === null && $foreignElementWithBlockElementSiblings || $currentNode->parentNode->walk($blockElementFilter)->current() !== null) {
if (!$first && $preformattedElement === null && ($foreignElementWithBlockElementSiblings || $currentNode->parentNode->walk($blockElementFilter)->current() !== null)) {
// Add an additional newline if the previous sibling wasn't a processing
// instruction.
if ($previousNonTextNodeSiblingName !== null && $previousNonTextNodeSiblingName !== $this->nodeName) {
$s .= "\n";
}
if ($previousNonTextNodeSiblingName !== null) {
if ($previousNonTextNodeSiblingName !== $this->nodeName) {
$s .= "\n";
}
$s .= "\n" . str_repeat(' ', $indent);
$s .= "\n" . str_repeat(' ', $indent);
}
}
$previousNonTextNodeSiblingName = $this->nodeName;
$previousNonTextNodeSiblingName = $currentNode->nodeName;
}
# Append the literal string "<?" (U+003C LESS-THAN SIGN, U+003F QUESTION MARK),
@ -773,8 +776,19 @@ class Document extends \DOMDocument {
// DEVIATION: The name is trimmed because PHP's DOM does not
// accept the empty string as a DOCTYPE name
$name = trim($currentNode->name, ' ');
if ($formatOutput) {
if ($previousNonTextNodeSiblingName !== null) {
$s .= "\n" . str_repeat(' ', $indent);
}
$previousNonTextNodeSiblingName = $currentNode->nodeName;
}
$s .= "<!DOCTYPE $name>";
}
$first = false;
}
# 5. Return s.

15
lib/traits/Moonwalk.php

@ -41,17 +41,12 @@ trait Moonwalk {
}
}
// If node is an instance of DocumentFragment then it might be the content
// fragment of a template element, so iterate through all template elements
// stored in the element map and see if node is the fragment of one of the
// templates; if it is change node to the template element and reprocess. Magic!
// Can walk backwards THROUGH templates!
// If node is an instance of DocumentFragment then set the node to its host if
// it isn't null.
if ($node instanceof DocumentFragment) {
foreach (ElementMap::getIterator($node->ownerDocument) as $element) {
if ($element->ownerDocument->isSameNode($node->ownerDocument) && $element instanceof TemplateElement && $element->content->isSameNode($node)) {
$node = $element;
continue;
}
$host = $node->host;
if ($host !== null) {
$node = $host;
}
}

3
lib/traits/ParentNode.php

@ -21,8 +21,7 @@ trait ParentNode {
// almost identical; so, using that. PHP's DOM doesn't provide the end user any
// way to create a \DOMNodeList from scratch, so going to cheat and use XPath to
// make one for us.
$isDocument = ($this instanceof Document);
$document = ($isDocument) ? $this : $this->ownerDocument;
$document = ($this instanceof Document) ? $this : $this->ownerDocument;
return $document->xpath->query('child::*', $this);
}

212
tests/cases/serializer/TestSerializer.php

@ -25,11 +25,22 @@ use MensBeam\HTML\Parser;
* @covers \MensBeam\HTML\DOM\ToString
*/
class TestSerializer extends \PHPUnit\Framework\TestCase {
/**
 * Provides the data sets for the standard (unformatted) serializer tests.
 *
 * Every ".dat" file found in the standard serializer case directory is handed
 * to parseTreeTestFile() and the data sets it produces are yielded in turn.
 * Files whose base name appears in $blacklist are skipped.
 *
 * @return iterable The data sets produced by parseTreeTestFile() for each file
 */
public function provideStandardTreeTests(): iterable {
    // Base names of test files which should be skipped; none at present
    $blacklist = [];
    // The case directory is all lowercase in the repository, so the pattern
    // must be as well; otherwise nothing matches on case-sensitive filesystems
    // and the provider silently yields no tests.
    $pattern = \MensBeam\HTML\DOM\BASE."tests/cases/serializer/standard/*.dat";

    $files = new \AppendIterator();
    $files->append(new \GlobIterator($pattern, \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
    foreach ($files as $file) {
        // Strict comparison: both sides are always strings
        if (!in_array(basename($file), $blacklist, true)) {
            yield from $this->parseTreeTestFile($file);
        }
    }
}
/**
* @dataProvider provideStandardSerializerTests
* @dataProvider provideStandardTreeTests
* @covers \MensBeam\HTML\DOM\Document::saveHTML
* @covers \MensBeam\HTML\DOM\Document::serializeBlockElementFilter
* @covers \MensBeam\HTML\DOM\Document::serializeFragment
* @covers \MensBeam\HTML\DOM\Document::blockElementFilterFactory
* @covers \MensBeam\HTML\DOM\Document::serializeNode
* @covers \MensBeam\HTML\DOM\Document::__toString
* @covers \MensBeam\HTML\DOM\ToString::__toString
*/
@ -38,62 +49,75 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
$this->assertSame($exp, (string)$node);
}
public function provideStandardSerializerTests(): iterable {
$blacklist = [];
public function provideFormattedTreeTests(): iterable {
$files = new \AppendIterator();
$files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/cases/Serializer/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
$files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/cases/Serializer/formatted/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
foreach ($files as $file) {
$index = 0;
$l = 0;
if (!in_array(basename($file), $blacklist)) {
$lines = array_map(function($v) {
return rtrim($v, "\n");
}, file($file));
while ($l < sizeof($lines)) {
$pos = $l + 1;
assert(in_array($lines[$l], ["#document", "#fragment"]), new \Exception("Test $file #$index does not start with #doocument or #fragment tag at line ".($l + 1)));
$fragment = $lines[$l] === "#fragment";
// collect the test input
$data = [];
for (++$l; $l < sizeof($lines); $l++) {
if (preg_match('/^#(script-(on|off)|output)$/', $lines[$l])) {
break;
}
$data[] = $lines[$l];
}
// set the script mode, if present
assert(preg_match('/^#(script-(on|off)|output)$/', $lines[$l]) === 1, new \Exception("Test $file #$index follows data with something other than script flag or output at line ".($l + 1)));
$script = null;
if ($lines[$l] === "#script-off") {
$script = false;
$l++;
} elseif ($lines[$l] === "#script-on") {
$script = true;
$l++;
}
// collect the output string
$exp = [];
assert($lines[$l] === "#output", new \Exception("Test $file #$index follows input with something other than output at line ".($l + 1)));
for (++$l; $l < sizeof($lines); $l++) {
if ($lines[$l] === "" && in_array(($lines[$l + 1] ?? ""), ["#document", "#fragment"])) {
break;
}
assert(preg_match('/^[^#]/', $lines[$l]) === 1, new \Exception("Test $file #$index contains unrecognized data after output at line ".($l + 1)));
$exp[] = $lines[$l];
}
$exp = implode("\n", $exp);
if (!$script) {
yield basename($file)." #$index (line $pos)" => [$data, $fragment, $exp];
}
$l++;
$index++;
}
}
yield from $this->parseTreeTestFile($file);
}
}
protected function buildTree(array $data, bool $fragment): \DOMNode {
/**
* @dataProvider provideFormattedTreeTests
* @covers \MensBeam\HTML\DOM\Document::saveHTML
* @covers \MensBeam\HTML\DOM\Document::blockElementFilterFactory
* @covers \MensBeam\HTML\DOM\Document::serializeNode
* @covers \MensBeam\HTML\DOM\Document::__toString
* @covers \MensBeam\HTML\DOM\ToString::__toString
*/
public function testFormattedTreeTests(array $data, bool $fragment, string $exp): void {
    // Build the tree with formatted output switched on (third argument), then
    // compare its string form against the expected serialization.
    $tree = $this->buildTree($data, $fragment, true);
    $actual = (string)$tree;

    $this->assertSame($exp, $actual);
}
/**
* @covers \MensBeam\HTML\DOM\Document::saveHTML
* @covers \MensBeam\HTML\DOM\Document::serializeNode
*/
public function testSerializingDocumentType(): void {
    $document = new Document();

    // Create a doctype carrying public and system identifiers and attach it
    $doctype = $document->implementation->createDocumentType('ook', 'eek', 'ack');
    $document->appendChild($doctype);

    // Only the name is expected to appear in the serialized doctype
    $serialized = $document->saveHTML($doctype);
    $this->assertSame('<!DOCTYPE ook>', $serialized);
}
/**
* @covers \MensBeam\HTML\DOM\Document::saveHTML
* @covers \MensBeam\HTML\DOM\Document::serializeNode
* @covers \MensBeam\HTML\DOM\ToString::__toString
*/
public function testSerializingElements(): void {
    $document = new Document();

    // An input element given a text child: casting to string is expected to
    // yield just the start tag, and saveHTML the empty string.
    $input = $document->createElement('input');
    $hiddenText = $document->createTextNode('You should not see this text');
    $input->appendChild($hiddenText);
    $this->assertSame('<input>', (string)$input);
    $this->assertSame('', $document->saveHTML($input));

    // A template element whose content fragment holds text: casting to string
    // is expected to include the tags, while saveHTML yields only the content.
    $template = $document->createElement('template');
    $visibleText = $document->createTextNode('Ook!');
    $template->content->appendChild($visibleText);
    $this->assertSame('<template>Ook!</template>', (string)$template);
    $this->assertSame('Ook!', $document->saveHTML($template));
}
/** @covers \MensBeam\HTML\DOM\Document::saveHTML */
public function testSerializerFailure(): void {
    // The expectations must be registered before the failing call is made
    $this->expectException(DOMException::class);
    $this->expectExceptionCode(DOMException::WRONG_DOCUMENT);

    // Create an element owned by one document...
    $ownerDocument = new Document();
    $html = $ownerDocument->createElement('html');

    // ...and ask a different document to serialize it
    $otherDocument = new Document();
    $otherDocument->saveHTML($html);
}
protected function buildTree(array $data, bool $fragment, bool $formatOutput = false): \DOMNode {
$document = new Document;
$document->formatOutput = $formatOutput;
if ($fragment) {
$document->appendChild($document->createElement("html"));
$out = $document->createDocumentFragment();
@ -154,42 +178,50 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
return $out;
}
/** @covers \MensBeam\HTML\DOM\Document::saveHTML */
public function testSerializingDocumentType(): void {
$d = new Document();
$dt = $d->implementation->createDocumentType('ook', 'eek', 'ack');
$d->appendChild($dt);
$this->assertSame('<!DOCTYPE ook>', $d->saveHTML($dt));
}
/**
* @covers \MensBeam\HTML\DOM\Document::saveHTML
* @covers \MensBeam\HTML\DOM\Document::serializeFragment
* @covers \MensBeam\HTML\DOM\ToString::__toString
*/
public function testSerializingElements(): void {
$d = new Document();
$i = $d->createElement('input');
$i->appendChild($d->createTextNode('You should not see this text'));
$this->assertSame('<input>', (string)$i);
$this->assertSame('', $d->saveHTML($i));
$t = $d->createElement('template');
$t->content->appendChild($d->createTextNode('Ook!'));
$this->assertSame('<template>Ook!</template>', (string)$t);
$this->assertSame('Ook!', $d->saveHTML($t));
}
/** @covers \MensBeam\HTML\DOM\Document::saveHTML */
public function testSerializerFailure(): void {
$this->expectException(DOMException::class);
$this->expectExceptionCode(DOMException::WRONG_DOCUMENT);
$d = new Document();
$h = $d->createElement('html');
$d2 = new Document();
$d2->saveHTML($h);
/**
 * Parses a ".dat" tree test file and yields one data set per test.
 *
 * Each test in the file is laid out as:
 *
 *  - a "#document" or "#fragment" marker line;
 *  - the lines describing the input tree;
 *  - optionally a "#script-on" or "#script-off" line;
 *  - an "#output" marker line followed by the expected serialization.
 *
 * Tests are separated by an empty line followed by the next marker line.
 * Tests flagged "#script-on" are parsed but not yielded.
 *
 * Malformed files are reported through assert(), so structural checks are
 * only enforced when assertions are enabled.
 *
 * @param string $file - The path of the test file to parse
 * @return \Generator Keys are "<basename> #<index> (line <n>)"; values are
 *                    arrays of the input lines, the fragment flag, and the
 *                    expected output string, in that order
 * @throws \Exception If the file cannot be read, or (with assertions enabled)
 *                    if the file is malformed
 */
protected function parseTreeTestFile(string $file): \Generator {
    $contents = file($file);
    if ($contents === false) {
        // array_map() would otherwise fail with an unrelated type error
        throw new \Exception("Test file $file could not be read");
    }
    $lines = array_map(function($v) {
        return rtrim($v, "\n");
    }, $contents);
    // The line list never changes, so count it once
    $count = count($lines);
    $markers = ["#document", "#fragment"];
    $index = 0;
    $l = 0;
    while ($l < $count) {
        $pos = $l + 1;
        assert(in_array($lines[$l], $markers, true), new \Exception("Test $file #$index does not start with #document or #fragment tag at line ".($l + 1)));
        $fragment = $lines[$l] === "#fragment";
        // collect the test input
        $data = [];
        for (++$l; $l < $count; $l++) {
            if (preg_match('/^#(script-(on|off)|output)$/', $lines[$l])) {
                break;
            }
            $data[] = $lines[$l];
        }
        // If the file ends before an output section is seen $l is out of
        // bounds; treat the missing line as empty so the assertions below
        // report the problem instead of an undefined offset being read.
        $current = $lines[$l] ?? "";
        // set the script mode, if present
        assert(preg_match('/^#(script-(on|off)|output)$/', $current) === 1, new \Exception("Test $file #$index follows data with something other than script flag or output at line ".($l + 1)));
        $script = null;
        if ($current === "#script-off") {
            $script = false;
            $l++;
        } elseif ($current === "#script-on") {
            $script = true;
            $l++;
        }
        // collect the output string
        $exp = [];
        assert(($lines[$l] ?? "") === "#output", new \Exception("Test $file #$index follows input with something other than output at line ".($l + 1)));
        for (++$l; $l < $count; $l++) {
            if ($lines[$l] === "" && in_array(($lines[$l + 1] ?? ""), $markers, true)) {
                break;
            }
            // Only lines beginning with "#" are suspicious here; blank lines
            // are legitimate because formatted output may contain them.
            assert(substr($lines[$l], 0, 1) !== "#", new \Exception("Test $file #$index contains unrecognized data after output at line ".($l + 1)));
            $exp[] = $lines[$l];
        }
        $exp = implode("\n", $exp);
        // Tests which require scripting are not yielded
        if (!$script) {
            yield basename($file)." #$index (line $pos)" => [$data, $fragment, $exp];
        }
        // Step over the blank separator line to the next test's marker
        $l++;
        $index++;
    }
}
}

13
tests/cases/serializer/formatted/mensbeam01.dat

@ -0,0 +1,13 @@
#document
| <html>
#output
<html></html>
#document
| <!-- data -->
| <!DOCTYPE html>
| <html>
#output
<!--data-->
<!DOCTYPE html>
<html></html>

0
tests/cases/serializer/menbeam01.dat → tests/cases/serializer/standard/mensbeam01.dat

0
tests/cases/serializer/menbeam02.dat → tests/cases/serializer/standard/mensbeam02.dat

0
tests/cases/serializer/wpt01.dat → tests/cases/serializer/standard/wpt01.dat

Loading…
Cancel
Save