Browse Source

Test harness built for formatted serializing, fixed serializer bugs

wrapper-classes
Dustin Wilson 3 years ago
parent
commit
d742a9e0d7
  1. 88
      lib/Document.php
  2. 15
      lib/traits/Moonwalk.php
  3. 3
      lib/traits/ParentNode.php
  4. 212
      tests/cases/serializer/TestSerializer.php
  5. 13
      tests/cases/serializer/formatted/mensbeam01.dat
  6. 0
      tests/cases/serializer/standard/mensbeam01.dat
  7. 0
      tests/cases/serializer/standard/mensbeam02.dat
  8. 0
      tests/cases/serializer/standard/wpt01.dat

88
lib/Document.php

@ -399,7 +399,7 @@ class Document extends \DOMDocument {
$formatOutput = ($node->childElementCount > 0); $formatOutput = ($node->childElementCount > 0);
} }
return $this->serializeFragment($node, $formatOutput); return $this->serializeNode($node, $formatOutput);
} }
public function saveHTMLFile($filename): int { public function saveHTMLFile($filename): int {
@ -419,8 +419,8 @@ class Document extends \DOMDocument {
} }
protected function serializeBlockElementFilter(\DOMNode $ignoredNode): \Closure { protected function blockElementFilterFactory(\DOMNode $ignoredNode): \Closure {
$blockElementFilter = function($n) use ($ignoredNode) { return function($n) use ($ignoredNode) {
if (!$n->isSameNode($ignoredNode) && $n instanceof Element && $this->isHTMLNamespace($n) && (in_array($n->nodeName, self::BLOCK_ELEMENTS) || $n->walk(function($nn) { if (!$n->isSameNode($ignoredNode) && $n instanceof Element && $this->isHTMLNamespace($n) && (in_array($n->nodeName, self::BLOCK_ELEMENTS) || $n->walk(function($nn) {
if ($nn instanceof Element && $this->isHTMLNamespace($nn) && in_array($nn->nodeName, self::BLOCK_ELEMENTS)) { if ($nn instanceof Element && $this->isHTMLNamespace($nn) && in_array($nn->nodeName, self::BLOCK_ELEMENTS)) {
return true; return true;
@ -429,25 +429,22 @@ class Document extends \DOMDocument {
return true; return true;
} }
}; };
return $blockElementFilter;
} }
protected function serializeFragment(\DOMNode $node, bool $formatOutput = false): string { /**
if ($formatOutput) { * Recursively serializes nodes
// Stores the root foreign element when parsing its descendants *
static $foreignElement = null; * @param \DOMNode $node - The node to serialize
// Flag used if the root foreign element above has block element siblings * @param bool $formatOutput - Flag for formatting output
static $foreignElementWithBlockElementSiblings = false; * @param bool $first - True if the first run
// Stores the indention level * @param ?Element $foreignElement - Stores the root foreign element when parsing its descendants
static $indent = 0; * @param bool $foreignElementWithBlockElementSiblings - Flag used if the root foreign element above has block element siblings
// Stores the root preformatted element when parsing its descendants * @param int $indent - Stores the indention level
static $preformattedElement = null; * @param ?Element $preformattedElement - Stores the root preformatted element when parsing its descendants
// Stores the previous non text node name so it can be used to check for adding * @param ?string $previousNonTextNodeSiblingName - Stores the previous non text node name so it can be used to check for adding
// additional space. * additional space.
static $previousNonTextNodeSiblingName = null; */
} protected function serializeNode(\DOMNode $node, bool $formatOutput = false, bool $first = true, ?Element $foreignElement = null, bool $foreignElementWithBlockElementSiblings = false, int $indent = 0, ?Element $preformattedElement = null, ?string $previousNonTextNodeSiblingName = null): string {
# 13.3. Serializing HTML fragments # 13.3. Serializing HTML fragments
# #
# 1. If the node serializes as void, then return the empty string. # 1. If the node serializes as void, then return the empty string.
@ -474,7 +471,7 @@ class Document extends \DOMDocument {
if ($this->formatOutput) { if ($this->formatOutput) {
// Filter meant to be used with DOM walker generator methods which checks if // Filter meant to be used with DOM walker generator methods which checks if
// elements are block or if elements are inline with block descendants // elements are block or if elements are inline with block descendants
$blockElementFilter = self::serializeBlockElementFilter($currentNode->parentNode); $blockElementFilter = self::blockElementFilterFactory($currentNode->parentNode);
} }
# 2. Append the appropriate string from the following list to s: # 2. Append the appropriate string from the following list to s:
@ -492,7 +489,7 @@ class Document extends \DOMDocument {
} }
if ($formatOutput) { if ($formatOutput) {
$blockElementFilter = self::serializeBlockElementFilter($currentNode); $blockElementFilter = self::blockElementFilterFactory($currentNode);
$hasChildNodes = ($currentNode->hasChildNodes()); $hasChildNodes = ($currentNode->hasChildNodes());
$modify = false; $modify = false;
@ -529,7 +526,9 @@ class Document extends \DOMDocument {
$s .= "\n"; $s .= "\n";
} }
$s .= "\n" . str_repeat(' ', $indent); if (!$first) {
$s .= "\n" . str_repeat(' ', $indent);
}
} }
} }
@ -649,14 +648,14 @@ class Document extends \DOMDocument {
# current node element (thus recursing into this algorithm for that element), # current node element (thus recursing into this algorithm for that element),
# followed by a U+003C LESS-THAN SIGN character (<), a U+002F SOLIDUS character (/), # followed by a U+003C LESS-THAN SIGN character (<), a U+002F SOLIDUS character (/),
# tagname again, and finally a U+003E GREATER-THAN SIGN character (>). # tagname again, and finally a U+003E GREATER-THAN SIGN character (>).
$s .= $this->serializeFragment($currentNode, $formatOutput); $s .= $this->serializeNode($currentNode, $formatOutput, false, $foreignElement, $foreignElementWithBlockElementSiblings, $indent, $preformattedElement, $previousNonTextNodeSiblingName);
if ($formatOutput) { if ($formatOutput) {
if ($modify) { if ($modify) {
// Decrement the indention level. // Decrement the indention level.
$indent--; $indent--;
if ($preformattedElement === null) { if (!$first && $preformattedElement === null) {
// If a foreign element with a foreign element ancestor with block element // If a foreign element with a foreign element ancestor with block element
// siblings and has at least one element child or any element with a block // siblings and has at least one element child or any element with a block
// element descendant... // element descendant...
@ -722,15 +721,17 @@ class Document extends \DOMDocument {
elseif ($currentNode instanceof Comment) { elseif ($currentNode instanceof Comment) {
if ($formatOutput) { if ($formatOutput) {
if ($preformattedElement === null && $foreignElementWithBlockElementSiblings || $currentNode->parentNode->walk($blockElementFilter)->current() !== null) { if ($preformattedElement === null && $foreignElementWithBlockElementSiblings || $currentNode->parentNode->walk($blockElementFilter)->current() !== null) {
// Add an additional newline if the previous sibling wasn't a comment. if (!$first && $previousNonTextNodeSiblingName !== null) {
if ($previousNonTextNodeSiblingName !== null && $previousNonTextNodeSiblingName !== $this->nodeName) { // Add an additional newline if the previous sibling wasn't a comment.
$s .= "\n"; if ($previousNonTextNodeSiblingName !== $this->nodeName) {
} $s .= "\n";
}
$s .= "\n" . str_repeat(' ', $indent); $s .= "\n" . str_repeat(' ', $indent);
}
} }
$previousNonTextNodeSiblingName = $this->nodeName; $previousNonTextNodeSiblingName = $currentNode->nodeName;
} }
# Append the literal string "<!--" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION # Append the literal string "<!--" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION
@ -742,17 +743,19 @@ class Document extends \DOMDocument {
# If current node is a ProcessingInstruction # If current node is a ProcessingInstruction
elseif ($currentNode instanceof ProcessingInstruction) { elseif ($currentNode instanceof ProcessingInstruction) {
if ($formatOutput) { if ($formatOutput) {
if ($preformattedElement === null && $foreignElementWithBlockElementSiblings || $currentNode->parentNode->walk($blockElementFilter)->current() !== null) { if (!$first && $preformattedElement === null && ($foreignElementWithBlockElementSiblings || $currentNode->parentNode->walk($blockElementFilter)->current() !== null)) {
// Add an additional newline if the previous sibling wasn't a processing // Add an additional newline if the previous sibling wasn't a processing
// instruction. // instruction.
if ($previousNonTextNodeSiblingName !== null && $previousNonTextNodeSiblingName !== $this->nodeName) { if ($previousNonTextNodeSiblingName !== null) {
$s .= "\n"; if ($previousNonTextNodeSiblingName !== $this->nodeName) {
} $s .= "\n";
}
$s .= "\n" . str_repeat(' ', $indent); $s .= "\n" . str_repeat(' ', $indent);
}
} }
$previousNonTextNodeSiblingName = $this->nodeName; $previousNonTextNodeSiblingName = $currentNode->nodeName;
} }
# Append the literal string "<?" (U+003C LESS-THAN SIGN, U+003F QUESTION MARK), # Append the literal string "<?" (U+003C LESS-THAN SIGN, U+003F QUESTION MARK),
@ -773,8 +776,19 @@ class Document extends \DOMDocument {
// DEVIATION: The name is trimmed because PHP's DOM does not // DEVIATION: The name is trimmed because PHP's DOM does not
// accept the empty string as a DOCTYPE name // accept the empty string as a DOCTYPE name
$name = trim($currentNode->name, ' '); $name = trim($currentNode->name, ' ');
if ($formatOutput) {
if ($previousNonTextNodeSiblingName !== null) {
$s .= "\n" . str_repeat(' ', $indent);
}
$previousNonTextNodeSiblingName = $currentNode->nodeName;
}
$s .= "<!DOCTYPE $name>"; $s .= "<!DOCTYPE $name>";
} }
$first = false;
} }
# 5. Return s. # 5. Return s.

15
lib/traits/Moonwalk.php

@ -41,17 +41,12 @@ trait Moonwalk {
} }
} }
// If node is an instance of DocumentFragment then it might be the content // If node is an instance of DocumentFragment then set the node to its host if
// fragment of a template element, so iterate through all template elements // it isn't null.
// stored in the element map and see if node is the fragment of one of the
// templates; if it is change node to the template element and reprocess. Magic!
// Can walk backwards THROUGH templates!
if ($node instanceof DocumentFragment) { if ($node instanceof DocumentFragment) {
foreach (ElementMap::getIterator($node->ownerDocument) as $element) { $host = $node->host;
if ($element->ownerDocument->isSameNode($node->ownerDocument) && $element instanceof TemplateElement && $element->content->isSameNode($node)) { if ($host !== null) {
$node = $element; $node = $host;
continue;
}
} }
} }

3
lib/traits/ParentNode.php

@ -21,8 +21,7 @@ trait ParentNode {
// almost identical; so, using that. PHP's DOM doesn't provide the end user any // almost identical; so, using that. PHP's DOM doesn't provide the end user any
// way to create a \DOMNodeList from scratch, so going to cheat and use XPath to // way to create a \DOMNodeList from scratch, so going to cheat and use XPath to
// make one for us. // make one for us.
$isDocument = ($this instanceof Document); $document = ($this instanceof Document) ? $this : $this->ownerDocument;
$document = ($isDocument) ? $this : $this->ownerDocument;
return $document->xpath->query('child::*', $this); return $document->xpath->query('child::*', $this);
} }

212
tests/cases/serializer/TestSerializer.php

@ -25,11 +25,22 @@ use MensBeam\HTML\Parser;
* @covers \MensBeam\HTML\DOM\ToString * @covers \MensBeam\HTML\DOM\ToString
*/ */
class TestSerializer extends \PHPUnit\Framework\TestCase { class TestSerializer extends \PHPUnit\Framework\TestCase {
public function provideStandardTreeTests(): iterable {
$blacklist = [];
$files = new \AppendIterator();
$files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/cases/Serializer/standard/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
foreach ($files as $file) {
if (!in_array(basename($file), $blacklist)) {
yield from $this->parseTreeTestFile($file);
}
}
}
/** /**
* @dataProvider provideStandardSerializerTests * @dataProvider provideStandardTreeTests
* @covers \MensBeam\HTML\DOM\Document::saveHTML * @covers \MensBeam\HTML\DOM\Document::saveHTML
* @covers \MensBeam\HTML\DOM\Document::serializeBlockElementFilter * @covers \MensBeam\HTML\DOM\Document::blockElementFilterFactory
* @covers \MensBeam\HTML\DOM\Document::serializeFragment * @covers \MensBeam\HTML\DOM\Document::serializeNode
* @covers \MensBeam\HTML\DOM\Document::__toString * @covers \MensBeam\HTML\DOM\Document::__toString
* @covers \MensBeam\HTML\DOM\ToString::__toString * @covers \MensBeam\HTML\DOM\ToString::__toString
*/ */
@ -38,62 +49,75 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
$this->assertSame($exp, (string)$node); $this->assertSame($exp, (string)$node);
} }
public function provideStandardSerializerTests(): iterable {
$blacklist = []; public function provideFormattedTreeTests(): iterable {
$files = new \AppendIterator(); $files = new \AppendIterator();
$files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/cases/Serializer/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME)); $files->append(new \GlobIterator(\MensBeam\HTML\DOM\BASE."tests/cases/Serializer/formatted/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
foreach ($files as $file) { foreach ($files as $file) {
$index = 0; yield from $this->parseTreeTestFile($file);
$l = 0;
if (!in_array(basename($file), $blacklist)) {
$lines = array_map(function($v) {
return rtrim($v, "\n");
}, file($file));
while ($l < sizeof($lines)) {
$pos = $l + 1;
assert(in_array($lines[$l], ["#document", "#fragment"]), new \Exception("Test $file #$index does not start with #doocument or #fragment tag at line ".($l + 1)));
$fragment = $lines[$l] === "#fragment";
// collect the test input
$data = [];
for (++$l; $l < sizeof($lines); $l++) {
if (preg_match('/^#(script-(on|off)|output)$/', $lines[$l])) {
break;
}
$data[] = $lines[$l];
}
// set the script mode, if present
assert(preg_match('/^#(script-(on|off)|output)$/', $lines[$l]) === 1, new \Exception("Test $file #$index follows data with something other than script flag or output at line ".($l + 1)));
$script = null;
if ($lines[$l] === "#script-off") {
$script = false;
$l++;
} elseif ($lines[$l] === "#script-on") {
$script = true;
$l++;
}
// collect the output string
$exp = [];
assert($lines[$l] === "#output", new \Exception("Test $file #$index follows input with something other than output at line ".($l + 1)));
for (++$l; $l < sizeof($lines); $l++) {
if ($lines[$l] === "" && in_array(($lines[$l + 1] ?? ""), ["#document", "#fragment"])) {
break;
}
assert(preg_match('/^[^#]/', $lines[$l]) === 1, new \Exception("Test $file #$index contains unrecognized data after output at line ".($l + 1)));
$exp[] = $lines[$l];
}
$exp = implode("\n", $exp);
if (!$script) {
yield basename($file)." #$index (line $pos)" => [$data, $fragment, $exp];
}
$l++;
$index++;
}
}
} }
} }
protected function buildTree(array $data, bool $fragment): \DOMNode { /**
* @dataProvider provideFormattedTreeTests
* @covers \MensBeam\HTML\DOM\Document::saveHTML
* @covers \MensBeam\HTML\DOM\Document::blockElementFilterFactory
* @covers \MensBeam\HTML\DOM\Document::serializeNode
* @covers \MensBeam\HTML\DOM\Document::__toString
* @covers \MensBeam\HTML\DOM\ToString::__toString
*/
public function testFormattedTreeTests(array $data, bool $fragment, string $exp): void {
$node = $this->buildTree($data, $fragment, true);
$this->assertSame($exp, (string)$node);
}
/**
* @covers \MensBeam\HTML\DOM\Document::saveHTML
* @covers \MensBeam\HTML\DOM\Document::serializeNode
*/
public function testSerializingDocumentType(): void {
$d = new Document();
$dt = $d->implementation->createDocumentType('ook', 'eek', 'ack');
$d->appendChild($dt);
$this->assertSame('<!DOCTYPE ook>', $d->saveHTML($dt));
}
/**
* @covers \MensBeam\HTML\DOM\Document::saveHTML
* @covers \MensBeam\HTML\DOM\Document::serializeNode
* @covers \MensBeam\HTML\DOM\ToString::__toString
*/
public function testSerializingElements(): void {
$d = new Document();
$i = $d->createElement('input');
$i->appendChild($d->createTextNode('You should not see this text'));
$this->assertSame('<input>', (string)$i);
$this->assertSame('', $d->saveHTML($i));
$t = $d->createElement('template');
$t->content->appendChild($d->createTextNode('Ook!'));
$this->assertSame('<template>Ook!</template>', (string)$t);
$this->assertSame('Ook!', $d->saveHTML($t));
}
/** @covers \MensBeam\HTML\DOM\Document::saveHTML */
public function testSerializerFailure(): void {
$this->expectException(DOMException::class);
$this->expectExceptionCode(DOMException::WRONG_DOCUMENT);
$d = new Document();
$h = $d->createElement('html');
$d2 = new Document();
$d2->saveHTML($h);
}
protected function buildTree(array $data, bool $fragment, bool $formatOutput = false): \DOMNode {
$document = new Document; $document = new Document;
$document->formatOutput = $formatOutput;
if ($fragment) { if ($fragment) {
$document->appendChild($document->createElement("html")); $document->appendChild($document->createElement("html"));
$out = $document->createDocumentFragment(); $out = $document->createDocumentFragment();
@ -154,42 +178,50 @@ class TestSerializer extends \PHPUnit\Framework\TestCase {
return $out; return $out;
} }
protected function parseTreeTestFile(string $file): \Generator {
/** @covers \MensBeam\HTML\DOM\Document::saveHTML */ $index = 0;
public function testSerializingDocumentType(): void { $l = 0;
$d = new Document(); $lines = array_map(function($v) {
$dt = $d->implementation->createDocumentType('ook', 'eek', 'ack'); return rtrim($v, "\n");
$d->appendChild($dt); }, file($file));
$this->assertSame('<!DOCTYPE ook>', $d->saveHTML($dt)); while ($l < sizeof($lines)) {
} $pos = $l + 1;
assert(in_array($lines[$l], ["#document", "#fragment"]), new \Exception("Test $file #$index does not start with #document or #fragment tag at line ".($l + 1)));
$fragment = $lines[$l] === "#fragment";
/** // collect the test input
* @covers \MensBeam\HTML\DOM\Document::saveHTML $data = [];
* @covers \MensBeam\HTML\DOM\Document::serializeFragment for (++$l; $l < sizeof($lines); $l++) {
* @covers \MensBeam\HTML\DOM\ToString::__toString if (preg_match('/^#(script-(on|off)|output)$/', $lines[$l])) {
*/ break;
public function testSerializingElements(): void { }
$d = new Document(); $data[] = $lines[$l];
$i = $d->createElement('input'); }
$i->appendChild($d->createTextNode('You should not see this text')); // set the script mode, if present
$this->assertSame('<input>', (string)$i); assert(preg_match('/^#(script-(on|off)|output)$/', $lines[$l]) === 1, new \Exception("Test $file #$index follows data with something other than script flag or output at line ".($l + 1)));
$this->assertSame('', $d->saveHTML($i)); $script = null;
if ($lines[$l] === "#script-off") {
$t = $d->createElement('template'); $script = false;
$t->content->appendChild($d->createTextNode('Ook!')); $l++;
$this->assertSame('<template>Ook!</template>', (string)$t); } elseif ($lines[$l] === "#script-on") {
$this->assertSame('Ook!', $d->saveHTML($t)); $script = true;
} $l++;
}
// collect the output string
/** @covers \MensBeam\HTML\DOM\Document::saveHTML */ $exp = [];
public function testSerializerFailure(): void { assert($lines[$l] === "#output", new \Exception("Test $file #$index follows input with something other than output at line ".($l + 1)));
$this->expectException(DOMException::class); for (++$l; $l < sizeof($lines); $l++) {
$this->expectExceptionCode(DOMException::WRONG_DOCUMENT); if ($lines[$l] === "" && in_array(($lines[$l + 1] ?? ""), ["#document", "#fragment"])) {
$d = new Document(); break;
$h = $d->createElement('html'); }
$d2 = new Document(); assert(preg_match('/^[^#]/', $lines[$l]) === 1, new \Exception("Test $file #$index contains unrecognized data after output at line ".($l + 1)));
$d2->saveHTML($h); $exp[] = $lines[$l];
}
$exp = implode("\n", $exp);
if (!$script) {
yield basename($file)." #$index (line $pos)" => [$data, $fragment, $exp];
}
$l++;
$index++;
}
} }
} }

13
tests/cases/serializer/formatted/mensbeam01.dat

@ -0,0 +1,13 @@
#document
| <html>
#output
<html></html>
#document
| <!-- data -->
| <!DOCTYPE html>
| <html>
#output
<!--data-->
<!DOCTYPE html>
<html></html>

0
tests/cases/serializer/menbeam01.dat → tests/cases/serializer/standard/mensbeam01.dat

0
tests/cases/serializer/menbeam02.dat → tests/cases/serializer/standard/mensbeam02.dat

0
tests/cases/serializer/wpt01.dat → tests/cases/serializer/standard/wpt01.dat

Loading…
Cancel
Save