Browse Source

Re-import of serializer tests

Some errors remain
serialize
J. King 3 years ago
parent
commit
b8d4636664
  1. 160
      tests/cases/TestSerializer.php
  2. 23
      tests/cases/serializer/README.md
  3. 33
      tests/cases/serializer/mensbeam01.dat
  4. 34
      tests/cases/serializer/mensbeam02.dat
  5. 913
      tests/cases/serializer/wpt01.dat
  6. 3
      tests/phpunit.dist.xml

160
tests/cases/TestSerializer.php

@ -0,0 +1,160 @@
<?php
/**
* @license MIT
* Copyright 2017, Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM\TestCase;
use MensBeam\HTML\DOM\{
Document,
DOMException,
Exception
};
use MensBeam\HTML\Parser;
use MensBeam\HTML\Parser\Serializer;
/**
* @covers \MensBeam\HTML\DOM\Comment
* @covers \MensBeam\HTML\DOM\Document
* @covers \MensBeam\HTML\DOM\DocumentFragment
* @covers \MensBeam\HTML\DOM\Element
* @covers \MensBeam\HTML\DOM\HTMLTemplateElement
* @covers \MensBeam\HTML\DOM\ProcessingInstruction
* @covers \MensBeam\HTML\DOM\Text
* @covers \MensBeam\HTML\DOM\ToString
*/
class TestSerializer extends \PHPUnit\Framework\TestCase {
    /**
     * Supplies every test found in the serializer ".dat" files as a data set.
     *
     * Files whose base name is listed in $blacklist are skipped; the list is
     * currently empty, so every matching file is used.
     *
     * @return iterable Data sets as produced by parseTreeTestFile()
     */
    public function provideStandardTreeTests(): iterable {
        $blacklist = [];
        $files = new \AppendIterator();
        $files->append(new \GlobIterator(\MensBeam\HTML\Parser\BASE."tests/cases/serializer/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
        foreach ($files as $file) {
            // strict comparison: file names are strings and must match exactly
            if (!in_array(basename($file), $blacklist, true)) {
                yield from $this->parseTreeTestFile($file);
            }
        }
    }

    /**
     * Builds the tree described by a test and checks its serialization
     * against the expected output.
     *
     * @dataProvider provideStandardTreeTests
     * @covers \MensBeam\HTML\Parser\Serializer
     *
     * @param array $data The lines of the tree dump
     * @param bool $fragment Whether the tree is a document fragment rather than a document
     * @param string $exp The expected serialization
     */
    public function testStandardTreeTests(array $data, bool $fragment, string $exp): void {
        $node = $this->buildTree($data, $fragment);
        $this->assertSame($exp, Serializer::serializeOuter($node));
    }

    /**
     * Builds a DOM tree from the lines of a tree dump.
     *
     * Each line starts with "|" followed by whitespace; the width of that
     * prefix gives the nesting depth in steps of two. The remainder of the
     * line is a comment, doctype, processing instruction, element, attribute
     * or (possibly multi-line) quoted text.
     *
     * @param array $data The lines of the tree dump, as a list
     * @param bool $fragment If true the tree is built inside a document fragment, otherwise directly inside the document
     * @param bool $formatOutput Value assigned to the document's formatOutput property
     * @return \DOMNode The document or document fragment holding the tree
     * @throws \Exception If a line cannot be interpreted or a text node is never closed
     */
    protected function buildTree(array $data, bool $fragment, bool $formatOutput = false): \DOMNode {
        $document = new \DOMDocument;
        $document->formatOutput = $formatOutput;
        if ($fragment) {
            $document->appendChild($document->createElement("html"));
            $out = $document->createDocumentFragment();
        } else {
            $out = $document;
        }
        // these do not change while the tree is built, so compute them once
        $namespaces = array_flip(Parser::NAMESPACE_MAP);
        $lineCount = count($data);
        $cur = $out;
        $pad = 2;
        // process each line in turn
        for ($l = 0; $l < $lineCount; $l++) {
            // a line without the "|" prefix cannot be placed in the tree
            if (!preg_match('/^(\|\s+)(.+)/', $data[$l], $m)) {
                throw new \Exception("Input data is invalid on line ".($l + 1));
            }
            // pop any parents as long as the padding of the line is less than the expected padding
            $p = strlen((string) $m[1]);
            assert($p >= 2 && $p <= $pad && !($p % 2), new \Exception("Input data is invalid on line ".($l + 1)));
            while ($p < $pad) {
                $pad -= 2;
                $cur = $cur->parentNode;
            }
            // act based upon what the rest of the line looks like
            $d = $m[2];
            if (preg_match('/^<!-- (.*?) -->$/', $d, $m)) {
                // comment
                $cur->appendChild($document->createComment($m[1]));
            } elseif (preg_match('/^<!DOCTYPE(?: ([^ >]*)(?: "([^"]*)" "([^"]*)")?)?>$/', $d, $m)) {
                // doctype; a missing name is replaced by a single space
                $name = strlen((string) ($m[1] ?? "")) ? $m[1] : " ";
                $public = strlen((string) ($m[2] ?? "")) ? $m[2] : "";
                $system = strlen((string) ($m[3] ?? "")) ? $m[3] : "";
                $cur->appendChild($document->implementation->createDocumentType($name, $public, $system));
            } elseif (preg_match('/^<\?([^ ]+) ([^>]*)>$/', $d, $m)) {
                // processing instruction
                $cur->appendChild($document->createProcessingInstruction($m[1], $m[2]));
            } elseif (preg_match('/^<(?:([^ ]+) )?([^>]+)>$/', $d, $m)) {
                // element; an unknown namespace label is used verbatim as the namespace
                $ns = strlen((string) $m[1]) ? ($namespaces[$m[1]] ?? $m[1]) : null;
                $cur = $cur->appendChild($document->createElementNS($ns, $m[2]));
                // subsequent lines may now be nested one level deeper
                $pad += 2;
            } elseif (preg_match('/^(?:([^" ]+) )?([^"=]+)="((?:[^"]|"(?!$))*)"$/', $d, $m)) {
                // attribute
                $ns = strlen((string) $m[1]) ? ($namespaces[$m[1]] ?? $m[1]) : "";
                if ($ns === '') {
                    $cur->setAttribute($m[2], $m[3]);
                } else {
                    $cur->setAttributeNS($ns, $m[2], $m[3]);
                }
            } elseif (preg_match('/^"((?:[^"]|"(?!$))*)("?)$/', $d, $m)) {
                // text; without a closing quote the text continues on the following lines
                $t = $m[1];
                $start = $l;
                while (!strlen((string) $m[2])) {
                    // running out of lines means the text node was never closed
                    if (++$l >= $lineCount) {
                        throw new \Exception("Input data is invalid on line ".($start + 1));
                    }
                    preg_match('/^((?:[^"]|"(?!$))*)("?)$/', $data[$l], $m);
                    $t .= "\n".$m[1];
                }
                $cur->appendChild($document->createTextNode($t));
            } else {
                throw new \Exception("Input data is invalid on line ".($l + 1));
            }
        }
        return $out;
    }

    /**
     * Reads a test file and yields one data set per test it contains.
     *
     * Each test consists of a "#document" or "#fragment" line, the lines of
     * the tree, an optional "#script-on" or "#script-off" line, an "#output"
     * line and the expected output; tests are separated by an empty line.
     * Tests flagged "#script-on" are parsed but not yielded.
     *
     * @param string $file Path of the test file
     * @return \Generator Yields [tree lines, fragment flag, expected output], keyed by file name, test index and starting line
     * @throws \Exception If the file cannot be read
     */
    protected function parseTreeTestFile(string $file): \Generator {
        $raw = file($file);
        if ($raw === false) {
            throw new \Exception("Test file $file could not be read");
        }
        $lines = array_map(function($v) {
            return rtrim($v, "\n");
        }, $raw);
        $lineCount = count($lines);
        $index = 0;
        $l = 0;
        while ($l < $lineCount) {
            $pos = $l + 1;
            assert(in_array($lines[$l], ["#document", "#fragment"], true), new \Exception("Test $file #$index does not start with #document or #fragment tag at line ".($l + 1)));
            $fragment = $lines[$l] === "#fragment";
            // collect the test input
            $data = [];
            for (++$l; $l < $lineCount; $l++) {
                if (preg_match('/^#(script-(on|off)|output)$/', $lines[$l])) {
                    break;
                }
                $data[] = $lines[$l];
            }
            // set the script mode, if present; the file may end before any marker is found
            $marker = $lines[$l] ?? "";
            assert(preg_match('/^#(script-(on|off)|output)$/', $marker) === 1, new \Exception("Test $file #$index follows data with something other than script flag or output at line ".($l + 1)));
            $script = null;
            if ($marker === "#script-off") {
                $script = false;
                $l++;
            } elseif ($marker === "#script-on") {
                $script = true;
                $l++;
            }
            // collect the output string
            $exp = [];
            assert(($lines[$l] ?? "") === "#output", new \Exception("Test $file #$index follows input with something other than output at line ".($l + 1)));
            for (++$l; $l < $lineCount; $l++) {
                // an empty line followed by the start of another test ends the output
                if ($lines[$l] === "" && in_array(($lines[$l + 1] ?? ""), ["#document", "#fragment"], true)) {
                    break;
                }
                assert(preg_match('/^([^#]|$)/', $lines[$l]) === 1, new \Exception("Test $file #$index contains unrecognized data after output at line ".($l + 1)));
                $exp[] = $lines[$l];
            }
            $exp = implode("\n", $exp);
            // tests which require scripting to be enabled are not yielded
            if (!$script) {
                yield basename($file)." #$index (line $pos)" => [$data, $fragment, $exp];
            }
            // skip the separator line
            $l++;
            $index++;
        }
    }
}

23
tests/cases/serializer/README.md

@ -0,0 +1,23 @@
HTML DOM serialization tests
============================
The format of these tests is essentially the format of html5lib's tree construction tests in reverse. There are, however, important differences, so the format is documented in full here.
Each file containing serialization tests consists of any number of
tests separated by two newlines (LF) and a single newline before the end
of the file. For instance:
[TEST]LF
LF
[TEST]LF
LF
[TEST]LF
Where [TEST] is the following format:
Each test begins with a line reading "#document" or "#fragment"; subsequent
lines represent the document or document fragment (respectively) used as
input, until a line is encountered which reads "#output", "#script-on",
or "#script-off".

33
tests/cases/serializer/mensbeam01.dat

@ -0,0 +1,33 @@
#fragment
| <fake_ns test:test>
#output
<test:test></test:test>
#fragment
| <span>
| test💩test="test"
#output
<span test💩test="test"></span>
#fragment
| <wbr>
| "You should not see this text."
#output
<wbr>
#fragment
| <wbr>
| class="test"
#output
<wbr class="test">
#fragment
| <poop💩>
#output
<poop💩></poop💩>
#fragment
| <test>
| poop💩="soccer"
#output
<test poop💩="soccer"></test>

34
tests/cases/serializer/mensbeam02.dat

@ -0,0 +1,34 @@
#document
| <html>
#output
<html></html>
#document
| <!DOCTYPE html>
| <html>
#output
<!DOCTYPE html><html></html>
#document
| <!DOCTYPE html "public" "system">
| <html>
#output
<!DOCTYPE html><html></html>
#document
| <!DOCTYPE test>
| <html>
#output
<!DOCTYPE test><html></html>
#document
| <!DOCTYPE>
| <html>
#output
<!DOCTYPE ><html></html>
#document
| <html>
| <?php echo "Hello world!"; ?>
#output
<html><?php echo "Hello world!"; ?></html>

913
tests/cases/serializer/wpt01.dat

@ -0,0 +1,913 @@
#fragment
| <span>
#output
<span></span>
#fragment
| <span>
| <a>
#output
<span><a></a></span>
#fragment
| <span>
| <a>
| b="c"
#output
<span><a b="c"></a></span>
#fragment
| <span>
| <a>
| b="&"
#output
<span><a b="&amp;"></a></span>
#fragment
| <span>
| <a>
| b=" "
#output
<span><a b="&nbsp;"></a></span>
#fragment
| <span>
| <a>
| b="""
#output
<span><a b="&quot;"></a></span>
#fragment
| <span>
| <a>
| b="<"
#output
<span><a b="<"></a></span>
#fragment
| <span>
| <a>
| b=">"
#output
<span><a b=">"></a></span>
#fragment
| <span>
| <a>
| href="javascript:"<>""
#output
<span><a href="javascript:&quot;<>&quot;"></a></span>
#fragment
| <span>
| <svg svg>
| xlink xlink:href="a"
#output
<span><svg xlink:href="a"></svg></span>
#fragment
| <span>
| <svg svg>
| xmlns xmlns:svg="test"
#output
<span><svg xmlns:svg="test"></svg></span>
#fragment
| <span>
| "a"
#output
<span>a</span>
#fragment
| <span>
| "&"
#output
<span>&amp;</span>
#fragment
| <span>
| " "
#output
<span>&nbsp;</span>
#fragment
| <span>
| "<"
#output
<span>&lt;</span>
#fragment
| <span>
| ">"
#output
<span>&gt;</span>
#fragment
| <span>
| """
#output
<span>"</span>
#fragment
| <span>
| <style>
| "<&>"
#output
<span><style><&></style></span>
#fragment
| <span>
| <script>
| type="test"
| "<&>"
#output
<span><script type="test"><&></script></span>
#fragment
| <script>
| type="test"
| "<&>"
#output
<script type="test"><&></script>
#fragment
| <span>
| <xmp>
| "<&>"
#output
<span><xmp><&></xmp></span>
#fragment
| <span>
| <iframe>
| "<&>"
#output
<span><iframe><&></iframe></span>
#fragment
| <span>
| <noembed>
| "<&>"
#output
<span><noembed><&></noembed></span>
#fragment
| <span>
| <noframes>
| "<&>"
#output
<span><noframes><&></noframes></span>
#fragment
| <span>
| <noscript>
| "<&>"
#script-off
#output
<span><noscript>&lt;&amp;&gt;</noscript></span>
#fragment
| <span>
| <noscript>
| "<&>"
#script-on
#output
<span><noscript><&></noscript></span>
#fragment
| <span>
| <!-- data -->
#output
<span><!--data--></span>
#fragment
| <span>
| <a>
| <b>
| <c>
| <d>
| "e"
| <f>
| <g>
| "h"
#output
<span><a><b><c></c></b><d>e</d><f><g>h</g></f></a></span>
#fragment
| <span>
| b="c"
#output
<span b="c"></span>
#fragment
| <span>
| <svg svg>
| xml xml:foo="test"
#output
<span><svg xml:foo="test"></svg></span>
#fragment
| <span>
| <svg svg>
| xml abc:foo="test"
#output
<span><svg xml:foo="test"></svg></span>
#fragment
| <span>
| <svg svg>
| xmlns xmlns:foo="test"
#output
<span><svg xmlns:foo="test"></svg></span>
#fragment
| <span>
| <svg svg>
| xmlns xmlns="test"
#output
<span><svg xmlns="test"></svg></span>
#fragment
| <span>
| <svg svg>
| fake_ns abc:def="test"
#output
<span><svg abc:def="test"></svg></span>
#fragment
| <pre>
| "
"
#output
<pre>
</pre>
#fragment
| <pre>
| "a
"
#output
<pre>a
</pre>
#fragment
| <span>
| <pre>
| "
"
#output
<span><pre>
</pre></span>
#fragment
| <span>
| <pre>
| "a
"
#output
<span><pre>a
</pre></span>
#fragment
| <textarea>
| "
"
#output
<textarea>
</textarea>
#fragment
| <textarea>
| "a
"
#output
<textarea>a
</textarea>
#fragment
| <span>
| <textarea>
| "
"
#output
<span><textarea>
</textarea></span>
#fragment
| <span>
| <textarea>
| "a
"
#output
<span><textarea>a
</textarea></span>
#fragment
| <listing>
| "
"
#output
<listing>
</listing>
#fragment
| <listing>
| "a
"
#output
<listing>a
</listing>
#fragment
| <span>
| <listing>
| "
"
#output
<span><listing>
</listing></span>
#fragment
| <span>
| <listing>
| "a
"
#output
<span><listing>a
</listing></span>
#fragment
| <area>
#output
<area>
#fragment
| <span>
| <area>
| <a>
| "test"
| <b>
#output
<span><area><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <area>
| <b>
#output
<span><a>test</a><area><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <area>
#output
<span><a>test</a><b></b><area></span>
#fragment
| <base>
#output
<base>
#fragment
| <span>
| <base>
| <a>
| "test"
| <b>
#output
<span><base><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <base>
| <b>
#output
<span><a>test</a><base><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <base>
#output
<span><a>test</a><b></b><base></span>
#fragment
| <basefont>
#output
<basefont>
#fragment
| <span>
| <basefont>
| <a>
| "test"
| <b>
#output
<span><basefont><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <basefont>
| <b>
#output
<span><a>test</a><basefont><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <basefont>
#output
<span><a>test</a><b></b><basefont></span>
#fragment
| <bgsound>
#output
<bgsound>
#fragment
| <span>
| <bgsound>
| <a>
| "test"
| <b>
#output
<span><bgsound><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <bgsound>
| <b>
#output
<span><a>test</a><bgsound><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <bgsound>
#output
<span><a>test</a><b></b><bgsound></span>
#fragment
| <br>
#output
<br>
#fragment
| <span>
| <br>
| <a>
| "test"
| <b>
#output
<span><br><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <br>
| <b>
#output
<span><a>test</a><br><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <br>
#output
<span><a>test</a><b></b><br></span>
#fragment
| <col>
#output
<col>
#fragment
| <span>
| <col>
| <a>
| "test"
| <b>
#output
<span><col><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <col>
| <b>
#output
<span><a>test</a><col><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <col>
#output
<span><a>test</a><b></b><col></span>
#fragment
| <embed>
#output
<embed>
#fragment
| <span>
| <embed>
| <a>
| "test"
| <b>
#output
<span><embed><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <embed>
| <b>
#output
<span><a>test</a><embed><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <embed>
#output
<span><a>test</a><b></b><embed></span>
#fragment
| <frame>
#output
<frame>
#fragment
| <span>
| <frame>
| <a>
| "test"
| <b>
#output
<span><frame><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <frame>
| <b>
#output
<span><a>test</a><frame><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <frame>
#output
<span><a>test</a><b></b><frame></span>
#fragment
| <hr>
#output
<hr>
#fragment
| <span>
| <hr>
| <a>
| "test"
| <b>
#output
<span><hr><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <hr>
| <b>
#output
<span><a>test</a><hr><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <hr>
#output
<span><a>test</a><b></b><hr></span>
#fragment
| <img>
#output
<img>
#fragment
| <span>
| <img>
| <a>
| "test"
| <b>
#output
<span><img><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <img>
| <b>
#output
<span><a>test</a><img><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <img>
#output
<span><a>test</a><b></b><img></span>
#fragment
| <input>
#output
<input>
#fragment
| <span>
| <input>
| <a>
| "test"
| <b>
#output
<span><input><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <input>
| <b>
#output
<span><a>test</a><input><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <input>
#output
<span><a>test</a><b></b><input></span>
#fragment
| <keygen>
#output
<keygen>
#fragment
| <span>
| <keygen>
| <a>
| "test"
| <b>
#output
<span><keygen><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <keygen>
| <b>
#output
<span><a>test</a><keygen><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <keygen>
#output
<span><a>test</a><b></b><keygen></span>
#fragment
| <link>
#output
<link>
#fragment
| <span>
| <link>
| <a>
| "test"
| <b>
#output
<span><link><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <link>
| <b>
#output
<span><a>test</a><link><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <link>
#output
<span><a>test</a><b></b><link></span>
#fragment
| <meta>
#output
<meta>
#fragment
| <span>
| <meta>
| <a>
| "test"
| <b>
#output
<span><meta><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <meta>
| <b>
#output
<span><a>test</a><meta><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <meta>
#output
<span><a>test</a><b></b><meta></span>
#fragment
| <param>
#output
<param>
#fragment
| <span>
| <param>
| <a>
| "test"
| <b>
#output
<span><param><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <param>
| <b>
#output
<span><a>test</a><param><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <param>
#output
<span><a>test</a><b></b><param></span>
#fragment
| <source>
#output
<source>
#fragment
| <span>
| <source>
| <a>
| "test"
| <b>
#output
<span><source><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <source>
| <b>
#output
<span><a>test</a><source><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <source>
#output
<span><a>test</a><b></b><source></span>
#fragment
| <track>
#output
<track>
#fragment
| <span>
| <track>
| <a>
| "test"
| <b>
#output
<span><track><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <track>
| <b>
#output
<span><a>test</a><track><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <track>
#output
<span><a>test</a><b></b><track></span>
#fragment
| <wbr>
#output
<wbr>
#fragment
| <span>
| <wbr>
| <a>
| "test"
| <b>
#output
<span><wbr><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <wbr>
| <b>
#output
<span><a>test</a><wbr><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <wbr>
#output
<span><a>test</a><b></b><wbr></span>

3
tests/phpunit.dist.xml

@ -28,6 +28,9 @@
<testsuite name="Parser">
<file>cases/TestParser.php</file>
</testsuite>
<testsuite name="Serializer">
<file>cases/TestSerializer.php</file>
</testsuite>
<testsuite name="Encoding change">
<file>cases/TestEncodingChange.php</file>
</testsuite>

Loading…
Cancel
Save