Re-import of serializer tests

Some errors remain
This commit is contained in:
J. King 2021-10-14 15:44:14 -04:00
parent 180dcd3e51
commit b8d4636664
6 changed files with 1166 additions and 0 deletions

View file

@ -0,0 +1,160 @@
<?php
/**
* @license MIT
* Copyright 2017, Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details
*/
declare(strict_types=1);
namespace MensBeam\HTML\DOM\TestCase;
use MensBeam\HTML\DOM\{
Document,
DOMException,
Exception
};
use MensBeam\HTML\Parser;
use MensBeam\HTML\Parser\Serializer;
/**
 * Data-driven tests for the HTML serializer.
 *
 * Test cases are read from the ".dat" files under tests/cases/serializer/;
 * each case describes a DOM tree, which is built, serialized, and compared
 * against the expected output recorded alongside it.
 *
 * @covers \MensBeam\HTML\DOM\Comment
 * @covers \MensBeam\HTML\DOM\Document
 * @covers \MensBeam\HTML\DOM\DocumentFragment
 * @covers \MensBeam\HTML\DOM\Element
 * @covers \MensBeam\HTML\DOM\HTMLTemplateElement
 * @covers \MensBeam\HTML\DOM\ProcessingInstruction
 * @covers \MensBeam\HTML\DOM\Text
 * @covers \MensBeam\HTML\DOM\ToString
 */
class TestSerializer extends \PHPUnit\Framework\TestCase {
    /**
     * Yields every test case found in the serializer ".dat" files.
     *
     * @return iterable Data sets of the form [tree lines, fragment flag, expected output], keyed by a descriptive label
     */
    public function provideStandardTreeTests(): iterable {
        // base names of test files which should be skipped entirely
        $blacklist = [];
        $files = new \AppendIterator();
        $files->append(new \GlobIterator(\MensBeam\HTML\Parser\BASE."tests/cases/serializer/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
        foreach ($files as $file) {
            if (!in_array(basename($file), $blacklist, true)) {
                yield from $this->parseTreeTestFile($file);
            }
        }
    }

    /**
     * Builds the tree described by a test case and checks its serialization.
     *
     * @dataProvider provideStandardTreeTests
     * @covers \MensBeam\HTML\Parser\Serializer
     */
    public function testStandardTreeTests(array $data, bool $fragment, string $exp): void {
        $node = $this->buildTree($data, $fragment);
        $this->assertSame($exp, Serializer::serializeOuter($node));
    }

    /**
     * Builds a DOM tree from the "| "-prefixed tree lines of a test case.
     *
     * @param array $data The tree lines, one node or attribute per line (text nodes may span several lines)
     * @param bool $fragment Whether to build a document fragment rather than a document
     * @param bool $formatOutput Value to assign to the document's formatOutput property
     * @return \DOMNode The document, or the document fragment when $fragment is true
     * @throws \Exception If a line is not recognized or a text node is never terminated
     */
    protected function buildTree(array $data, bool $fragment, bool $formatOutput = false): \DOMNode {
        $document = new \DOMDocument;
        $document->formatOutput = $formatOutput;
        if ($fragment) {
            $document->appendChild($document->createElement("html"));
            $out = $document->createDocumentFragment();
        } else {
            $out = $document;
        }
        $cur = $out;
        $pad = 2;
        $count = sizeof($data);
        // the map is flipped once so that namespace labels in the test data can be looked up directly
        $namespaces = array_flip(Parser::NAMESPACE_MAP);
        // process each line in turn
        for ($l = 0; $l < $count; $l++) {
            if (!preg_match('/^(\|\s+)(.+)/', $data[$l], $m)) {
                // without this guard a malformed line would surface as undefined-index errors further down
                throw new \Exception("Input data is invalid on line ".($l + 1));
            }
            // pop any parents as long as the padding of the line is less than the expected padding
            $p = strlen($m[1]);
            assert($p >= 2 && $p <= $pad && !($p % 2), new \Exception("Input data is invalid on line ".($l + 1)));
            while ($p < $pad) {
                $pad -= 2;
                $cur = $cur->parentNode;
            }
            // act based upon what the rest of the line looks like
            $d = $m[2];
            if (preg_match('/^<!-- (.*?) -->$/', $d, $m)) {
                // comment
                $cur->appendChild($document->createComment($m[1]));
            } elseif (preg_match('/^<!DOCTYPE(?: ([^ >]*)(?: "([^"]*)" "([^"]*)")?)?>$/', $d, $m)) {
                // doctype; a missing name is represented by a single space
                $name = ($m[1] ?? "") !== "" ? $m[1] : " ";
                $public = $m[2] ?? "";
                $system = $m[3] ?? "";
                $cur->appendChild($document->implementation->createDocumentType($name, $public, $system));
            } elseif (preg_match('/^<\?([^ ]+) ([^>]*)>$/', $d, $m)) {
                // processing instruction
                $cur->appendChild($document->createProcessingInstruction($m[1], $m[2]));
            } elseif (preg_match('/^<(?:([^ ]+) )?([^>]+)>$/', $d, $m)) {
                // element; an unknown namespace label is used verbatim as the namespace
                $ns = $m[1] !== "" ? ($namespaces[$m[1]] ?? $m[1]) : null;
                $cur = $cur->appendChild($document->createElementNS($ns, $m[2]));
                $pad += 2;
            } elseif (preg_match('/^(?:([^" ]+) )?([^"=]+)="((?:[^"]|"(?!$))*)"$/', $d, $m)) {
                // attribute
                $ns = $m[1] !== "" ? ($namespaces[$m[1]] ?? $m[1]) : "";
                if ($ns === '') {
                    $cur->setAttribute($m[2], $m[3]);
                } else {
                    $cur->setAttributeNS($ns, $m[2], $m[3]);
                }
            } elseif (preg_match('/^"((?:[^"]|"(?!$))*)("?)$/', $d, $m)) {
                // text; keep consuming lines until one ends with the closing quotation mark
                $start = $l + 1;
                $t = $m[1];
                while ($m[2] === "") {
                    if (++$l >= $count || !preg_match('/^((?:[^"]|"(?!$))*)("?)$/', $data[$l], $m)) {
                        throw new \Exception("Input data contains unterminated text starting on line ".$start);
                    }
                    $t .= "\n".$m[1];
                }
                $cur->appendChild($document->createTextNode($t));
            } else {
                throw new \Exception("Input data is invalid on line ".($l + 1));
            }
        }
        return $out;
    }

    /**
     * Splits a ".dat" file into its individual test cases.
     *
     * Each case starts with "#document" or "#fragment", continues with the tree
     * lines, optionally a "#script-on" or "#script-off" flag, and ends with the
     * expected output after "#output". Cases flagged "#script-on" are parsed but
     * not yielded.
     *
     * @param string $file Path of the test file to read
     * @return \Generator Yields [tree lines, fragment flag, expected output] keyed by "<file> #<index> (line <n>)"
     * @throws \Exception If the file cannot be read or ends before an output section
     */
    protected function parseTreeTestFile(string $file): \Generator {
        $raw = file($file);
        if ($raw === false) {
            throw new \Exception("Test file $file could not be read");
        }
        $lines = array_map(static function($v) {
            return rtrim($v, "\n");
        }, $raw);
        $count = sizeof($lines);
        $index = 0;
        $l = 0;
        while ($l < $count) {
            $pos = $l + 1;
            assert(in_array($lines[$l], ["#document", "#fragment"], true), new \Exception("Test $file #$index does not start with #document or #fragment tag at line ".($l + 1)));
            $fragment = $lines[$l] === "#fragment";
            // collect the test input
            $data = [];
            for (++$l; $l < $count; $l++) {
                if (preg_match('/^#(script-(on|off)|output)$/', $lines[$l])) {
                    break;
                }
                $data[] = $lines[$l];
            }
            // the loop above only stops early on a script flag or output marker, so running off the end means neither was found
            if ($l >= $count) {
                throw new \Exception("Test $file #$index follows data with something other than script flag or output at line ".($l + 1));
            }
            // set the script mode, if present
            $script = null;
            if ($lines[$l] === "#script-off") {
                $script = false;
                $l++;
            } elseif ($lines[$l] === "#script-on") {
                $script = true;
                $l++;
            }
            // collect the output string; the null fallback covers a script flag on the very last line
            $exp = [];
            assert(($lines[$l] ?? null) === "#output", new \Exception("Test $file #$index follows input with something other than output at line ".($l + 1)));
            for (++$l; $l < $count; $l++) {
                // a blank line followed by the start of another test ends the output
                if ($lines[$l] === "" && in_array(($lines[$l + 1] ?? ""), ["#document", "#fragment"], true)) {
                    break;
                }
                assert(preg_match('/^([^#]|$)/', $lines[$l]) === 1, new \Exception("Test $file #$index contains unrecognized data after output at line ".($l + 1)));
                $exp[] = $lines[$l];
            }
            $exp = implode("\n", $exp);
            if (!$script) {
                yield basename($file)." #$index (line $pos)" => [$data, $fragment, $exp];
            }
            // skip the blank separator line
            $l++;
            $index++;
        }
    }
}

View file

@ -0,0 +1,23 @@
HTML DOM serialization tests
============================
The format of these tests is essentially the format of html5lib's tree construction tests in reverse. There are, however, important differences, so the format is documented in full here.
Each file containing serialization tests consists of any number of
tests separated by two newlines (LF) and a single newline before the end
of the file. For instance:
[TEST]LF
LF
[TEST]LF
LF
[TEST]LF
Where [TEST] is the following format:
Each test begins with a line reading "#document" or "#fragment"; subsequent
lines represent the document or document fragment (respectively) used as
input, until a line is encountered which reads "#output", "#script-on",
or "#script-off".

View file

@ -0,0 +1,33 @@
#fragment
| <fake_ns test:test>
#output
<test:test></test:test>
#fragment
| <span>
| test💩test="test"
#output
<span test💩test="test"></span>
#fragment
| <wbr>
| "You should not see this text."
#output
<wbr>
#fragment
| <wbr>
| class="test"
#output
<wbr class="test">
#fragment
| <poop💩>
#output
<poop💩></poop💩>
#fragment
| <test>
| poop💩="soccer"
#output
<test poop💩="soccer"></test>

View file

@ -0,0 +1,34 @@
#document
| <html>
#output
<html></html>
#document
| <!DOCTYPE html>
| <html>
#output
<!DOCTYPE html><html></html>
#document
| <!DOCTYPE html "public" "system">
| <html>
#output
<!DOCTYPE html><html></html>
#document
| <!DOCTYPE test>
| <html>
#output
<!DOCTYPE test><html></html>
#document
| <!DOCTYPE>
| <html>
#output
<!DOCTYPE ><html></html>
#document
| <html>
| <?php echo "Hello world!"; ?>
#output
<html><?php echo "Hello world!"; ?></html>

View file

@ -0,0 +1,913 @@
#fragment
| <span>
#output
<span></span>
#fragment
| <span>
| <a>
#output
<span><a></a></span>
#fragment
| <span>
| <a>
| b="c"
#output
<span><a b="c"></a></span>
#fragment
| <span>
| <a>
| b="&"
#output
<span><a b="&amp;"></a></span>
#fragment
| <span>
| <a>
| b=" "
#output
<span><a b="&nbsp;"></a></span>
#fragment
| <span>
| <a>
| b="""
#output
<span><a b="&quot;"></a></span>
#fragment
| <span>
| <a>
| b="<"
#output
<span><a b="<"></a></span>
#fragment
| <span>
| <a>
| b=">"
#output
<span><a b=">"></a></span>
#fragment
| <span>
| <a>
| href="javascript:"<>""
#output
<span><a href="javascript:&quot;<>&quot;"></a></span>
#fragment
| <span>
| <svg svg>
| xlink xlink:href="a"
#output
<span><svg xlink:href="a"></svg></span>
#fragment
| <span>
| <svg svg>
| xmlns xmlns:svg="test"
#output
<span><svg xmlns:svg="test"></svg></span>
#fragment
| <span>
| "a"
#output
<span>a</span>
#fragment
| <span>
| "&"
#output
<span>&amp;</span>
#fragment
| <span>
| " "
#output
<span>&nbsp;</span>
#fragment
| <span>
| "<"
#output
<span>&lt;</span>
#fragment
| <span>
| ">"
#output
<span>&gt;</span>
#fragment
| <span>
| """
#output
<span>"</span>
#fragment
| <span>
| <style>
| "<&>"
#output
<span><style><&></style></span>
#fragment
| <span>
| <script>
| type="test"
| "<&>"
#output
<span><script type="test"><&></script></span>
#fragment
| <script>
| type="test"
| "<&>"
#output
<script type="test"><&></script>
#fragment
| <span>
| <xmp>
| "<&>"
#output
<span><xmp><&></xmp></span>
#fragment
| <span>
| <iframe>
| "<&>"
#output
<span><iframe><&></iframe></span>
#fragment
| <span>
| <noembed>
| "<&>"
#output
<span><noembed><&></noembed></span>
#fragment
| <span>
| <noframes>
| "<&>"
#output
<span><noframes><&></noframes></span>
#fragment
| <span>
| <noscript>
| "<&>"
#script-off
#output
<span><noscript>&lt;&amp;&gt;</noscript></span>
#fragment
| <span>
| <noscript>
| "<&>"
#script-on
#output
<span><noscript><&></noscript></span>
#fragment
| <span>
| <!-- data -->
#output
<span><!--data--></span>
#fragment
| <span>
| <a>
| <b>
| <c>
| <d>
| "e"
| <f>
| <g>
| "h"
#output
<span><a><b><c></c></b><d>e</d><f><g>h</g></f></a></span>
#fragment
| <span>
| b="c"
#output
<span b="c"></span>
#fragment
| <span>
| <svg svg>
| xml xml:foo="test"
#output
<span><svg xml:foo="test"></svg></span>
#fragment
| <span>
| <svg svg>
| xml abc:foo="test"
#output
<span><svg xml:foo="test"></svg></span>
#fragment
| <span>
| <svg svg>
| xmlns xmlns:foo="test"
#output
<span><svg xmlns:foo="test"></svg></span>
#fragment
| <span>
| <svg svg>
| xmlns xmlns="test"
#output
<span><svg xmlns="test"></svg></span>
#fragment
| <span>
| <svg svg>
| fake_ns abc:def="test"
#output
<span><svg abc:def="test"></svg></span>
#fragment
| <pre>
| "
"
#output
<pre>
</pre>
#fragment
| <pre>
| "a
"
#output
<pre>a
</pre>
#fragment
| <span>
| <pre>
| "
"
#output
<span><pre>
</pre></span>
#fragment
| <span>
| <pre>
| "a
"
#output
<span><pre>a
</pre></span>
#fragment
| <textarea>
| "
"
#output
<textarea>
</textarea>
#fragment
| <textarea>
| "a
"
#output
<textarea>a
</textarea>
#fragment
| <span>
| <textarea>
| "
"
#output
<span><textarea>
</textarea></span>
#fragment
| <span>
| <textarea>
| "a
"
#output
<span><textarea>a
</textarea></span>
#fragment
| <listing>
| "
"
#output
<listing>
</listing>
#fragment
| <listing>
| "a
"
#output
<listing>a
</listing>
#fragment
| <span>
| <listing>
| "
"
#output
<span><listing>
</listing></span>
#fragment
| <span>
| <listing>
| "a
"
#output
<span><listing>a
</listing></span>
#fragment
| <area>
#output
<area>
#fragment
| <span>
| <area>
| <a>
| "test"
| <b>
#output
<span><area><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <area>
| <b>
#output
<span><a>test</a><area><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <area>
#output
<span><a>test</a><b></b><area></span>
#fragment
| <base>
#output
<base>
#fragment
| <span>
| <base>
| <a>
| "test"
| <b>
#output
<span><base><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <base>
| <b>
#output
<span><a>test</a><base><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <base>
#output
<span><a>test</a><b></b><base></span>
#fragment
| <basefont>
#output
<basefont>
#fragment
| <span>
| <basefont>
| <a>
| "test"
| <b>
#output
<span><basefont><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <basefont>
| <b>
#output
<span><a>test</a><basefont><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <basefont>
#output
<span><a>test</a><b></b><basefont></span>
#fragment
| <bgsound>
#output
<bgsound>
#fragment
| <span>
| <bgsound>
| <a>
| "test"
| <b>
#output
<span><bgsound><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <bgsound>
| <b>
#output
<span><a>test</a><bgsound><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <bgsound>
#output
<span><a>test</a><b></b><bgsound></span>
#fragment
| <br>
#output
<br>
#fragment
| <span>
| <br>
| <a>
| "test"
| <b>
#output
<span><br><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <br>
| <b>
#output
<span><a>test</a><br><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <br>
#output
<span><a>test</a><b></b><br></span>
#fragment
| <col>
#output
<col>
#fragment
| <span>
| <col>
| <a>
| "test"
| <b>
#output
<span><col><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <col>
| <b>
#output
<span><a>test</a><col><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <col>
#output
<span><a>test</a><b></b><col></span>
#fragment
| <embed>
#output
<embed>
#fragment
| <span>
| <embed>
| <a>
| "test"
| <b>
#output
<span><embed><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <embed>
| <b>
#output
<span><a>test</a><embed><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <embed>
#output
<span><a>test</a><b></b><embed></span>
#fragment
| <frame>
#output
<frame>
#fragment
| <span>
| <frame>
| <a>
| "test"
| <b>
#output
<span><frame><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <frame>
| <b>
#output
<span><a>test</a><frame><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <frame>
#output
<span><a>test</a><b></b><frame></span>
#fragment
| <hr>
#output
<hr>
#fragment
| <span>
| <hr>
| <a>
| "test"
| <b>
#output
<span><hr><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <hr>
| <b>
#output
<span><a>test</a><hr><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <hr>
#output
<span><a>test</a><b></b><hr></span>
#fragment
| <img>
#output
<img>
#fragment
| <span>
| <img>
| <a>
| "test"
| <b>
#output
<span><img><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <img>
| <b>
#output
<span><a>test</a><img><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <img>
#output
<span><a>test</a><b></b><img></span>
#fragment
| <input>
#output
<input>
#fragment
| <span>
| <input>
| <a>
| "test"
| <b>
#output
<span><input><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <input>
| <b>
#output
<span><a>test</a><input><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <input>
#output
<span><a>test</a><b></b><input></span>
#fragment
| <keygen>
#output
<keygen>
#fragment
| <span>
| <keygen>
| <a>
| "test"
| <b>
#output
<span><keygen><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <keygen>
| <b>
#output
<span><a>test</a><keygen><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <keygen>
#output
<span><a>test</a><b></b><keygen></span>
#fragment
| <link>
#output
<link>
#fragment
| <span>
| <link>
| <a>
| "test"
| <b>
#output
<span><link><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <link>
| <b>
#output
<span><a>test</a><link><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <link>
#output
<span><a>test</a><b></b><link></span>
#fragment
| <meta>
#output
<meta>
#fragment
| <span>
| <meta>
| <a>
| "test"
| <b>
#output
<span><meta><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <meta>
| <b>
#output
<span><a>test</a><meta><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <meta>
#output
<span><a>test</a><b></b><meta></span>
#fragment
| <param>
#output
<param>
#fragment
| <span>
| <param>
| <a>
| "test"
| <b>
#output
<span><param><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <param>
| <b>
#output
<span><a>test</a><param><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <param>
#output
<span><a>test</a><b></b><param></span>
#fragment
| <source>
#output
<source>
#fragment
| <span>
| <source>
| <a>
| "test"
| <b>
#output
<span><source><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <source>
| <b>
#output
<span><a>test</a><source><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <source>
#output
<span><a>test</a><b></b><source></span>
#fragment
| <track>
#output
<track>
#fragment
| <span>
| <track>
| <a>
| "test"
| <b>
#output
<span><track><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <track>
| <b>
#output
<span><a>test</a><track><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <track>
#output
<span><a>test</a><b></b><track></span>
#fragment
| <wbr>
#output
<wbr>
#fragment
| <span>
| <wbr>
| <a>
| "test"
| <b>
#output
<span><wbr><a>test</a><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <wbr>
| <b>
#output
<span><a>test</a><wbr><b></b></span>
#fragment
| <span>
| <a>
| "test"
| <b>
| <wbr>
#output
<span><a>test</a><b></b><wbr></span>

View file

@ -28,6 +28,9 @@
<testsuite name="Parser">
<file>cases/TestParser.php</file>
</testsuite>
<testsuite name="Serializer">
<file>cases/TestSerializer.php</file>
</testsuite>
<testsuite name="Encoding change">
<file>cases/TestEncodingChange.php</file>
</testsuite>