Browse Source

Fix most serializer test failures

Three remain related to xmlns attributes
split-manual
J. King 3 years ago
parent
commit
3114f3a9bb
  1. 2
      lib/DOM/Element.php
  2. 6
      lib/DOM/Text.php
  3. 14
      lib/DOM/traits/EscapeString.php
  4. 2
      lib/DOM/traits/Serialize.php
  5. 135
      tests/cases/TestSerializer.php
  6. 5
      tests/cases/TestTreeConstructor.php
  7. 45
      tests/cases/serializer/wpt01.dat
  8. 3
      tests/phpunit.dist.xml

2
lib/DOM/Element.php

@ -323,7 +323,7 @@ class Element extends \DOMElement {
# If the attribute is in some other namespace
default:
# The attribute’s serialized name is the attribute’s qualified name.
$name = $attr->name;
$name = $attr->nodeName;
}
$value = $this->escapeString($attr->value, true);

6
lib/DOM/Text.php

@ -14,8 +14,10 @@ class Text extends \DOMText {
# noframes, or plaintext element, or if the parent of current node is a noscript
# element and scripting is enabled for the node, then append the value of
# current node’s data IDL attribute literally.
// DEVIATION: No scripting.
// DEVIATION: No scripting, so <noscript> is not included
if ($this->parentNode->namespaceURI === null && in_array($this->parentNode->nodeName, ['style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'plaintext'])) {
return $this->data;
}
# Otherwise, append the value of current node’s data IDL attribute, escaped as
# described below.
return $this->escapeString($this->data);

14
lib/DOM/traits/EscapeString.php

@ -13,16 +13,16 @@ trait EscapeString {
# Escaping a string (for the purposes of the algorithm above) consists of
# running the following steps:
# 1. Replace any occurrence of the "&amp;" character by the string "&amp;amp;".
# 1. Replace any occurrence of the "&" character by the string "&amp;".
# 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the
# string "&amp;nbsp;".
$string = str_replace(['&amp;', chr(0x00A0)], ['&amp;amp;', '&amp;nbsp;'], $string);
# string "&nbsp;".
$string = str_replace(['&', "\u{A0}"], ['&amp;', '&nbsp;'], $string);
# 3. If the algorithm was invoked in the attribute mode, replace any
# occurrences of the "&quot;" character by the string "&amp;quot;".
# occurrences of the """ character by the string "&quot;".
# 4. If the algorithm was not invoked in the attribute mode, replace any
# occurrences of the "&lt;" character by the string "&amp;lt;", and any
# occurrences of the "&gt;" character by the string "&amp;gt;".
return ($attribute) ? str_replace('&quot;', '&amp;quot;', $string) : str_replace(['&lt;', '&gt;'], ['&amp;lt;', '&amp;gt;'], $string);
# occurrences of the "<" character by the string "&lt;", and any
# occurrences of the ">" character by the string "&gt;".
return ($attribute) ? str_replace('"', '&quot;', $string) : str_replace(['<', '>'], ['&lt;', '&gt;'], $string);
}
protected function coerceName(string $name): string {

2
lib/DOM/traits/Serialize.php

@ -7,7 +7,7 @@ declare(strict_types=1);
namespace MensBeam\HTML;
trait Serialize {
protected static $voidElements = [ 'area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr' ];
protected static $voidElements = [ 'area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' ];
protected function serializesAsVoid(): bool {
$name = $this->nodeName;

135
tests/cases/TestSerializer.php

@ -0,0 +1,135 @@
<?php
/** @license MIT
* Copyright 2017 , Dustin Wilson, J. King et al.
* See LICENSE and AUTHORS files for details */
declare(strict_types=1);
namespace MensBeam\HTML\TestCase;
use MensBeam\HTML\Document;
use MensBeam\HTML\Parser;
/**
 * Runs the serializer test cases stored in tests/cases/serializer/*.dat.
 *
 * Each test in a data file consists of:
 *  - a "#document" or "#fragment" header line;
 *  - a tree description, one node per line, each line starting with "|"
 *    followed by padding (two characters per nesting level);
 *  - an optional "#script-on" or "#script-off" flag line;
 *  - an "#output" line followed by the expected serialization, which runs
 *    until an empty line that is followed by the next test header (or until
 *    the end of the file).
 *
 * The class is named after its file so that it does not redeclare the
 * TestTreeConstructor class of the same namespace.
 *
 * @covers \MensBeam\HTML\Document
 * @covers \MensBeam\HTML\DocumentFragment
 * @covers \MensBeam\HTML\Element
 * @covers \MensBeam\HTML\TemplateElement
 * @covers \MensBeam\HTML\Comment
 * @covers \MensBeam\HTML\Text
 */
class TestSerializer extends \PHPUnit\Framework\TestCase {
    // NOTE(review): none of this trait's methods are called in this class, and
    // the two properties below are never read or written here; confirm whether
    // they can be removed.
    use \MensBeam\HTML\EscapeString;

    protected $out;
    protected $depth;

    /**
     * Builds the tree described by $data and checks that casting it to a
     * string yields exactly the expected serialization.
     *
     * @param array $data The tree description lines of one test
     * @param bool $fragment Whether the root is a document fragment rather than a document
     * @param string $exp The expected serialized output
     *
     * @dataProvider provideStandardSerializerTests
     */
    public function testStandardTreeTests(array $data, bool $fragment, string $exp): void {
        $node = $this->buildTree($data, $fragment);
        $this->assertSame($exp, (string) $node);
    }

    /**
     * Parses every serializer data file and yields one data set per test.
     *
     * Tests flagged "#script-on" are parsed but not yielded; tests flagged
     * "#script-off" or carrying no flag are yielded.
     *
     * @return iterable Data sets of the form [tree lines, is-fragment, expected output],
     *                  keyed by "<file name> #<test index> (line <first line>)"
     */
    public function provideStandardSerializerTests(): iterable {
        $blacklist = [];
        $headers = ["#document", "#fragment"];
        $files = new \AppendIterator();
        $files->append(new \GlobIterator(\MensBeam\HTML\BASE."tests/cases/serializer/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
        foreach ($files as $file) {
            if (in_array(basename($file), $blacklist, true)) {
                continue;
            }
            $lines = array_map(function($v) {
                return rtrim($v, "\n");
            }, file($file));
            $total = count($lines);
            $index = 0;
            $l = 0;
            while ($l < $total) {
                // remember the (one-based) line on which the test starts, for the data set name
                $pos = $l + 1;
                assert(in_array($lines[$l], $headers, true), new \Exception("Test $file #$index does not start with #document or #fragment tag at line ".($l + 1)));
                $fragment = $lines[$l] === "#fragment";
                // collect the test input
                $data = [];
                for (++$l; $l < $total; $l++) {
                    if (preg_match('/^#(script-(on|off)|output)$/', $lines[$l])) {
                        break;
                    }
                    $data[] = $lines[$l];
                }
                // set the script mode, if present; the null-coalescing guards
                // against a file which ends before any marker is seen
                $marker = $lines[$l] ?? "";
                assert(preg_match('/^#(script-(on|off)|output)$/', $marker) === 1, new \Exception("Test $file #$index follows data with something other than script flag or output at line ".($l + 1)));
                $script = null;
                if ($marker === "#script-off") {
                    $script = false;
                    $l++;
                } elseif ($marker === "#script-on") {
                    $script = true;
                    $l++;
                }
                // collect the output string
                $exp = [];
                assert(($lines[$l] ?? "") === "#output", new \Exception("Test $file #$index follows input with something other than output at line ".($l + 1)));
                for (++$l; $l < $total; $l++) {
                    // an empty line followed by the header of the next test ends the output
                    if ($lines[$l] === "" && in_array(($lines[$l + 1] ?? ""), $headers, true)) {
                        break;
                    }
                    assert(preg_match('/^[^#]/', $lines[$l]) === 1, new \Exception("Test $file #$index contains unrecognized data after document at line ".($l + 1)));
                    $exp[] = $lines[$l];
                }
                $exp = implode("\n", $exp);
                // there is no scripting support, so tests requiring scripting are skipped
                if (!$script) {
                    yield basename($file)." #$index (line $pos)" => [$data, $fragment, $exp];
                }
                // step over the empty separator line
                $l++;
                $index++;
            }
        }
    }

    /**
     * Constructs a DOM tree from the tree description of a single test.
     *
     * @param array $data The tree description lines
     * @param bool $fragment Whether to build into a document fragment rather than the document itself
     *
     * @return \DOMNode The document or document fragment holding the tree
     *
     * @throws \Exception If a line cannot be interpreted, or a text node is never terminated
     */
    protected function buildTree(array $data, bool $fragment): \DOMNode {
        $document = new Document;
        $out = $fragment ? $document->createDocumentFragment() : $document;
        $cur = $out;
        $pad = 2;
        // the map is the same for every line, so flip it only once
        $namespaces = array_flip(Parser::NAMESPACE_MAP);
        $total = count($data);
        // process each line in turn
        for ($l = 0; $l < $total; $l++) {
            if (!preg_match('/^(\|\s+)(.+)/', $data[$l], $m)) {
                throw new \Exception("Input data is invalid on line ".($l + 1));
            }
            // pop any parents as long as the padding of the line is less than the expected padding
            $p = strlen($m[1]);
            assert($p >= 2 && $p <= $pad && !($p % 2), new \Exception("Input data is invalid on line ".($l + 1)));
            while ($p < $pad) {
                $pad -= 2;
                $cur = $cur->parentNode;
            }
            // act based upon what the rest of the line looks like
            $d = $m[2];
            if (preg_match('/^<!-- (.*?) -->$/', $d, $m)) {
                // comment
                $cur->appendChild($document->createComment($m[1]));
            } elseif (preg_match('/^<!DOCTYPE ([^ >]*)(?: "([^"]*)" "([^"]*)")?>$/', $d, $m)) {
                // doctype; the identifier groups are absent from the match
                // when the doctype has no identifiers, and the native method
                // takes strings rather than null, so default everything to ""
                $name = $m[1] ?? "";
                $public = $m[2] ?? "";
                $system = $m[3] ?? "";
                $cur->appendChild($document->implementation->createDocumentType($name, $public, $system));
            } elseif (preg_match('/^<(?:([^ ]+) )?([^>]+)>$/', $d, $m)) {
                // element; an unknown or missing namespace prefix means no namespace
                $ns = strlen((string) $m[1]) ? ($namespaces[$m[1]] ?? null) : null;
                $cur = $cur->appendChild($document->createElementNS($ns, $m[2]));
                $pad += 2;
            } elseif (preg_match('/^(?:([^" ]+) )?([^"=]+)="((?:[^"]|"(?!$))*)"$/', $d, $m)) {
                // attribute; an unknown namespace prefix is used verbatim as the namespace
                $ns = strlen((string) $m[1]) ? ($namespaces[$m[1]] ?? $m[1]) : "";
                $cur->setAttributeNS($ns, $m[2], $m[3]);
            } elseif (preg_match('/^"((?:[^"]|"(?!$))*)("?)$/', $d, $m)) {
                // text; a line without a closing quotation mark continues on the next line
                $t = $m[1];
                while (($m[2] ?? "") === "") {
                    if (++$l >= $total) {
                        throw new \Exception("Input data is invalid on line ".$l);
                    }
                    preg_match('/^((?:[^"]|"(?!$))*)("?)$/', $data[$l], $m);
                    $t .= "\n".($m[1] ?? "");
                }
                $cur->appendChild($document->createTextNode($t));
            } else {
                throw new \Exception("Input data is invalid on line ".($l + 1));
            }
        }
        return $out;
    }
}

5
tests/cases/TestTreeConstructor.php

@ -8,7 +8,6 @@ namespace MensBeam\HTML\TestCase;
use MensBeam\HTML\Data;
use MensBeam\HTML\Document;
use MensBeam\HTML\EOFToken;
use MensBeam\HTML\LoopException;
use MensBeam\HTML\NotImplementedException;
use MensBeam\HTML\OpenElementsStack;
@ -286,7 +285,7 @@ class TestTreeConstructor extends \PHPUnit\Framework\TestCase {
protected function balanceTree(array $act, array $exp): array {
// makes sure that the actual tree contain the same number of lines as the expected tree
// lines are inserted where the two trees diverge, until the end of the actual tree is reached
// this usuallyresults in clean PHPUnit comparison failure output
// this usually results in cleaner PHPUnit comparison failure output
for ($a = 0; $a < sizeof($act) && sizeof($act) < sizeof($exp); $a++) {
if (!isset($act[$a]) || $exp[$a] !== $act[$a]) {
array_splice($act, $a, 0, [""]);
@ -431,7 +430,7 @@ class TestTreeConstructor extends \PHPUnit\Framework\TestCase {
}
// collect the output tree
$exp = [];
assert($lines[$l] === "#document", new \Exception("Test $file #$index follows dociument fragment with something other than document at line ".($l + 1)));
assert($lines[$l] === "#document", new \Exception("Test $file #$index follows document fragment with something other than document at line ".($l + 1)));
for (++$l; $l < sizeof($lines); $l++) {
if ($lines[$l] === "" && ($lines[$l + 1] ?? "") === "#data") {
break;

45
tests/cases/serializer/wpt01.dat

@ -26,7 +26,7 @@
#fragment
| <span>
| <a>
| b=" "
| b=" "
#output
<span><a b="&nbsp;"></a></span>
@ -42,33 +42,33 @@
| <a>
| b="<"
#output
<span><a b="&lt;"></a></span>
<span><a b="<"></a></span>
#fragment
| <span>
| <a>
| b=">"
#output
<span><a b="&gt;"></a></span>
<span><a b=">"></a></span>
#fragment
| <span>
| <a>
| href="javascript:"<>""
#output
<span><a href="javascript:&quot;&lt;>&quot;"></a></span>
<span><a href="javascript:&quot;<>&quot;"></a></span>
#fragment
| <span>
| <svg svg>
| xlink href="a"
| xlink xlink:href="a"
#output
<span><svg xlink:href="a"></svg></span>
#fragment
| <span>
| <svg svg>
| xmlns svg="test"
| xmlns xmlns:svg="test"
#output
<span><svg xmlns:svg="test"></svg></span>
@ -86,7 +86,7 @@
#fragment
| <span>
| " "
| " "
#output
<span>&nbsp;</span>
@ -106,7 +106,7 @@
| <span>
| """
#output
<span>&quot;</span>
<span>"</span>
#fragment
| <span>
@ -162,6 +162,15 @@
| <span>
| <noscript>
| "<&>"
#script-off
#output
<span><noscript>&lt;&amp;&gt;</noscript></span>
#fragment
| <span>
| <noscript>
| "<&>"
#script-on
#output
<span><noscript><&></noscript></span>
@ -176,11 +185,11 @@
| <a>
| <b>
| <c>
| <d>
| "e"
| <f>
| <g>
| "h"
| <d>
| "e"
| <f>
| <g>
| "h"
#output
<span><a><b><c></c></b><d>e</d><f><g>h</g></f></a></span>
@ -193,7 +202,7 @@
#fragment
| <span>
| <svg svg>
| xml foo="test"
| xml xml:foo="test"
#output
<span><svg xml:foo="test"></svg></span>
@ -207,7 +216,7 @@
#fragment
| <span>
| <svg svg>
| xmlns foo="test"
| xmlns xmlns:foo="test"
#output
<span><svg xmlns:foo="test"></svg></span>
@ -253,7 +262,7 @@
#fragment
| <span>
| <pre>
| "abc
| "a
"
#output
<span><pre>a
@ -287,7 +296,7 @@
#fragment
| <span>
| <textarea>
| "abc
| "a
"
#output
<span><textarea>a
@ -321,7 +330,7 @@
#fragment
| <span>
| <listing>
| "abc
| "a
"
#output
<span><listing>a

3
tests/phpunit.dist.xml

@ -25,5 +25,8 @@
<testsuite name="Tree">
<file>cases/TestTreeConstructor.php</file>
</testsuite>
<testsuite name="Serializer">
<file>cases/TestSerializer.php</file>
</testsuite>
</testsuites>
</phpunit>

Loading…
Cancel
Save