<?php
/*
 * NOTE(review): this file was recovered from a whitespace-collapsed, tag-stripped
 * patch that adds tests/cases/TestTreeConstructor.php. The namespace, the import,
 * the class declaration line and the head of the NS constant were not visible in
 * the patch and are reconstructed here -- confirm them against the real file.
 *
 * The same patch also appears to register cases/TestTreeConstructor.php in
 * tests/phpunit.dist.xml and to re-indent that file's "../lib" entry; that hunk is
 * not PHP and is not reproduced here.
 */

declare(strict_types=1);

namespace dW\HTML5\TestCase;

use dW\HTML5\Parser;

/**
 * Runs the html5lib tree-construction fixtures against Parser::parse() by
 * serializing the resulting DOM into the html5lib tree-dump line format and
 * comparing it with the expected dump stored in each fixture.
 */
class TestTreeConstructor extends \PHPUnit\Framework\TestCase {
    /**
     * Maps a namespace URI to the prefix printed before an element's local name.
     *
     * NOTE(review): only the SVG and MathML entries and a first entry mapping to
     * the empty string were visible; the constant name and the first key are
     * assumptions (the original code read an undefined local `$ns`).
     */
    protected const NS = [
        "http://www.w3.org/1999/xhtml" => "",
        Parser::SVG_NAMESPACE          => "svg ",
        Parser::MATHML_NAMESPACE       => "math ",
    ];

    /** @var string[] Lines of the tree dump produced so far */
    protected $out;
    /** @var int Current nesting depth used to indent dumped lines */
    protected $depth;

    /**
     * Parses one fixture and compares the serialized tree with the expected dump.
     *
     * Fixtures which require scripting are marked incomplete and fragment
     * fixtures are skipped; the listed parse errors are not evaluated yet.
     *
     * @param string      $data     The markup to parse
     * @param string[]    $exp      The expected tree dump, one entry per node
     * @param string[]    $errors   The expected parse errors (currently unused)
     * @param string|null $fragment The fragment context, or null for a full document
     * @param bool|null   $scripted The scripting flag of the fixture, or null if unspecified
     *
     * @dataProvider provideStandardTreeTests
     */
    public function testStandardTreeTests(string $data, array $exp, array $errors, $fragment, ?bool $scripted): void {
        if ($scripted) {
            $this->markTestIncomplete("Scripting is not supported");
        } elseif ($fragment !== null) {
            $this->markTestSkipped("Fragment tests still to be implemented");
        }
        $doc = Parser::parse($data);
        $act = $this->serializeTree($doc);
        $this->assertEquals($exp, $act);
        // TODO: evaluate errors
    }

    /**
     * Appends one line to the dump, prefixed with "| " and indented by two
     * spaces per nesting level as the html5lib dump format requires.
     */
    protected function push(string $data): void {
        $this->out[] = "| ".str_repeat("  ", $this->depth).$data;
    }

    /**
     * Serializes a whole document into html5lib tree-dump lines.
     *
     * Every child of the document is dumped in document order, so comments
     * placed before or after the root element are included, not just the
     * doctype and the root element.
     *
     * @return string[] The dump, one entry per node
     */
    protected function serializeTree(\DOMDocument $d): array {
        $this->out = [];
        $this->depth = 0;
        foreach ($d->childNodes as $child) {
            $this->serializeNode($child);
        }
        return $this->out;
    }

    /**
     * Dumps an element, its attributes (sorted by name) and its descendants.
     *
     * Children of a "template" element are nested one level deeper under a
     * synthetic "content" line.
     */
    protected function serializeElement(\DOMElement $e): void {
        $prefix = "";
        if ($e->namespaceURI) {
            // an empty prefix is legitimate, so test for presence of the key rather than truthiness
            $known = array_key_exists($e->namespaceURI, self::NS);
            assert($known, new \Exception("Prefix for namespace {$e->namespaceURI} is not defined"));
            $prefix = $known ? self::NS[$e->namespaceURI] : "";
        }
        $this->push("<".$prefix.$e->localName.">");
        $this->depth++;
        $attr = [];
        foreach ($e->attributes as $a) {
            $attr[$a->name] = $a->value;
        }
        // compare names as strings even when PHP has cast numeric-looking keys to integers
        ksort($attr, SORT_STRING);
        foreach ($attr as $k => $v) {
            $this->push($k.'="'.$v.'"');
        }
        $isTemplate = ($e->localName === "template");
        if ($isTemplate) {
            $this->push("content");
            $this->depth++;
        }
        foreach ($e->childNodes as $n) {
            $this->serializeNode($n);
        }
        // leave the "content" level as well as the element's own level
        $this->depth -= $isTemplate ? 2 : 1;
    }

    /**
     * Dumps a single node of any supported type.
     *
     * @throws \Exception If the node's class is not handled
     */
    public function serializeNode(\DOMNode $n): void {
        if ($n instanceof \DOMElement) {
            $this->serializeElement($n);
        } elseif ($n instanceof \DOMComment) {
            // must be tested before DOMCharacterData, which is the parent class of DOMComment
            $this->push("<!-- ".$n->data." -->");
        } elseif ($n instanceof \DOMCharacterData) {
            $this->push('"'.$n->data.'"');
        } elseif ($n instanceof \DOMProcessingInstruction) {
            $this->push("<?".$n->target." ".$n->data.">");
        } elseif ($n instanceof \DOMDocumentType) {
            $this->push($this->formatDoctype($n));
        } else {
            throw new \Exception("Node type ".get_class($n)." not handled");
        }
    }

    /**
     * Formats a doctype as it appears in an html5lib tree dump: the name alone
     * when both identifiers are empty, otherwise the name followed by both
     * quoted identifiers (either of which may be empty).
     */
    private function formatDoctype(\DOMDocumentType $dt): string {
        $public = (string) $dt->publicId;
        $system = (string) $dt->systemId;
        $line = "<!DOCTYPE ".$dt->name;
        if ($public !== "" || $system !== "") {
            $line .= ' "'.$public.'" "'.$system.'"';
        }
        return $line.">";
    }

    /**
     * Reads every html5lib tree-construction fixture file and yields one data
     * set per test it contains.
     *
     * Each data set is [input, expected dump lines, expected errors, fragment
     * context or null, scripting flag or null], keyed by file name, test index
     * and starting line. Lines are stripped of their line feed only, because
     * other whitespace is significant in both the input and the expected dump.
     *
     * @throws \Exception If a fixture file cannot be read
     */
    public function provideStandardTreeTests(): iterable {
        $blacklist = [];
        $pattern = \dW\HTML5\BASE."tests/html5lib-tests/tree-construction/*.dat";
        $flags = \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME;
        foreach (new \GlobIterator($pattern, $flags) as $file) {
            if (in_array(basename($file), $blacklist, true)) {
                continue;
            }
            $raw = file($file);
            if ($raw === false) {
                throw new \Exception("Test file $file could not be read");
            }
            $lines = array_map(static function(string $line): string {
                return rtrim($line, "\n");
            }, $raw);
            $end = count($lines);
            $index = 0;
            $l = 0;
            while ($l < $end) {
                $pos = $l + 1;
                assert($lines[$l] === "#data", new \Exception("Test $file #$index does not start with #data tag at line ".($l + 1)));
                // collect the test input
                $data = [];
                for (++$l; $l < $end && $lines[$l] !== "#errors"; $l++) {
                    $data[] = $lines[$l];
                }
                $data = implode("\n", $data);
                // collect the test errors; the #new-errors marker itself is not an error
                $errors = [];
                assert(($lines[$l] ?? "") === "#errors", new \Exception("Test $file #$index does not list errors at line ".($l + 1)));
                for (++$l; $l < $end; $l++) {
                    if ($lines[$l] === "#new-errors") {
                        continue;
                    } elseif (preg_match('/^#(document(-fragment)?|script-(on|off)|)$/', $lines[$l])) {
                        break;
                    }
                    $errors[] = $lines[$l];
                }
                // collect the optional fragment context and script flag, accepting them in either order
                $script = null;
                $fragment = null;
                while ($l < $end) {
                    if ($lines[$l] === "#script-off") {
                        $script = false;
                        $l++;
                    } elseif ($lines[$l] === "#script-on") {
                        $script = true;
                        $l++;
                    } elseif ($lines[$l] === "#document-fragment") {
                        // the context element is on the line following the marker
                        $fragment = $lines[++$l] ?? "";
                        $l++;
                    } else {
                        break;
                    }
                }
                // collect the output tree
                $exp = [];
                assert(($lines[$l] ?? "") === "#document", new \Exception("Test $file #$index follows errors with something other than script flag, document fragment, or document at line ".($l + 1)));
                for (++$l; $l < $end; $l++) {
                    $line = $lines[$l];
                    if ($line === "" && ($lines[$l + 1] ?? "#data") === "#data") {
                        // a blank line before the next test (or before the end of the file) ends this test
                        break;
                    } elseif (strpos($line, "|") !== 0) {
                        // append the data to the previous token: it is the continuation of a multi-line value
                        $last = count($exp) - 1;
                        assert($last >= 0, new \Exception("Test $file #$index contains unrecognized data after document at line ".($l + 1)));
                        if ($last >= 0) {
                            $exp[$last] .= "\n".$line;
                        }
                        continue;
                    }
                    $exp[] = $line;
                }
                yield "$file #$index (line $pos)" => [$data, $exp, $errors, $fragment, $script];
                $l++;
                $index++;
            }
        }
    }
}