markTestIncomplete("Foreign content fragment tests still to be implemented"); } // certain tests need to be patched to ignore unavoidable limitations of PHP's DOM [$exp, $patched, $skip] = $this->patchTest($data, $fragment, $exp); if (strlen($skip)) { $this->markTestSkipped($skip); } elseif ($patched) { $this->markAsRisky(); } // convert parse error constants into standard symbols in specification $errorMap = array_map(function($str) { return strtolower(str_replace("_", "-", $str)); }, array_flip(array_filter((new \ReflectionClass(ParseError::class))->getConstants(), function($v) { return is_int($v); }))); // create a stub error handler which collects parse errors $errors = []; $errorHandler = $this->createStub(ParseError::class); $errorHandler->method("emit")->willReturnCallback(function($file, $line, $col, $code) use (&$errors, $errorMap) { $errors[] = ['code' => $errorMap[$code], 'line' => $line, 'col' => $col]; return true; }); // initialize the classes we need $decoder = new Data($data, "STDIN", $errorHandler, "UTF-8"); $stack = new OpenElementsStack; $tokenizer = new Tokenizer($decoder, $stack, $errorHandler); $doc = new Document; // prepare the fragment context, if any if ($fragment) { $fragment = explode(" ", $fragment); assert(sizeof($fragment) < 3); if (sizeof($fragment) === 1) { $fragmentContext = $doc->createElement($fragment[0]); } else { $ns = array_flip(Parser::NAMESPACE_MAP)[$fragment[0]] ?? 
null; assert(isset($ns)); $fragmentContext = $doc->createElementNS($ns, $fragment[1]); } } else { $fragmentContext = null; } $treeBuilder = new TreeBuilder($doc, $decoder, $tokenizer, $errorHandler, $stack, new TemplateInsertionModesStack, $fragmentContext); // run the tree builder try { do { $token = $tokenizer->createToken(); $treeBuilder->emitToken($token); } while (!$token instanceof EOFToken); } catch (\DOMException $e) { $this->markTestSkipped('Requires implementation of the "Coercing an HTML DOM into an infoset" specification section'); return; } catch (LoopException $e) { $act = $this->serializeTree($doc); $this->assertEquals($exp, $act, $e->getMessage()."\n".$treeBuilder->debugLog); throw $e; } catch (NotImplementedException $e) { $this->markTestSkipped($e->getMessage()); return; } $act = $this->serializeTree($doc, (bool) $fragmentContext); $this->assertEquals($exp, $act, $treeBuilder->debugLog); // TODO: evaluate errors } protected function patchTest(string $data, $fragment, array $exp): array { $patched = false; $skip = ""; // comments outside the root element are silently dropped by the PHP DOM for ($a = 0; $a < sizeof($exp); $a++) { if (strpos($exp[$a], "| "); } elseif ($n instanceof \DOMCharacterData) { $this->push('"'.$n->data.'"'); } else { throw new \Exception("Node type ".get_class($n)." 
not handled"); } }

    /**
     * Data provider which parses the html5lib tree-construction test files
     *
     * Every "*.dat" file found under "tests/html5lib-tests/tree-construction/"
     * (relative to the \dW\HTML5\BASE constant) is read line by line. Each test
     * in a file is expected to consist of these sections, in this order:
     *
     * - "#data", followed by the test input
     * - "#errors", followed by the expected errors; "#new-errors" marker lines
     *   are dropped and the errors after them are added to the same list
     * - optionally "#script-on" or "#script-off"
     * - optionally "#document-fragment", followed by one line naming the context
     * - "#document", followed by the expected tree, one node per "|"-prefixed
     *   line; lines without that prefix continue the previous node
     *
     * Tests flagged "#script-on" are parsed but never yielded.
     *
     * @return iterable Data sets keyed by "<file> #<index> (line <n>)", each being
     *  [string $data, string[] $exp, string[] $errors, ?string $fragment]
     * @throws \Exception If a test file cannot be read, or (when assertions are enabled) if a file is malformed
     */
    public function provideStandardTreeTests(): iterable {
        $blacklist = [];
        $files = new \GlobIterator(\dW\HTML5\BASE."tests/html5lib-tests/tree-construction/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME);
        foreach ($files as $file) {
            if (in_array(basename($file), $blacklist, true)) {
                continue;
            }
            $raw = file($file);
            if ($raw === false) {
                // fail loudly rather than feeding a boolean to array_map()
                throw new \Exception("Test file $file could not be read");
            }
            // only the trailing line feed is stripped; any other trailing characters are left in place
            $lines = array_map(static function($v) {
                return rtrim($v, "\n");
            }, $raw);
            $total = sizeof($lines);
            $index = 0;
            $l = 0;
            while ($l < $total) {
                // one-based line number of the "#data" tag, used to label the data set
                $pos = $l + 1;
                assert($lines[$l] === "#data", new \Exception("Test $file #$index does not start with #data tag at line ".($l + 1)));
                // collect the test input
                $data = [];
                for (++$l; $l < $total && $lines[$l] !== "#errors"; $l++) {
                    $data[] = $lines[$l];
                }
                $data = implode("\n", $data);
                // collect the test errors
                assert(($lines[$l] ?? "") === "#errors", new \Exception("Test $file #$index does not list errors at line ".($l + 1)));
                $errors = [];
                for (++$l; $l < $total; $l++) {
                    if ($lines[$l] === "#new-errors") {
                        continue;
                    } elseif (preg_match('/^#(document(-fragment)?|script-(on|off)|)$/', $lines[$l])) {
                        break;
                    }
                    $errors[] = $lines[$l];
                }
                // set the script mode, if present; reading with a default keeps a
                // truncated file from raising an undefined-offset warning before
                // the assertion can report the problem
                $marker = $lines[$l] ?? "";
                assert(preg_match('/^#(script-(on|off)|document(-fragment)?)$/', $marker) === 1, new \Exception("Test $file #$index follows errors with something other than script flag, document fragment, or document at line ".($l + 1)));
                $script = null;
                if ($marker === "#script-off") {
                    $script = false;
                    $l++;
                } elseif ($marker === "#script-on") {
                    $script = true;
                    $l++;
                }
                // collect the document fragment, if present
                $marker = $lines[$l] ?? "";
                assert(preg_match('/^#document(-fragment)?$/', $marker) === 1, new \Exception("Test $file #$index follows script flag with something other than document fragment or document at line ".($l + 1)));
                $fragment = null;
                if ($marker === "#document-fragment") {
                    // the context is named on the line right after the tag
                    $fragment = $lines[$l + 1] ?? null;
                    $l += 2;
                }
                // collect the output tree
                assert(($lines[$l] ?? "") === "#document", new \Exception("Test $file #$index follows document fragment with something other than document at line ".($l + 1)));
                $exp = [];
                for (++$l; $l < $total; $l++) {
                    if ($lines[$l] === "" && ($lines[$l + 1] ?? "") === "#data") {
                        // a blank line directly before the next "#data" tag ends the test
                        break;
                    } elseif (($lines[$l][0] ?? "") !== "|") {
                        // append the data to the previous token
                        $exp[sizeof($exp) - 1] .= "\n".$lines[$l];
                        continue;
                    }
                    assert(preg_match('/^[^#]/', $lines[$l]) === 1, new \Exception("Test $file #$index contains unrecognized data after document at line ".($l + 1)));
                    $exp[] = $lines[$l];
                }
                if (!$script) {
                    // scripting-dependent tests are skipped entirely since we will not support scripting
                    yield "$file #$index (line $pos)" => [$data, $exp, $errors, $fragment];
                }
                // step over the blank separator line and move on to the next test
                $l++;
                $index++;
            }
        }
    }
}