patchTest($data, $fragment, $errors, $exp); if (strlen($skip)) { $this->markTestSkipped($skip); } elseif ($patched) { $this->markAsRisky(); } // convert parse error constants into standard symbols in specification $errorMap = array_map(function($str) { return strtolower(str_replace("_", "-", $str)); }, array_flip(array_filter((new \ReflectionClass(ParseError::class))->getConstants(), function($v) { return is_int($v); }))); // create a stub error handler which collects parse errors $actualErrors = []; $errorHandler = $this->createStub(ParseError::class); $errorHandler->method("emit")->willReturnCallback(function($file, $line, $col, $code) use (&$actualErrors, $errorMap) { $actualErrors[] = ['code' => $errorMap[$code], 'line' => $line, 'col' => $col]; return true; }); // initialize the output document $doc = new Document; // prepare the fragment context, if any if ($fragment) { $fragment = explode(" ", $fragment); assert(sizeof($fragment) < 3); if (sizeof($fragment) === 1) { $fragmentContext = $doc->createElement($fragment[0]); } else { $ns = array_flip(Parser::NAMESPACE_MAP)[$fragment[0]] ?? 
null; assert(isset($ns)); $fragmentContext = $doc->createElementNS($ns, $fragment[1]); } } else { $fragmentContext = null; } // initialize the other classes we need $decoder = new Data($data, "STDIN", $errorHandler, "UTF-8"); $stack = new OpenElementsStack($fragmentContext); $tokenizer = new Tokenizer($decoder, $stack, $errorHandler); $tokenList = $tokenizer->tokenize(); $treeBuilder = new TreeBuilder($doc, $decoder, $tokenizer, $tokenList, $errorHandler, $stack, new TemplateInsertionModesStack, $fragmentContext); // run the tree builder try { $treeBuilder->constructTree(); } catch (LoopException $e) { $act = $this->balanceTree($this->serializeTree($doc, (bool) $fragmentContext), $exp); $this->assertEquals($exp, $act, $e->getMessage()."\n".$treeBuilder->debugLog); throw $e; } catch (NotImplementedException $e) { $this->markTestSkipped($e->getMessage()); return; } $act = $this->balanceTree($this->serializeTree($doc, (bool) $fragmentContext), $exp); $this->assertEquals($exp, $act, $treeBuilder->debugLog); if ($errors !== false) { // If $errors is false, the test does not include errors when there are in fact errors $this->assertCount(sizeof($errors), $actualErrors, var_export($errors, true).var_export($actualErrors, true)); } } protected function patchTest(string $data, $fragment, array $errors, array $exp): array { $patched = false; $skip = ""; // comments outside the root element are silently dropped by the PHP DOM if (!$fragment) { for ($a = 0; $a < sizeof($exp); $a++) { if (strpos($exp[$a], "|
helloexcite!me!", // this one is pretty hairy with buffered characters ])) { $errors = false; } if ($errors) { // some "old" errors are made redundant by "new" errors $obsoleteSymbolList = implode("|", [ "illegal-codepoint-for-numeric-entity", "eof-in-attribute-value-double-quote", "non-void-element-with-trailing-solidus", "invalid-character-in-attribute-name", "attributes-in-end-tag", "expected-tag-name", "unexpected-character-after-solidus-in-tag", "expected-closing-tag-but-got-char", "eof-in-tag-name", "need-space-after-doctype", "expected-doctype-name-but-got-right-bracket", "expected-dashes-or-doctype", "expected-space-or-right-bracket-in-doctype", "unexpected-char-in-comment", "eof-in-comment-double-dash", "expected-named-entity", "named-entity-without-semicolon", "numeric-entity-without-semicolon", "expected-numeric-entity", "eof-in-attribute-name", "unexpected-eof-in-text-mode", "unexpected-EOF-after-solidus-in-tag", "expected-attribute-name-but-got-eof", "eof-in-script-in-script", "expected-script-data-but-got-eof", "unexpected-EOF-in-text-mode", "expected-tag-name-but-got-question-mark", "incorrect-comment", "self-closing-flag-on-end-tag", "invalid-codepoint", "invalid-codepoint-in-body", "invalid-codepoint-in-foreign-content", "end-table-tag-in-caption", "equals-in-unquoted-attribute-value", "eof-in-numeric-entity", "unexpected-char-in-doctype", "unexpected-end-of-doctype", "unexpected-dash-after-double-dash-in-comment", "unexpected-bang-after-double-dash-in-comment", ]); for ($a = 0, $stop = sizeof($errors); $a < $stop; $a++) { if (preg_match("/^\(\d+,\d+\):? 
($obsoleteSymbolList)$/", $errors[$a])) { // these errors are redundant with "new" errors unset($errors[$a]); } } $errors = array_values($errors); // some other errors appear to document implementation details // rather than what the specificatioon dictates, or are // simple duplicates for ($a = 0, $stop = sizeof($errors); $a < $stop; $a++) { if ( preg_match("/^\(\d+,\d+\): unexpected-end-tag-in-special-element$/", $errors[$a]) || preg_match('/^\d+: Unclosed element “[^”]+”\.$/u', $errors[$a]) || ($data === '"); } elseif ($n instanceof \DOMCharacterData) { $this->push('"'.$n->data.'"'); } else { throw new \Exception("Node type ".get_class($n)." not handled"); } }

    /**
     * Provides tree-construction tests parsed from ".dat" files.
     *
     * Files are globbed from two directories below \MensBeam\HTML\BASE. Each
     * test within a file consists of a "#data" section, an "#errors" section
     * (a "#new-errors" marker line inside it is dropped), an optional
     * "#script-on" or "#script-off" flag, an optional "#document-fragment"
     * section holding the fragment context on a single line, and a
     * "#document" section holding the expected tree. A blank line followed by
     * "#data" ends a test.
     *
     * Tests flagged "#script-on" are parsed but never yielded.
     *
     * @return iterable Keys are "<file name> #<index> (line <n>)"; each value is
     *                  [string $data, string[] $exp, string[] $errors, ?string $fragment]
     * @throws \Exception if a file cannot be read, if the expected tree starts
     *                    with a continuation line, or (through assert(), when
     *                    assertions are enabled) if a file is otherwise malformed
     */
    public function provideStandardTreeTests(): iterable {
        $blacklist = [];
        $files = new \AppendIterator();
        $files->append(new \GlobIterator(\MensBeam\HTML\BASE."tests/html5lib-tests/tree-construction/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
        $files->append(new \GlobIterator(\MensBeam\HTML\BASE."tests/cases/tree-construction/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
        foreach ($files as $file) {
            // strict comparison: a blacklist entry must be an identical file name
            if (in_array(basename($file), $blacklist, true)) {
                continue;
            }
            // file() returns false on failure; report that rather than passing it to array_map()
            $raw = file($file);
            if ($raw === false) {
                throw new \Exception("Test file $file could not be read");
            }
            $lines = array_map(function($v) {
                return rtrim($v, "\n");
            }, $raw);
            $count = sizeof($lines);
            $index = 0;
            $l = 0;
            while ($l < $count) {
                // 1-based line number of the test's "#data" tag, used in the data set name
                $pos = $l + 1;
                assert($lines[$l] === "#data", new \Exception("Test $file #$index does not start with #data tag at line ".($l + 1)));
                // collect the test input
                $data = [];
                for (++$l; $l < $count; $l++) {
                    if ($lines[$l] === "#errors") {
                        break;
                    }
                    $data[] = $lines[$l];
                }
                $data = implode("\n", $data);
                // collect the test errors
                $errors = [];
                assert(($lines[$l] ?? "") === "#errors", new \Exception("Test $file #$index does not list errors at line ".($l + 1)));
                for (++$l; $l < $count; $l++) {
                    if ($lines[$l] === "#new-errors") {
                        continue;
                    } elseif (preg_match('/^#(document(-fragment)?|script-(on|off)|)$/', $lines[$l])) {
                        break;
                    }
                    $errors[] = $lines[$l];
                }
                // set the script mode, if present; from here on the current line is
                // read with a fallback so that a truncated file fails the assertion
                // instead of raising an undefined-offset warning
                assert(preg_match('/^#(script-(on|off)|document(-fragment)?)$/', $lines[$l] ?? "") === 1, new \Exception("Test $file #$index follows errors with something other than script flag, document fragment, or document at line ".($l + 1)));
                $script = null;
                $flag = $lines[$l] ?? "";
                if ($flag === "#script-off") {
                    $script = false;
                    $l++;
                } elseif ($flag === "#script-on") {
                    $script = true;
                    $l++;
                }
                // collect the document fragment, if present
                assert(preg_match('/^#document(-fragment)?$/', $lines[$l] ?? "") === 1, new \Exception("Test $file #$index follows script flag with something other than document fragment or document at line ".($l + 1)));
                $fragment = null;
                if (($lines[$l] ?? "") === "#document-fragment") {
                    // the fragment context occupies the single line after the tag
                    $fragment = $lines[++$l] ?? null;
                    $l++;
                }
                // collect the output tree
                $exp = [];
                assert(($lines[$l] ?? "") === "#document", new \Exception("Test $file #$index follows document fragment with something other than document at line ".($l + 1)));
                for (++$l; $l < $count; $l++) {
                    if ($lines[$l] === "" && ($lines[$l + 1] ?? "") === "#data") {
                        break;
                    } elseif (($lines[$l][0] ?? "") !== "|") {
                        if (!$exp) {
                            // there is no previous token to append to; writing to
                            // index -1 would raise a warning and leave a stray key
                            if ($lines[$l] === "") {
                                continue;
                            }
                            throw new \Exception("Test $file #$index has tree data which does not belong to any token at line ".($l + 1));
                        }
                        // append the data to the previous token
                        $exp[sizeof($exp) - 1] .= "\n".$lines[$l];
                        continue;
                    }
                    assert(preg_match('/^[^#]/', $lines[$l]) === 1, new \Exception("Test $file #$index contains unrecognized data after document at line ".($l + 1)));
                    $exp[] = $lines[$l];
                }
                if (!$script) {
                    // scripting-dependent tests are skipped entirely since we will not support scripting
                    yield basename($file)." #$index (line $pos)" => [$data, $exp, $errors, $fragment];
                }
                // step over the blank separator line
                $l++;
                $index++;
            }
        }
    }
}
please!