J. King
5 years ago
2 changed files with 131 additions and 148 deletions
@ -1,138 +0,0 @@ |
|||||
<?php |
|
||||
declare(strict_types=1); |
|
||||
namespace dW\HTML5\Test; |
|
||||
|
|
||||
use dW\HTML5\CharacterToken; |
|
||||
use dW\HTML5\CommentToken; |
|
||||
use dW\HTML5\DOCTYPEToken; |
|
||||
use dW\HTML5\EndTagToken; |
|
||||
use dW\HTML5\StartTagToken; |
|
||||
use dW\HTML5\Tokenizer; |
|
||||
|
|
||||
/**
 * Shared helpers for tokenizer tests that are driven by JSON test files.
 *
 * The helpers read test files, undo the optional "double escaped" encoding,
 * apply a handful of local patches, and turn every expected-output entry into
 * the corresponding token object so that concrete test classes can compare
 * them against what the tokenizer actually emits.
 *
 * NOTE(review): the file layout handled here (keys "tests", "xmlViolationTests",
 * "doubleEscaped", "initialStates", "lastStartTag", "errors") looks like the
 * html5lib-tests tokenizer format referenced in patchTest() -- confirm.
 */
class StandardTest extends \PHPUnit\Framework\TestCase {
    /** Maps the initial-state names used in test files to Tokenizer state constants. */
    public const STATE_MAP = [
        'Data state'          => Tokenizer::DATA_STATE,
        'PLAINTEXT state'     => Tokenizer::PLAINTEXT_STATE,
        'RCDATA state'        => Tokenizer::RCDATA_STATE,
        'RAWTEXT state'       => Tokenizer::RAWTEXT_STATE,
        'Script data state'   => Tokenizer::SCRIPT_DATA_STATE,
        'CDATA section state' => Tokenizer::CDATA_SECTION_STATE,
    ];

    /**
     * Replaces every literal `\uXXXX` sequence (backslash, "u", four hex digits,
     * case-insensitive) in a string with the UTF-8 encoding of that code point.
     *
     * The substitution is done in a single pass over the input, so text produced
     * by one replacement is never re-interpreted as another escape sequence
     * (e.g. `\u005Cu0041` yields the six characters `\u0041`, not `A`).
     *
     * @param string $str The string containing escape sequences
     * @return string The string with all escape sequences decoded; the input is
     *                returned unchanged if the regular expression engine fails
     */
    protected function reverseDoubleEscape(string $str): string {
        $decoded = preg_replace_callback(
            "/\\\\u([0-9a-f]{4})/i",
            static function (array $match) {
                // four hex digits always fit in an int, so the cast is lossless
                return \MensBeam\Intl\Encoding\UTF8::encode((int) hexdec($match[1]));
            },
            $str
        );
        // preg_replace_callback() returns null on a PCRE error
        return $decoded ?? $str;
    }

    /**
     * Merges each run of consecutive CharacterToken objects into a single token.
     *
     * The first token of a run is kept and the `data` of the following character
     * tokens is appended to it, so that token object is modified in place.
     * Tokens of any other type are passed through untouched and in order.
     *
     * @param array $tokens The token list to condense
     * @return array The condensed token list
     */
    protected function combineCharacterTokens(array $tokens): array {
        $out = [];
        $pending = null;
        foreach ($tokens as $t) {
            if ($t instanceof CharacterToken) {
                if ($pending === null) {
                    $pending = $t;
                } else {
                    $pending->data .= $t->data;
                }
                continue;
            }
            // a non-character token ends the current run, if there is one
            if ($pending !== null) {
                $out[] = $pending;
                $pending = null;
            }
            $out[] = $t;
        }
        // flush a run which extends to the end of the list
        if ($pending !== null) {
            $out[] = $pending;
        }
        return $out;
    }

    /**
     * Generates one data set per test and initial state from the given test files.
     *
     * Each data set is keyed by "<file name> #<index>: <description> (<initial state>)"
     * and consists of, in order: the input string, the list of expected token
     * objects, the initial tokenizer state (looked up in STATE_MAP), the name of
     * the last start tag (or null), and the list of expected errors.
     *
     * @param string ...$file Paths of the JSON test files to read
     * @return iterable The generated data sets
     * @throws \Exception If a test file cannot be read or decoded, contains no
     *                    test list, or uses an unsupported token type
     */
    protected function makeTokenTests(string ...$file): iterable {
        foreach ($file as $path) {
            $f = basename($path);
            foreach ($this->loadTests($path) as $index => $test) {
                $testId = "$f #$index";
                if ($test['doubleEscaped'] ?? false) {
                    $test['input'] = $this->reverseDoubleEscape($test['input']);
                    // only the direct string members of each output entry are decoded;
                    // nested values (such as attribute maps) are left as they are
                    foreach ($test['output'] as $i => $token) {
                        foreach ($token as $j => $field) {
                            if (is_string($field)) {
                                $test['output'][$i][$j] = $this->reverseDoubleEscape($field);
                            }
                        }
                    }
                }
                $test['initialStates'] = $test['initialStates'] ?? ["Data state"];
                // check if a test needs a patch due to trivial differences in implementation
                $this->patchTest($testId, $test);
                foreach ($test['initialStates'] as $state) {
                    yield "$testId: {$test['description']} ($state)" => [
                        $test['input'], // input
                        // built anew for every data set so that no token objects are shared between them
                        $this->makeExpectedTokens($test['output']), // output
                        self::STATE_MAP[$state], // initial state
                        $test['lastStartTag'] ?? null, // open element, if any
                        $test['errors'] ?? [], // errors, if any
                    ];
                }
            }
        }
    }

    /**
     * Reads a JSON test file and returns its list of tests.
     *
     * The list is taken from the "tests" member or, failing that, from the
     * "xmlViolationTests" member.
     *
     * @param string $path Path of the test file
     * @return array The list of tests
     * @throws \Exception If the file is unreadable, is not valid JSON, or has neither member
     */
    private function loadTests(string $path): array {
        $json = file_get_contents($path);
        if ($json === false) {
            throw new \Exception("Test file '$path' could not be read");
        }
        $testSet = json_decode($json, true);
        if (!is_array($testSet)) {
            throw new \Exception("Test file '$path' could not be decoded: ".json_last_error_msg());
        }
        $tests = $testSet['tests'] ?? $testSet['xmlViolationTests'] ?? null;
        if (!is_array($tests)) {
            throw new \Exception("Test file '$path' has neither a 'tests' nor an 'xmlViolationTests' list");
        }
        return $tests;
    }

    /**
     * Converts the expected-output entries of a test into token objects.
     *
     * Each entry is a list whose first member names the token type and whose
     * remaining members carry that token's data.
     *
     * @param array $output The expected-output entries of one test
     * @return array The corresponding token objects, in the same order
     * @throws \Exception If an entry has a token type which is not handled here
     */
    private function makeExpectedTokens(array $output): array {
        $tokens = [];
        foreach ($output as $token) {
            switch ($token[0]) {
                case "DOCTYPE":
                    // null members are turned into empty strings by the casts
                    $t = new DOCTYPEToken((string) $token[1], (string) $token[2], (string) $token[3]);
                    // the fifth member is the inverse of the force-quirks flag
                    $t->forceQuirks = !$token[4];
                    $tokens[] = $t;
                    break;
                case "StartTag":
                    $t = new StartTagToken($token[1], $token[3] ?? false);
                    foreach ($token[2] ?? [] as $name => $value) {
                        // PHP turns numeric-looking array keys into integers, hence the cast
                        $t->setAttribute((string) $name, $value);
                    }
                    $tokens[] = $t;
                    break;
                case "EndTag":
                    $tokens[] = new EndTagToken($token[1]);
                    break;
                case "Character":
                    $tokens[] = new CharacterToken($token[1]);
                    break;
                case "Comment":
                    $tokens[] = new CommentToken($token[1]);
                    break;
                default:
                    throw new \Exception("Token type '{$token[0]}' not implemented in standard test interpreter");
            }
        }
        return $tokens;
    }

    /**
     * Adjusts the expectations of individual tests, identified by
     * "<file name> #<index>", before data sets are generated from them.
     *
     * @param string $id The test identifier
     * @param array $test The decoded test, modified in place
     */
    protected function patchTest(string $id, &$test): void {
        switch ($id) {
            // test emits input stream error first despite peeking
            case "test3.test #30":
                $test['errors'] = array_reverse($test['errors']);
                break;
            // eof-in-comment positions in some tests don't make sense
            // https://github.com/html5lib/html5lib-tests/issues/125
            case "test3.test #143":
                $test['errors'][0]['col'] = 10;
                break;
            case "test3.test #144":
            case "test3.test #145":
            case "test3.test #146":
                $test['errors'][0]['line'] = 2;
                $test['errors'][0]['col'] = 2;
                break;
        }
    }
}
|
Loading…
Reference in new issue