A modern, accurate HTML parser and serializer for PHP
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

48 lines
1.6 KiB

namespace dW\HTML5\TestCase;
use dW\HTML5\Data;
use dW\HTML5\EOFToken;
use dW\HTML5\OpenElementsStack;
use dW\HTML5\Tokenizer;
/**
 * Runs the html5lib tokenizer conformance fixtures against the Tokenizer.
 *
 * The helpers combineCharacterTokens() and makeTokenTests() are called on
 * $this but are not defined in this class; presumably they are inherited
 * from StandardTest -- confirm against the parent class.
 */
class TestTokenizer extends \dW\HTML5\Test\StandardTest {
    /** Flip to true to turn on Tokenizer::$debug before each test. */
    const DEBUG = false;

    /**
     * Enables the tokenizer's debug flag when self::DEBUG is set.
     */
    public function setUp(): void {
        if (self::DEBUG) {
            Tokenizer::$debug = true;
        }
    }

    /**
     * Tokenizes $input from the given initial state and compares the emitted
     * tokens (after combineCharacterTokens()) with the expected list.
     *
     * @dataProvider provideStandardTokenizerTests
     *
     * @param string      $input    Text handed to the Data input stream.
     * @param array       $expected Tokens the tokenizer is expected to emit.
     * @param int         $state    Value assigned to the tokenizer's state before tokenizing.
     * @param string|null $open     Name of an element to push onto the open-elements stack
     *                              first; nothing is pushed when this is null or otherwise falsy.
     * @param array       $errors   Supplied by the data provider; not consulted by this test.
     */
    public function testStandardTokenizerTests(string $input, array $expected, int $state, ?string $open, array $errors): void {
        // $open is declared explicitly nullable: the previous "string $open = null"
        // ahead of a required parameter depended on implicit nullability, and its
        // default could never actually be used by a caller.
        $data = new Data($input);
        $stack = new OpenElementsStack();
        if ($open) {
            // Seed the stack with a single element of the requested name.
            $stack[] = (new \DOMDocument)->createElement($open);
        }

        $tokenizer = new Tokenizer($data, $stack);
        $tokenizer->state = $state;

        // Pull tokens until the EOF token has been collected (it is included in the output).
        $actual = [];
        do {
            $t = $tokenizer->createToken();
            $actual[] = $t;
        } while (!($t instanceof EOFToken));

        $actual = $this->combineCharacterTokens($actual);
        $this->assertEquals($expected, $actual);
    }

    /**
     * Collects every tokenizer *.test fixture except the blacklisted ones and
     * hands the file paths to makeTokenTests().
     *
     * @return mixed Whatever makeTokenTests() produces for the collected files.
     */
    public function provideStandardTokenizerTests() {
        $tests = [];
        // Fixture files excluded from the run, matched by base name.
        $blacklist = ["pendingSpecChanges.test", "xmlViolation.test"];
        $pattern = \dW\HTML5\BASE."tests/html5lib-tests/tokenizer/*.test";
        $flags = \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME;
        foreach (new \GlobIterator($pattern, $flags) as $file) {
            // Strict comparison: both sides are strings, so no type juggling is wanted.
            if (!in_array(basename($file), $blacklist, true)) {
                $tests[] = $file;
            }
        }
        return $this->makeTokenTests(...$tests);
    }
}