Browse Source

Spec-related update

See https://github.com/whatwg/html/pull/8271

Also removed test patches which are no longer needed, and fixed a bug
exposed by new tests

The test suite includes tests for the new <search> element, but this element is
not included in the specification yet, so those tests have been suppressed.
domparser
J. King 1 year ago
parent
commit
da0a4b12e3
  1. 2
      lib/Parser/Tokenizer.php
  2. 6
      lib/Parser/TreeConstructor.php
  3. 26
      tests/cases/TestTokenizer.php
  4. 21
      tests/cases/TestTreeConstructor.php

2
lib/Parser/Tokenizer.php

@ -276,7 +276,7 @@ class Tokenizer {
// DEVIATION: Character reference consumption implemented as a function
$outChar = $this->switchToCharacterReferenceState(self::DATA_STATE);
if (strspn($outChar, Data::WHITESPACE)) {
yield new WhitespaceToken($outChar); // a character reference is either all whitespace is no whitespace
yield new WhitespaceToken($outChar); // a character reference is either all whitespace or is not whitespace
} else {
yield new CharacterToken($outChar);
}

6
lib/Parser/TreeConstructor.php

@ -1076,7 +1076,7 @@ class TreeConstructor {
# 2. Generate implied end tags.
$this->stack->generateImpliedEndTags();
# 3. If the current node is not a form element, then this is a parse error.
if (!$this->stack->currentNodeName !== 'form') {
if ($this->stack->currentNodeName !== 'form') {
$this->error(ParseError::UNEXPECTED_END_TAG, $token->name);
}
# 4. Pop elements from the stack of open elements until a form element has been
@ -2036,8 +2036,8 @@ class TreeConstructor {
// mode as it may have been turned on in a previous evaluation
// of this mode
$this->fosterParenting = false;
# A character token, if the current node is table, tbody, tfoot, thead, or tr element
if ($token instanceof CharacterToken && in_array($this->stack->currentNodeName, ["table", "tbody", "tfoot", "thead", "tr"])) {
# A character token, if the current node is table, tbody, template, tfoot, thead, or tr element
if ($token instanceof CharacterToken && in_array($this->stack->currentNodeName, ["table", "tbody", "template", "tfoot", "thead", "tr"])) {
# Let the pending table character tokens be an empty list of tokens.
$this->pendingTableCharacterTokens = [];
# Let the original insertion mode be the current insertion mode.

26
tests/cases/TestTokenizer.php

@ -235,32 +235,6 @@ class TestTokenizer extends \PHPUnit\Framework\TestCase {
case ["<!\u{B}", ["Data state"]]:
$test['errors'] = array_reverse($test['errors']);
break;
// eof-in-<whatever> positions in some tests don't make sense
// https://github.com/html5lib/html5lib-tests/issues/125
case ["", ["CDATA section state"]]:
// there is no position 2
$test['errors'][0]['col']--;
break;
case ["\u{A}", ["CDATA section state"]]:
// the line break is, for some reason, not counted in the test
$test['errors'][0]['line']++;
$test['errors'][0]['col'] = 1;
break;
case ["<!----!\r\n>", ["Data state"]]:
case ["<!----!\n>", ["Data state"]]:
case ["<!----!\r>", ["Data state"]]:
// the line break is, for some reason, not counted in the test
$test['errors'][0]['line']++;
$test['errors'][0]['col'] = 2;
break;
case ["<!----! >", ["Data state"]]:
$test['errors'][0]['col']++;
break;
case [hex2bin("f4808080"), ["CDATA section state"]]:
case [hex2bin("3bf4808080"), ["CDATA section state"]]:
// malpaired surrogates count as two characters
$test['errors'][0]['col']++;
break;
}
}
}

21
tests/cases/TestTreeConstructor.php

@ -33,13 +33,19 @@ class TestTreeConstructor extends \PHPUnit\Framework\TestCase {
protected $depth;
protected $ns;
protected static $passed = [];
/** @dataProvider provideStandardTreeTests */
public function testStandardTreeTests(string $data, array $exp, array $errors, $fragment): void {
public function testStandardTreeTests(string $data, array $exp, array $errors, $fragment, string $id): void {
$this->runTreeTest($data, $exp, $errors, $fragment, null);
self::$passed[$id] = true;
}
/** @dataProvider provideStandardTreeTests */
public function testStandardTreeTestsWithHtmlNamespace(string $data, array $exp, array $errors, $fragment): void {
public function testStandardTreeTestsWithHtmlNamespace(string $data, array $exp, array $errors, $fragment, string $id): void {
if (!isset(self::$passed[$id])) {
$this->markTestSkipped("Null-namespaced test failed or skipped.");
}
$config = new Config;
$config->htmlNamespace = true;
$this->runTreeTest($data, $exp, $errors, $fragment, $config);
@ -49,7 +55,13 @@ class TestTreeConstructor extends \PHPUnit\Framework\TestCase {
$files = new \AppendIterator();
$files->append(new \GlobIterator(\MensBeam\HTML\Parser\BASE."tests/html5lib-tests/tree-construction/*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
$files->append(new \GlobIterator(\MensBeam\HTML\Parser\BASE."tests/cases/tree-construction/mensbeam*.dat", \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME));
return $this->parseTreeTest($files);
// filter out tests for the prospective <search> element, which has not yet been added to HTML
$filtered = new class($files) extends \FilterIterator {
    /**
     * Decide whether the inner iterator's current element is kept.
     *
     * The current element is expected to be a file pathname (the glob
     * iterators feeding this filter are created with CURRENT_AS_PATHNAME).
     *
     * @return bool False when the pathname ends in "search-element.dat", true otherwise
     */
    public function accept(): bool {
        // The dot is escaped so that only a literal ".dat" suffix is matched;
        // an unescaped "." would also match any other character in that position.
        return !preg_match('/\bsearch-element\.dat$/', parent::current());
    }
};
return $this->parseTreeTest($filtered);
}
/** @dataProvider provideProcessingInstructionTreeTests */
@ -312,7 +324,8 @@ class TestTreeConstructor extends \PHPUnit\Framework\TestCase {
if (!$script) {
// scripting-dependent tests are skipped entirely since we will not support scripting
$errors = ['old' => $errors, 'new' => $newErrors];
yield basename($file)." #$index (line $pos)" => [$data, $exp, $errors, $fragment];
$id = basename($file)." #$index (line $pos)";
yield $id => [$data, $exp, $errors, $fragment, $id];
}
$l++;
$index++;

Loading…
Cancel
Save