Browse Source

Pushing forward on TreeBuilder

• Updated mensbeam/intl dependency.
• Moved scope methods from Element to OpenElementsStack. They don't need to be used outside of the parser and don't make sense there.
• Cleaned up parse errors. Displaying what is expected or found is not helpful.
ns
Dustin Wilson 6 years ago
parent
commit
0624e0be93
  1. 7
      composer.lock
  2. 91
      lib/DOM/Element.php
  3. 101
      lib/OpenElementsStack.php
  4. 18
      lib/ParseError.php
  5. 144
      lib/Tokenizer.php
  6. 173
      lib/TreeBuilder.php

7
composer.lock

@ -8,11 +8,11 @@
"packages": [
{
"name": "mensbeam/intl",
"version": "0.3.0",
"version": "0.4.0",
"source": {
"type": "git",
"url": "https://code.mensbeam.com/MensBeam/intl",
"reference": "61993bb900ebd6150c8b762c4b0158f0f4a5fc78"
"reference": "58328b7524b6889889ef0a60ab9e622b3d071a4a"
},
"require": {
"php": "^7.0"
@ -40,6 +40,7 @@
],
"description": "A set of dependency-free basic internationalization tools",
"keywords": [
"WHATWG",
"charset",
"encoding",
"internationalization",
@ -48,7 +49,7 @@
"utf-8",
"utf8"
],
"time": "2018-08-29T21:37:44+00:00"
"time": "2018-09-15T23:52:16+00:00"
}
],
"packages-dev": [],

91
lib/DOM/Element.php

@ -135,95 +135,4 @@ class Element extends \DOMElement {
return $s;
}
// Tests this element against the "list item scope" element types.
// Returns true when isInScope() accepts this element, or when its namespace
// URI is the empty string and its name is "ol" or "ul".
public function isInListItemScope(): bool {
    $tagName = $this->name;
    $namespace = $this->namespaceURI;

    # The stack of open elements is said to have a particular element in list item
    # scope when it has that element in the specific scope consisting of the
    # following element types:
    #
    # All the element types listed above for the has an element in scope
    # algorithm.
    if ($this->isInScope()) {
        return true;
    }

    # ol in the HTML namespace
    # ul in the HTML namespace
    return $namespace === '' && in_array($tagName, ['ol', 'ul'], true);
}
// Tests this element against the "button scope" element types.
// Returns true when isInScope() accepts this element, or when its namespace
// URI is the empty string and its name is "button".
public function isInButtonScope(): bool {
    $tagName = $this->name;
    $namespace = $this->namespaceURI;

    # The stack of open elements is said to have a particular element in button
    # scope when it has that element in the specific scope consisting of the
    # following element types:
    #
    # All the element types listed above for the has an element in scope
    # algorithm.
    if ($this->isInScope()) {
        return true;
    }

    # button in the HTML namespace
    $isButton = ($tagName === 'button');
    return $namespace === '' && $isButton;
}
// Tests this element against the "table scope" element types.
// Returns true only when the namespace URI is the empty string and the name
// is "html", "table" or "template".
public function isInTableScope(): bool {
    $tagName = $this->name;

    # The stack of open elements is said to have a particular element in table scope
    # when it has that element in the specific scope consisting of the following
    # element types:
    #
    # html in the HTML namespace
    # table in the HTML namespace
    # template in the HTML namespace
    if ($this->namespaceURI !== '') {
        return false;
    }

    return in_array($tagName, ['html', 'table', 'template'], true);
}
// Tests this element against the "select scope" element types.
// Returns false only when the namespace URI is the empty string and the name
// is "optgroup" or "option"; every other element yields true.
public function isInSelectScope(): bool {
    $name = $this->name;
    $ns = $this->namespaceURI;

    # The stack of open elements is said to have a particular element in select
    # scope when it has that element in the specific scope consisting of all element
    # types except the following:
    #
    # optgroup in the HTML namespace
    # option in the HTML namespace
    $isExcluded = $ns === '' && in_array($name, ['optgroup', 'option'], true);

    // The listed types are the exceptions, hence the negation.
    return !$isExcluded;
}
// Tests this element against the base "in scope" element types.
// Returns true when the (namespace URI, name) pair matches one of the three
// groups below; the empty namespace URI is the one checked for the plain
// (HTML) names.
protected function isInScope(): bool {
    $tagName = $this->name;
    $namespace = $this->namespaceURI;

    # The stack of open elements is said to have a particular element in scope when
    # it has that element in the specific scope consisting of the following element
    # types:
    #
    # applet
    # caption
    # html
    # table
    # td
    # th
    # marquee
    # object
    # template
    $htmlNames = ['applet', 'caption', 'html', 'table', 'td', 'th', 'marquee', 'object', 'template'];
    if ($namespace === '' && in_array($tagName, $htmlNames, true)) {
        return true;
    }

    # MathML mi
    # MathML mo
    # MathML mn
    # MathML ms
    # MathML mtext
    # MathML annotation-xml
    $mathmlNames = ['mi', 'mo', 'mn', 'ms', 'mtext', 'annotation-xml'];
    if ($namespace === Parser::MATHML_NAMESPACE && in_array($tagName, $mathmlNames, true)) {
        return true;
    }

    # SVG foreignObject
    # SVG desc
    # SVG title
    $svgNames = ['foreignObject', 'desc', 'title'];
    return $namespace === Parser::SVG_NAMESPACE && in_array($tagName, $svgNames, true);
}
}

101
lib/OpenElementsStack.php

@ -84,19 +84,19 @@ class OpenElementsStack extends Stack {
protected function hasElementInScope(string $elementName, int $type): bool {
switch ($type) {
case 0: $func = 'isInListScope';
case 0: $func = 'isElementInListScope';
break;
case 1: $func = 'isInButtonScope';
case 1: $func = 'isElementInButtonScope';
break;
case 2: $func = 'isInTableScope';
case 2: $func = 'isElementInTableScope';
break;
case 3: $func = 'isInSelectScope';
case 3: $func = 'isElementInSelectScope';
break;
default: return false;
}
foreach (array_reverse($this->_storage) as $key => $value) {
if ($value->$func()) {
if ($this->$func($value)) {
return true;
}
}
@ -104,6 +104,97 @@ class OpenElementsStack extends Stack {
return false;
}
// Tests the given element against the "list item scope" element types.
// Returns true when isElementInScope() accepts the element, or when its
// namespace URI is the empty string and its name is "ol" or "ul".
protected function isElementInListItemScope(Element $element): bool {
    $tagName = $element->name;
    $namespace = $element->namespaceURI;

    # The stack of open elements is said to have a particular element in list item
    # scope when it has that element in the specific scope consisting of the
    # following element types:
    #
    # All the element types listed above for the has an element in scope
    # algorithm.
    if ($this->isElementInScope($element)) {
        return true;
    }

    # ol in the HTML namespace
    # ul in the HTML namespace
    return $namespace === '' && in_array($tagName, ['ol', 'ul'], true);
}
// Tests the given element against the "button scope" element types.
// Returns true when isElementInScope() accepts the element, or when its
// namespace URI is the empty string and its name is "button".
protected function isElementInButtonScope(Element $element): bool {
    $tagName = $element->name;
    $namespace = $element->namespaceURI;

    # The stack of open elements is said to have a particular element in button
    # scope when it has that element in the specific scope consisting of the
    # following element types:
    #
    # All the element types listed above for the has an element in scope
    # algorithm.
    if ($this->isElementInScope($element)) {
        return true;
    }

    # button in the HTML namespace
    $isButton = ($tagName === 'button');
    return $namespace === '' && $isButton;
}
// Tests the given element against the "table scope" element types.
// Returns true only when the namespace URI is the empty string and the name
// is "html", "table" or "template".
protected function isElementInTableScope(Element $element): bool {
    $tagName = $element->name;

    # The stack of open elements is said to have a particular element in table scope
    # when it has that element in the specific scope consisting of the following
    # element types:
    #
    # html in the HTML namespace
    # table in the HTML namespace
    # template in the HTML namespace
    if ($element->namespaceURI !== '') {
        return false;
    }

    return in_array($tagName, ['html', 'table', 'template'], true);
}
// Tests the given element against the "select scope" element types.
// Returns false only when the namespace URI is the empty string and the name
// is "optgroup" or "option"; every other element yields true.
protected function isElementInSelectScope(Element $element): bool {
    $name = $element->name;
    $ns = $element->namespaceURI;

    # The stack of open elements is said to have a particular element in select
    # scope when it has that element in the specific scope consisting of all element
    # types except the following:
    #
    # optgroup in the HTML namespace
    # option in the HTML namespace
    $isExcluded = $ns === '' && in_array($name, ['optgroup', 'option'], true);

    // The listed types are the exceptions, hence the negation.
    return !$isExcluded;
}
// Tests the given element against the base "in scope" element types.
// Returns true when the (namespace URI, name) pair matches one of the three
// groups below; the empty namespace URI is the one checked for the plain
// (HTML) names.
protected function isElementInScope(Element $element): bool {
    $tagName = $element->name;
    $namespace = $element->namespaceURI;

    # The stack of open elements is said to have a particular element in scope when
    # it has that element in the specific scope consisting of the following element
    # types:
    #
    # applet
    # caption
    # html
    # table
    # td
    # th
    # marquee
    # object
    # template
    $htmlNames = ['applet', 'caption', 'html', 'table', 'td', 'th', 'marquee', 'object', 'template'];
    if ($namespace === '' && in_array($tagName, $htmlNames, true)) {
        return true;
    }

    # MathML mi
    # MathML mo
    # MathML mn
    # MathML ms
    # MathML mtext
    # MathML annotation-xml
    $mathmlNames = ['mi', 'mo', 'mn', 'ms', 'mtext', 'annotation-xml'];
    if ($namespace === Parser::MATHML_NAMESPACE && in_array($tagName, $mathmlNames, true)) {
        return true;
    }

    # SVG foreignObject
    # SVG desc
    # SVG title
    $svgNames = ['foreignObject', 'desc', 'title'];
    return $namespace === Parser::SVG_NAMESPACE && in_array($tagName, $svgNames, true);
}
public function __get($property) {
$value = parent::__get($property);
if (!is_null($value)) {

18
lib/ParseError.php

@ -21,18 +21,18 @@ class ParseError {
const INVALID_NAMED_ENTITY = 14;
const INVALID_CODEPOINT = 15;
protected static $messages = ['Tag name expected; found %s',
'Unexpected end-of-file; %s expected',
'Unexpected "%s" character; %s expected',
protected static $messages = ['Tag name expected',
'Unexpected end-of-file',
'Unexpected "%s" character',
'%s attribute already exists; discarding',
'Unexpected end-of-tag; %s expected',
'Unexpected %s start tag; %s expected',
'Unexpected %s end tag; %s expected',
'Unexpected DOCTYPE; %s expected',
'Unexpected end-of-tag',
'Unexpected %s start tag',
'Unexpected %s end tag',
'Unexpected DOCTYPE',
'Invalid DOCTYPE',
'Invalid Control or Non-character; removing',
'Unexpected xmlns attribute value; %s expected',
'Unexpected "%s" character in entity; %s expected',
'Unexpected xmlns attribute value',
'Unexpected "%s" character in entity',
'"%s" is an invalid numeric entity',
'"%s" is an invalid name for an entity',
'"%s" is an invalid character codepoint'];

144
lib/Tokenizer.php

@ -431,7 +431,7 @@ class Tokenizer {
if ($char !== '') {
ParseError::trigger(ParseError::TAG_NAME_EXPECTED, $char);
} else {
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'tag name');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
}
$this->state = self::BOGUS_COMMENT_STATE;
@ -445,7 +445,7 @@ class Tokenizer {
if ($char !== '') {
ParseError::trigger(ParseError::TAG_NAME_EXPECTED, $char);
} else {
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'tag name');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
}
$this->state = self::DATA_STATE;
@ -491,7 +491,7 @@ class Tokenizer {
# Parse error. Switch to the data state. Emit a U+003C LESS-THAN SIGN character
# token and a U+002F SOLIDUS character token. Reconsume the EOF character.
// Making errors more expressive.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'tag name');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$this->data->unconsume();
return new CharacterToken('</');
@ -547,7 +547,7 @@ class Tokenizer {
if ($char !== '') {
ParseError::trigger(ParseError::TAG_NAME_EXPECTED, $char);
} else {
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'tag name');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
}
$this->state = self::DATA_STATE;
@ -1072,7 +1072,7 @@ class Tokenizer {
elseif ($char === '') {
# Switch to the data state. Parse error. Reconsume the EOF character.
$this->state = self::DATA_STATE;
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'script data');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->data->unconsume();
}
# Anything else
@ -1107,7 +1107,7 @@ class Tokenizer {
elseif ($char === '') {
# Switch to the data state. Parse error. Reconsume the EOF character.
$this->state = self::DATA_STATE;
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'script data');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->data->unconsume();
}
# Anything else
@ -1147,7 +1147,7 @@ class Tokenizer {
elseif ($char === '') {
# Switch to the data state. Parse error. Reconsume the EOF character.
$this->state = self::DATA_STATE;
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'script data');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->data->unconsume();
}
# Anything else
@ -1356,7 +1356,7 @@ class Tokenizer {
# EOF
elseif ($char === '') {
# Parse error. Switch to the data state. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'script data');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$this->data->unconsume();
}
@ -1489,7 +1489,7 @@ class Tokenizer {
# EOF
elseif ($char === '') {
# Parse error. Switch to the data state. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'attribute name');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$this->data->unconsume();
}
@ -1507,7 +1507,7 @@ class Tokenizer {
# attribute name state.
if ($char === '"' || $char === "'" || $char === '<' || $char === '=') {
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char, 'attribute name');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char);
}
// Need to add the current attribute to the token, if necessary.
@ -1573,7 +1573,7 @@ class Tokenizer {
# Append the current input character to the current attribute's name.
if ($char === '"' || $char === "'" || $char === '<' || $char === '=') {
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char, 'attribute name');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char);
}
// OPTIMIZATION: Will just check for alpha characters and strtolower the
@ -1648,7 +1648,7 @@ class Tokenizer {
# EOF
elseif ($char === '') {
# Parse error. Switch to the data state. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'attribute name, attribute value, or tag end');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$this->data->unconsume();
}
@ -1666,7 +1666,7 @@ class Tokenizer {
# attribute name state.
if ($char === '"' || $char === "'" || $char === '<' || $char === '=') {
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char, 'attribute name, attribute value, or tag end');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char);
}
// Need to add the current attribute to the token, if necessary.
@ -1714,7 +1714,7 @@ class Tokenizer {
# ">" (U+003E)
elseif ($char === '>') {
# Parse error. Switch to the data state. Emit the current tag token.
ParseError::trigger(ParseError::UNEXPECTED_END_OF_TAG, 'attribute value');
ParseError::trigger(ParseError::UNEXPECTED_END_OF_TAG);
$this->state = self::DATA_STATE;
// Need to add the current attribute to the token, if necessary.
@ -1728,7 +1728,7 @@ class Tokenizer {
# EOF
elseif ($char === '') {
# Parse error. Switch to the data state. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'attribute value');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$this->data->unconsume();
}
@ -1744,7 +1744,7 @@ class Tokenizer {
# the attribute value (unquoted) state.
if ($char === '<' || $char === '=' || $char === '`') {
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char, 'attribute value');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char);
}
$attribute->value .= $char;
@ -1783,7 +1783,7 @@ class Tokenizer {
# EOF
elseif ($char === '') {
# Parse error. Switch to the data state. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'attribute value');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$this->data->unconsume();
}
@ -1828,7 +1828,7 @@ class Tokenizer {
# EOF
elseif ($char === '') {
# Parse error. Switch to the data state. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'attribute value');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$this->data->unconsume();
}
@ -1892,7 +1892,7 @@ class Tokenizer {
}
# Parse error. Switch to the data state. Reconsume the EOF character.
elseif ($char === '') {
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'attribute value');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$this->data->unconsume();
}
@ -1909,7 +1909,7 @@ class Tokenizer {
# Append the current input character to the current attribute's value.
if ($char === '"' || $char === "'" || $char === '<' || $char === '=' || $char === '`') {
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char, 'attribute value');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char);
}
// OPTIMIZATION: Consume all characters that aren't listed above to prevent having
@ -1954,14 +1954,14 @@ class Tokenizer {
# EOF
elseif ($char === '') {
# Parse error. Switch to the data state. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'attribute name or tag end');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$this->data->unconsume();
}
# Anything else
else {
# Parse error. Switch to the before attribute name state. Reconsume the character.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char, 'attribute name or tag end');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char);
$this->state = self::BEFORE_ATTRIBUTE_NAME_STATE;
$this->data->unconsume();
}
@ -1992,14 +1992,14 @@ class Tokenizer {
# EOF
elseif ($char === '') {
# Parse error. Switch to the data state. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'tag end');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$this->data->unconsume();
}
# Anything else
else {
# Parse error. Switch to the before attribute name state. Reconsume the character.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char, 'tag end');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char);
$this->state = self::BEFORE_ATTRIBUTE_NAME_STATE;
$this->data->unconsume();
}
@ -2067,9 +2067,9 @@ class Tokenizer {
else {
$char = $this->data->consume();
if ($char !== '') {
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char, 'markup declaration');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char);
} else {
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'markup declaration');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
}
$this->state = self::BOGUS_COMMENT_STATE;
@ -2092,7 +2092,7 @@ class Tokenizer {
# ">" (U+003E)
elseif ($char === '>') {
# Parse error. Switch to the data state. Emit the comment token.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>', 'comment');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>');
$this->state = self::DATA_STATE;
return $token;
}
@ -2100,7 +2100,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Emit the comment token. Reconsume the EOF
# character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'comment');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$this->data->unconsume();
return $token;
@ -2129,7 +2129,7 @@ class Tokenizer {
# ">" (U+003E)
elseif ($char === '>') {
# Parse error. Switch to the data state. Emit the comment token.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>', 'comment');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>');
$this->state = self::DATA_STATE;
return $token;
}
@ -2137,7 +2137,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Emit the comment token. Reconsume the EOF
# character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'comment');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$this->data->unconsume();
return $token;
@ -2167,7 +2167,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Emit the comment token. Reconsume the EOF
# character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'comment');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$this->data->unconsume();
return $token;
@ -2198,7 +2198,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Emit the comment token. Reconsume the EOF
# character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'comment');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$this->data->unconsume();
return $token;
@ -2228,7 +2228,7 @@ class Tokenizer {
# "!" (U+0021)
elseif ($char === '!') {
# Parse error. Switch to the comment end bang state.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '!', 'comment end');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '!');
$this->state = self::COMMENT_END_BANG_STATE;
}
# "-" (U+002D)
@ -2239,7 +2239,7 @@ class Tokenizer {
// here every single time.
$char .= $this->data->consumeWhile('-');
for ($i = 0; $i < strlen($char); $i++) {
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '-', 'comment end');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '-');
}
$token->data .= $char;
@ -2248,7 +2248,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Emit the comment token. Reconsume the EOF
# character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'comment end');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$this->data->unconsume();
return $token;
@ -2257,7 +2257,7 @@ class Tokenizer {
else {
# Parse error. Append two "-" (U+002D) characters and the current input character
# to the comment token's data. Switch to the comment state.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char, 'comment end');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char);
$token->data .= '--'.$char;
$this->state = self::COMMENT_STATE;
}
@ -2287,7 +2287,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Emit the comment token. Reconsume the EOF
# character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'comment end');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$this->data->unconsume();
return $token;
@ -2324,7 +2324,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Create a new DOCTYPE token. Set its
# force-quirks flag to on. Emit the token. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'DOCTYPE');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$token = new DOCTYPEToken();
$token->forceQuirks = true;
@ -2334,7 +2334,7 @@ class Tokenizer {
# Anything else
else {
# Parse error. Switch to the before DOCTYPE name state. Reconsume the character.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char, 'DOCTYPE');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char);
$this->state = self::DOCTYPE_NAME_STATE;
$this->data->unconsume();
}
@ -2366,7 +2366,7 @@ class Tokenizer {
elseif ($char === '>') {
# Parse error. Create a new DOCTYPE token. Set its force-quirks flag to on. Switch
# to the data state. Emit the token.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>', 'DOCTYPE');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>');
$token = new DOCTYPEToken();
$token->forceQuirks = true;
$this->state = self::DATA_STATE;
@ -2376,7 +2376,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Create a new DOCTYPE token. Set its
# force-quirks flag to on. Emit the token. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'DOCTYPE');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$token = new DOCTYPEToken();
$token->forceQuirks = true;
@ -2428,7 +2428,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Set the DOCTYPE token's force-quirks flag
# to on. Emit that DOCTYPE token. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'DOCTYPE');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$token->forceQuirks = true;
$this->data->unconsume();
@ -2469,7 +2469,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Set the DOCTYPE token's force-quirks flag
# to on. Emit that DOCTYPE token. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'DOCTYPE name');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$token->forceQuirks = true;
$this->data->unconsume();
@ -2496,7 +2496,7 @@ class Tokenizer {
else {
// Need to unconsume what was consumed earlier.
$this->data->unconsume(5);
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char[0], 'DOCTYPE name');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char[0]);
$token->forceQuirks = true;
$this->state = self::BOGUS_DOCTYPE_STATE;
}
@ -2522,7 +2522,7 @@ class Tokenizer {
elseif ($char === '"') {
# Parse error. Set the DOCTYPE token's public identifier to the empty string (not
# missing), then switch to the DOCTYPE public identifier (double-quoted) state.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '"', 'DOCTYPE public keyword');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '"');
$token->public = '';
$this->state = self::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
}
@ -2530,7 +2530,7 @@ class Tokenizer {
elseif ($char === "'") {
# Parse error. Set the DOCTYPE token's public identifier to the empty string (not
# missing), then switch to the DOCTYPE public identifier (single-quoted) state.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, "'", 'DOCTYPE public keyword');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, "'");
$token->public = '';
$this->state = self::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
}
@ -2538,7 +2538,7 @@ class Tokenizer {
elseif ($char === '>') {
# Parse error. Set the DOCTYPE token's force-quirks flag to on. Switch to the data
# state. Emit that DOCTYPE token.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>', 'DOCTYPE public keyword');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>');
$token->forceQuirks = true;
$this->state = self::DATA_STATE;
return $token;
@ -2547,7 +2547,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Set the DOCTYPE token's force-quirks flag
# to on. Emit that DOCTYPE token. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'DOCTYPE public keyword');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$token->forceQuirks = true;
$this->data->unconsume();
@ -2557,7 +2557,7 @@ class Tokenizer {
else {
# Parse error. Set the DOCTYPE token's force-quirks flag to on. Switch to the
# bogus DOCTYPE state.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char, 'DOCTYPE public keyword');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char);
$token->forceQuirks = true;
$this->state = self::BOGUS_DOCTYPE_STATE;
}
@ -2595,7 +2595,7 @@ class Tokenizer {
elseif ($char === '>') {
# Parse error. Set the DOCTYPE token's force-quirks flag to on. Switch to the data
# state. Emit that DOCTYPE token.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>', 'DOCTYPE public identifier');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>');
$token->forceQuirks = true;
$this->state = self::DATA_STATE;
return $token;
@ -2604,7 +2604,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Set the DOCTYPE token's force-quirks flag
# to on. Emit that DOCTYPE token. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'DOCTYPE public identifier');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$token->forceQuirks = true;
$this->data->unconsume();
@ -2614,7 +2614,7 @@ class Tokenizer {
else {
# Parse error. Set the DOCTYPE token's force-quirks flag to on. Switch to the
# bogus DOCTYPE state.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char, 'DOCTYPE public identifier');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char);
$token->forceQuirks = true;
$this->state = self::BOGUS_DOCTYPE_STATE;
}
@ -2636,7 +2636,7 @@ class Tokenizer {
elseif ($char === '>') {
# Parse error. Set the DOCTYPE token's force-quirks flag to on. Switch to the data
# state. Emit that DOCTYPE token.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>', 'DOCTYPE public identifier');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>');
$token->forceQuirks = true;
$this->state = self::DATA_STATE;
return $token;
@ -2645,7 +2645,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Set the DOCTYPE token's force-quirks flag
# to on. Emit that DOCTYPE token. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'DOCTYPE public identifier');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$token->forceQuirks = true;
$this->data->unconsume();
@ -2677,7 +2677,7 @@ class Tokenizer {
elseif ($char === '>') {
# Parse error. Set the DOCTYPE token's force-quirks flag to on. Switch to the data
# state. Emit that DOCTYPE token.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>', 'DOCTYPE public identifier');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>');
$this->state = self::DATA_STATE;
return $token;
}
@ -2685,7 +2685,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Set the DOCTYPE token's force-quirks flag
# to on. Emit that DOCTYPE token. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'DOCTYPE public identifier');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$token->forceQuirks = true;
$this->data->unconsume();
@ -2740,7 +2740,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Set the DOCTYPE token's force-quirks flag
# to on. Emit that DOCTYPE token. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'DOCTYPE public identifier');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$token->forceQuirks = true;
$this->data->unconsume();
@ -2750,7 +2750,7 @@ class Tokenizer {
else {
# Parse error. Set the DOCTYPE token's force-quirks flag to on. Switch to the
# bogus DOCTYPE state.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char, 'DOCTYPE public identifier');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char);
$token->forceQuirks = true;
$this->state = self::BOGUS_DOCTYPE_STATE;
}
@ -2829,7 +2829,7 @@ class Tokenizer {
elseif ($char === '"') {
# Parse error. Set the DOCTYPE token's system identifier to the empty string (not
# missing), then switch to the DOCTYPE system identifier (double-quoted) state.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '"', 'DOCTYPE system keyword');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '"');
$token->system = '';
$this->state = self::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
}
@ -2837,7 +2837,7 @@ class Tokenizer {
elseif ($char === "'") {
# Parse error. Set the DOCTYPE token's system identifier to the empty string (not
# missing), then switch to the DOCTYPE system identifier (single-quoted) state.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, "'", 'DOCTYPE system keyword');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, "'");
$token->system = '';
$this->state = self::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
}
@ -2845,7 +2845,7 @@ class Tokenizer {
elseif ($char === '>') {
# Parse error. Set the DOCTYPE token's force-quirks flag to on. Switch to the data
# state. Emit that DOCTYPE token.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>', 'DOCTYPE system keyword');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>');
$token->forceQuirks = true;
$this->state = self::DATA_STATE;
return $token;
@ -2854,7 +2854,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Set the DOCTYPE token's force-quirks flag
# to on. Emit that DOCTYPE token. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'DOCTYPE system keyword');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$token->forceQuirks = true;
$this->data->unconsume();
@ -2864,7 +2864,7 @@ class Tokenizer {
else {
# Parse error. Set the DOCTYPE token's force-quirks flag to on. Switch to the
# bogus DOCTYPE state.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char, 'DOCTYPE system keyword');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char);
$token->forceQuirks = true;
$this->state = self::BOGUS_DOCTYPE_STATE;
}
@ -2902,7 +2902,7 @@ class Tokenizer {
elseif ($char === '>') {
# Parse error. Set the DOCTYPE token's force-quirks flag to on. Switch to the data
# state. Emit that DOCTYPE token.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>', 'DOCTYPE system identifier');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>');
$token->forceQuirks = true;
$this->state = self::DATA_STATE;
return $token;
@ -2911,7 +2911,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Set the DOCTYPE token's force-quirks flag
# to on. Emit that DOCTYPE token. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'DOCTYPE system identifier');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$token->forceQuirks = true;
$this->data->unconsume();
@ -2921,7 +2921,7 @@ class Tokenizer {
else {
# Parse error. Set the DOCTYPE token's force-quirks flag to on. Switch to the
# bogus DOCTYPE state.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char, 'DOCTYPE system identifier');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char);
$token->forceQuirks = true;
$this->state = self::BOGUS_DOCTYPE_STATE;
}
@ -2943,7 +2943,7 @@ class Tokenizer {
elseif ($char === '>') {
# Parse error. Set the DOCTYPE token's force-quirks flag to on. Switch to the data
# state. Emit that DOCTYPE token.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>', 'DOCTYPE system identifier');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>');
$this->state = self::DATA_STATE;
return $token;
}
@ -2951,7 +2951,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Set the DOCTYPE token's force-quirks flag
# to on. Emit that DOCTYPE token. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'DOCTYPE system identifier');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$token->forceQuirks = true;
$this->data->unconsume();
@ -2983,7 +2983,7 @@ class Tokenizer {
elseif ($char === '>') {
# Parse error. Set the DOCTYPE token's force-quirks flag to on. Switch to the data
# state. Emit that DOCTYPE token.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>', 'DOCTYPE system identifier');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, '>');
$this->state = self::DATA_STATE;
return $token;
}
@ -2991,7 +2991,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Set the DOCTYPE token's force-quirks flag
# to on. Emit that DOCTYPE token. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'DOCTYPE system identifier');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$token->forceQuirks = true;
$this->data->unconsume();
@ -3046,7 +3046,7 @@ class Tokenizer {
elseif ($char === '') {
# Parse error. Switch to the data state. Set the DOCTYPE token's force-quirks flag
# to on. Emit that DOCTYPE token. Reconsume the EOF character.
ParseError::trigger(ParseError::UNEXPECTED_EOF, 'DOCTYPE system identifier');
ParseError::trigger(ParseError::UNEXPECTED_EOF);
$this->state = self::DATA_STATE;
$token->forceQuirks = true;
$this->data->unconsume();
@ -3056,7 +3056,7 @@ class Tokenizer {
else {
# Parse error. Set the DOCTYPE token's force-quirks flag to on. Switch to the
# bogus DOCTYPE state.
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char, 'DOCTYPE system identifier');
ParseError::trigger(ParseError::UNEXPECTED_CHARACTER, $char);
$token->forceQuirks = true;
$this->state = self::BOGUS_DOCTYPE_STATE;
}

173
lib/TreeBuilder.php

@ -385,16 +385,15 @@ class TreeBuilder {
// DEVIATION: There is no iframe srcdoc document because there are no nested
// browsing contexts in this implementation.
switch (get_class($token)) {
case 'StartTagToken': $errorType = ParseError::UNEXPECTED_START_TAG;
case 'StartTagToken': ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $token->name);
break;
case 'EndTagToken': $errorType = ParseError::UNEXPECTED_END_TAG;
case 'EndTagToken': ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $token->name);
break;
case 'EOFToken': $errorType = ParseError::UNEXPECTED_EOF;
case 'EOFToken': ParseError::trigger(ParseError::UNEXPECTED_EOF);
break;
default: throw new Exception(Exception::UNKNOWN_ERROR);
}
ParseError::trigger($errorType, 'doctype');
$this->quirksMode = self::QUIRKS_MODE_ON;
# In any case, switch the insertion mode to "before html", then reprocess the
@ -409,7 +408,7 @@ class TreeBuilder {
case self::BEFORE_HTML_MODE:
# A DOCTYPE token
if ($token instanceof DOCTYPEToken) {
ParseError::trigger(ParseError::UNEXPECTED_DOCTYPE, 'html start tag, comment');
ParseError::trigger(ParseError::UNEXPECTED_DOCTYPE);
}
# A comment token
elseif ($token instanceof CommentToken) {
@ -436,7 +435,7 @@ class TreeBuilder {
# Any other end tag
elseif ($token instanceof EndTagToken && $token->name !== 'head' && $token->name !== 'body' && $token->name !== 'html' && $token->name !== 'br') {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $token->name, 'head, body, html, br end tag');
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $token->name);
}
# An end tag whose tag name is one of: "head", "body", "html", "br"
# Anything else
@ -475,7 +474,7 @@ class TreeBuilder {
# A DOCTYPE token
elseif ($token instanceof DOCTYPEToken) {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_DOCTYPE, 'head tag');
ParseError::trigger(ParseError::UNEXPECTED_DOCTYPE);
}
elseif ($token instanceof StartTagToken) {
# A start tag whose tag name is "html"
@ -498,7 +497,7 @@ class TreeBuilder {
# Any other end tag
elseif ($token instanceof EndTagToken && $token->name !== 'head' && $token->name !== 'body' && $token->name !== 'html' && $token->name === 'br') {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $token->name, 'head, body, html, br end tag');
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $token->name);
}
# An end tag whose tag name is one of: "head", "body", "html", "br"
# Anything else
@ -535,7 +534,7 @@ class TreeBuilder {
# A DOCTYPE token
elseif ($token instanceof DOCTYPEToken) {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_DOCTYPE, 'head data');
ParseError::trigger(ParseError::UNEXPECTED_DOCTYPE);
}
elseif ($token instanceof StartTagToken) {
# A start tag whose tag name is "html"
@ -648,7 +647,7 @@ class TreeBuilder {
# A start tag whose tag name is "head"
elseif ($token->name === 'head') {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, 'head', 'base, basefont, bgsound, link, meta, title, noframes, style, noscript, script, template start tag');
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, 'head');
}
# Anything else
else {
@ -691,7 +690,7 @@ class TreeBuilder {
# If there is no template element on the stack of open elements, then this is a
# parse error; ignore the token.
if ($this->stack->search('template') === -1) {
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'template', (string)$this->stack.' end tag');
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'template');
}
# Otherwise, run these steps:
else {
@ -700,7 +699,7 @@ class TreeBuilder {
# 2. If the current node is not a template element, then this is a parse error.
if ($this->stack->currentNodeName !== 'template') {
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'template', (string)$this->stack.' end tag');
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'template');
}
# 3. Pop elements from the stack of open elements until a template element has been popped from the stack.
@ -721,7 +720,7 @@ class TreeBuilder {
# Any other end tag
else {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $token->name, (string)$this->stack.' end tag');
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $token->name);
}
}
# Anything else
@ -742,7 +741,7 @@ class TreeBuilder {
# DOCTYPE token
if ($token instanceof DOCTYPEToken) {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_DOCTYPE, 'head data');
ParseError::trigger(ParseError::UNEXPECTED_DOCTYPE);
}
elseif ($token instanceof StartTagToken) {
# A start tag whose tag name is "html"
@ -761,14 +760,14 @@ class TreeBuilder {
# A start tag whose tag name is one of: "head", "noscript"
elseif ($token->name === 'head' || $token->name === 'noscript') {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $token->name, 'basefont, bgsound, link, meta, noframes, style start tag');
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $token->name);
}
# Anything else
else {
# Act as described in the "anything else" entry below.
#
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $token->name, 'basefont, bgsound, link, meta, noframes, style start tag');
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $token->name);
# Pop the current node (which will be a noscript element) from the stack of open
# elements; the new current node will be a head element.
$this->stack->pop();
@ -793,7 +792,7 @@ class TreeBuilder {
# Act as described in the "anything else" entry below.
#
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'br', (string)$this->stack.' end tag');
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'br');
# Pop the current node (which will be a noscript element) from the stack of open
# elements; the new current node will be a head element.
$this->stack->pop();
@ -806,7 +805,7 @@ class TreeBuilder {
# Any other end tag
else {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'br', (string)$this->stack.' end tag');
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'br');
}
}
# A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED
@ -823,7 +822,7 @@ class TreeBuilder {
# Anything else
else {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'br', (string)$this->stack.' end tag');
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'br');
# Pop the current node (which will be a noscript element) from the stack of open
# elements; the new current node will be a head element.
$this->stack->pop();
@ -853,7 +852,7 @@ class TreeBuilder {
# A DOCTYPE token
elseif ($token instanceof DOCTYPEToken) {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_DOCTYPE, 'body, frameset start tag');
ParseError::trigger(ParseError::UNEXPECTED_DOCTYPE);
}
elseif ($token instanceof StartTagToken) {
# A start tag whose tag name is "html"
@ -882,7 +881,7 @@ class TreeBuilder {
# "meta", "noframes", "script", "style", "template", "title"
elseif ($token->name === 'base' || $token->name === 'basefont' || $token->name === 'bgsound' || $token->name === 'link' || $token->name === 'meta' || $token->name === 'noframes' || $token->name === 'script' || $token->name === 'style' || $token->name === 'template' || $token->name === 'title') {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $token->name, 'body, frameset start tag');
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $token->name);
# Push the node pointed to by the head element pointer onto the stack of open elements.
$this->stack[] = $this->headElement;
# Process the token using the rules for the "in head" insertion mode.
@ -898,7 +897,7 @@ class TreeBuilder {
# A start tag whose tag name is "head"
elseif ($token->name === 'head') {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, 'head', 'body, frameset start tag');
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, 'head');
}
# Any other start tag
else {
@ -935,7 +934,7 @@ class TreeBuilder {
# Any other end tag
else {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'head', 'body, frameset end tag');
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'head');
}
}
# Anything else
@ -980,13 +979,13 @@ class TreeBuilder {
# A DOCTYPE token
elseif ($token instanceof DOCTYPEToken) {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_DOCTYPE, 'body content');
ParseError::trigger(ParseError::UNEXPECTED_DOCTYPE);
}
elseif ($token instanceof StartTagToken) {
# A start tag whose tag name is "html"
if ($token->name === 'html') {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, 'html', 'body content');
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, 'html');
# If there is a template element on the stack of open elements, then ignore the
# token.
if ($this->stack->search('template') === -1) {
@ -1011,7 +1010,7 @@ class TreeBuilder {
# A start tag whose tag name is "body"
elseif ($token->name === 'body') {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, 'body', 'body content');
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, 'body');
# If the second element on the stack of open elements is not a body element, if
# the stack of open elements has only one node on it, or if there is a template
# element on the stack of open elements, then ignore the token. (fragment case)
@ -1033,7 +1032,7 @@ class TreeBuilder {
# A start tag whose tag name is "frameset"
elseif ($token->name === 'frameset') {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, 'frameset', 'body content');
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, 'frameset');
# If the stack of open elements has only one node on it, or if the second
# element on the stack of open elements is not a body element, then ignore the
@ -1087,7 +1086,7 @@ class TreeBuilder {
$currentNodeName = $this->stack->currentNodeName;
$currentNodeNamespace = $this->stack->currentNodeNamespace;
if ($currentNodeNamespace === '' && ($currentNodeName === 'h1' || $currentNodeName === 'h2' || $currentNodeName === 'h3' || $currentNodeName === 'h4' || $currentNodeName === 'h5' || $currentNodeName === 'h6')) {
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $token->name, $currentNodeName . ' content or end tag');
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $token->name);
$this->stack->pop();
}
@ -1112,20 +1111,95 @@ class TreeBuilder {
# token and move on to the next one. (Newlines at the start of pre blocks are
# ignored as an authoring convenience.)
$nextToken = $this->tokenizer->createToken();
if ($token instanceof CharacterToken) {
if ($nextToken instanceof CharacterToken) {
// Character tokens in this implementation can have more than one character in
// them.
if (strlen($token->data) === 1 && $token->data === "\n") {
if (strlen($nextToken->data) === 1 && $nextToken->data === "\n") {
return true;
} elseif (strpos($token->data, "\n") === 0) {
$token->data = substr($token->data, 1);
} elseif (strpos($nextToken->data, "\n") === 0) {
$nextToken->data = substr($nextToken->data, 1);
}
}
// Process the next token
// Process the next token
$token = $nextToken;
continue 2;
}
# A start tag whose tag name is "form"
elseif ($token->name === 'form') {
    # If the form element pointer is not null, and there is no template element on
    # the stack of open elements, then this is a parse error; ignore the token.
    // Looked up once; the result is needed both for the guard and for deciding
    // whether to record the form element pointer below.
    $templateInStack = ($this->stack->search('template') !== -1);
    if (!is_null($this->formElement) && !$templateInStack) {
        ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $token->name);
    }
    # Otherwise:
    else {
        # If the stack of open elements has a p element in button scope, then close a p
        # element.
        if ($this->stack->hasElementInButtonScope('p')) {
            $this->closePElement();
        }

        # Insert an HTML element for the token, and, if there is no template element on
        # the stack of open elements, set the form element pointer to point to the
        # element created.
        $form = $this->insertStartTagToken($token);
        // The pointer is recorded only when NO template element is open; the
        // previous check was inverted and set it only inside templates.
        if (!$templateInStack) {
            $this->formElement = $form;
        }
    }
}
# A start tag whose tag name is "li"
elseif ($token->name === 'li') {
# 1. Set the frameset-ok flag to "not ok".
$this->framesetOk = false;
# 2. Initialize node to be the current node (the bottommost node of the stack).
# 3. Loop: If node is an li element, then run these substeps:
for ($i = $this->stack->length - 1; $i >= 0; $i--) {
$node = $this->stack[$i];
$nodeName = $node->nodeName;
if ($nodeName === 'li') {
# 1. Generate implied end tags, except for li elements.
$this->stack->generateImpliedEndTags('li');
# 2. If the current node is not an li element, then this is a parse error.
$currentNodeName = $this->stack->currentNodeName;
if ($currentNodeName !== 'li') {
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $currentNodeName);
}
# 3. Pop elements from the stack of open elements until an li element has been
# popped from the stack.
do {
$poppedNodeName = $this->stack->pop()->nodeName;
} while ($poppedNodeName !== 'li');
# 4. Jump to the step labeled Done below.
break;
}
# 4. If node is in the special category, but is not an address, div, or p
# element, then jump to the step labeled Done below.
elseif ($nodeName !== 'address' && $nodeName !== 'div' && $nodeName !== 'p' && $this->isElementSpecial($node)) {
break;
}
# 5. Otherwise, set node to the previous entry in the stack of open elements and
# return to the step labeled Loop.
// The loop handles that.
}
# 6. Done: If the stack of open elements has a p element in button scope, then
# close a p element.
if ($this->stack->hasElementInButtonScope('p')) {
$this->closePElement();
}
# 7. Finally, insert an HTML element for the token.
$this->insertStartTagToken($token);
}
}
elseif ($token instanceof EndTagToken) {
# An end tag whose tag name is "template"
@ -1140,7 +1214,7 @@ class TreeBuilder {
# If the stack of open elements does not have a body element in scope, this is a
# parse error; ignore the token.
if ($this->stack->search('body') === -1) {
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'body', (string)$this->stack.' end tag');
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'body');
}
# Otherwise, if there is a node in the stack of open elements that is not either
# a dd element, a dt element, an li element, an optgroup element, an option
@ -1157,7 +1231,7 @@ class TreeBuilder {
return false;
}) !== -1) {
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'body', (string)$this->stack.' end tag');
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'body');
break;
}
@ -1195,7 +1269,7 @@ class TreeBuilder {
return false;
}) !== -1) {
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'body', (string)$this->stack.' end tag');
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, 'body');
break;
}
@ -1246,7 +1320,7 @@ class TreeBuilder {
# A DOCTYPE token
elseif ($token instanceof DOCTYPEToken) {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_DOCTYPE, 'Character, Comment, Start Tag, or End Tag');
ParseError::trigger(ParseError::UNEXPECTED_DOCTYPE);
}
elseif ($token instanceof StartTagToken) {
# A start tag whose tag name is one of: "b", "big", "blockquote", "body", "br",
@ -1263,7 +1337,7 @@ class TreeBuilder {
)
) {
# Parse error.
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $token->name, 'Non-HTML start tag');
ParseError::trigger(ParseError::UNEXPECTED_START_TAG, $token->name);
# If the parser was originally created for the HTML fragment parsing algorithm,
# then act as described in the "any other start tag" entry below. (fragment
@ -1586,7 +1660,7 @@ class TreeBuilder {
# 2. If node is not an element with the same tag name as the token, then this is
# a parse error.
if ($nodeName !== $token->name) {
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $token->name, "$nodeName end tag");
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $token->name);
}
# 3. Loop: If node's tag name, converted to ASCII lowercase, is the same as the
# tag name of the token, pop elements from the stack of open elements until node
@ -1783,7 +1857,7 @@ class TreeBuilder {
}
}
public static function insertStartTagToken(StartTagToken $token, \DOMNode $intendedParent = null, string $namespace = null) {
public static function insertStartTagToken(StartTagToken $token, \DOMNode $intendedParent = null, string $namespace = null): Element {
if (!is_null($namespace)) {
$namespace = $token->namespace;
}
@ -2094,7 +2168,7 @@ class TreeBuilder {
# 2. If the current node is not a p element, then this is a parse error.
$currentNodeName = $this->stack->currentNodeName;
if ($currentNodeName !== 'p') {
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $currentNodeName, (string)$this->stack . ' end tag');
ParseError::trigger(ParseError::UNEXPECTED_END_TAG, $currentNodeName);
}
# 3. Pop elements from the stack of open elements until a p element has been
# popped from the stack.
@ -2102,4 +2176,21 @@ class TreeBuilder {
$poppedNodeName = $this->stack->pop()->nodeName;
} while ($poppedNodeName !== 'p');
}
protected function isElementSpecial(Element $element): bool {
    // Reports whether the given element belongs to the "special" category used
    // by the tree construction rules. The decision is made from the element's
    // node name and namespace URI only.
    //
    // The previous single-expression form had unbalanced parentheses and did
    // not parse; the checks are now split per namespace.
    $name = $element->nodeName;
    $ns = $element->namespaceURI;

    # The following elements have varying levels of special parsing rules: HTML’s
    # address, applet, area, article, aside, base, basefont, bgsound, blockquote,
    # body, br, button, caption, center, col, colgroup, dd, details, dir, div, dl,
    # dt, embed, fieldset, figcaption, figure, footer, form, frame, frameset, h1,
    # h2, h3, h4, h5, h6, head, header, hr, html, iframe, img, input, li, link,
    # listing, main, marquee, meta, nav, noembed, noframes, noscript, object, ol, p,
    # param, plaintext, pre, script, section, select, source, style, summary, table,
    # tbody, td, template, textarea, tfoot, th, thead, title, tr, track, ul, wbr,
    # xmp; MathML mi, MathML mo, MathML mn, MathML ms, MathML mtext, and MathML
    # annotation-xml; and SVG foreignObject, SVG desc, and SVG title.

    // HTML elements: this code compares the namespace against the empty string
    // for them, so that comparison is kept as-is.
    if ($ns === '') {
        return in_array($name, [
            'address', 'applet', 'area', 'article', 'aside', 'base', 'basefont',
            'bgsound', 'blockquote', 'body', 'br', 'button', 'caption', 'center',
            'col', 'colgroup', 'dd', 'details', 'dir', 'div', 'dl', 'dt', 'embed',
            'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame',
            'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hr',
            'html', 'iframe', 'img', 'input', 'li', 'link', 'listing', 'main',
            'marquee', 'meta', 'nav', 'noembed', 'noframes', 'noscript', 'object',
            'ol', 'p', 'param', 'plaintext', 'pre', 'script', 'section', 'select',
            'source', 'style', 'summary', 'table', 'tbody', 'td', 'template',
            'textarea', 'tfoot', 'th', 'thead', 'title', 'tr', 'track', 'ul', 'wbr',
            'xmp'
        ], true);
    }

    if ($ns === Parser::MATHML_NAMESPACE) {
        return in_array($name, ['mi', 'mo', 'mn', 'ms', 'mtext', 'annotation-xml'], true);
    }

    if ($ns === Parser::SVG_NAMESPACE) {
        return in_array($name, ['foreignObject', 'desc', 'title'], true);
    }

    // Any other namespace has no special elements.
    return false;
}
}
Loading…
Cancel
Save