Fix a few tree tests
This commit is contained in:
parent
bb4002abcb
commit
4e5fd35775
4 changed files with 45 additions and 73 deletions
|
@ -78,15 +78,9 @@ class OpenElementsStack extends Stack {
|
|||
$this->_storage = array_values($this->_storage);
|
||||
}
|
||||
|
||||
public function generateImpliedEndTags($exclude = []) {
|
||||
public function generateImpliedEndTags(array $exclude = []) {
|
||||
$tags = ['caption', 'colgroup', 'dd', 'dt', 'li', 'optgroup', 'option', 'p', 'rb', 'rp', 'rt', 'rtc', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'];
|
||||
|
||||
if (is_string($exclude)) {
|
||||
$exclude = [$exclude];
|
||||
}
|
||||
|
||||
assert(is_array($exclude), new Exception(Exception::STACK_STRING_ARRAY_EXPECTED));
|
||||
|
||||
if (count($exclude) > 0) {
|
||||
$modified = false;
|
||||
foreach ($exclude as $e) {
|
||||
|
|
|
@ -5,6 +5,7 @@ namespace dW\HTML5;
|
|||
class ParseError {
|
||||
protected $data;
|
||||
|
||||
// tokenization parse errors; these have been standardized
|
||||
const ENCODING_ERROR = 100;
|
||||
const UNEXPECTED_NULL_CHARACTER = 101;
|
||||
const UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME = 102;
|
||||
|
@ -53,9 +54,19 @@ class ParseError {
|
|||
const CONTROL_CHARACTER_REFERENCE = 145;
|
||||
const SURROGATE_IN_INPUT_STREAM = 146;
|
||||
const NONCHARACTER_IN_INPUT_STREAM = 147;
|
||||
const CONTROL_CHARACTER_IN_INPUT_STREAM = 148;
|
||||
const CONTROL_CHARACTER_IN_INPUT_STREAM = 148;
|
||||
// tree construction parse errors; these have not been standardized, but html5lib's error names are likely to become standard in future
|
||||
const EXPECTED_DOCTYPE_BUT_GOT_START_TAG = 200;
|
||||
const EXPECTED_DOCTYPE_BUT_GOT_END_TAG = 201;
|
||||
const EXPECTED_DOCTYPE_BUT_GOT_CHARS = 202;
|
||||
const UNEXPECTED_END_TAG = 203; // html5lib also uses 'adoption-agency-1.2' and 'adoption-agency-1.3' for this
|
||||
|
||||
const MESSAGES = [
|
||||
self::EXPECTED_DOCTYPE_BUT_GOT_START_TAG => 'Expected DOCTYPE but got start tag',
|
||||
self::EXPECTED_DOCTYPE_BUT_GOT_END_TAG => 'Expected DOCTYPE but got end tag',
|
||||
self::EXPECTED_DOCTYPE_BUT_GOT_CHARS => 'Expected DOCTYPE but got characters',
|
||||
self::UNEXPECTED_END_TAG => 'Unexpected end tag',
|
||||
|
||||
self::ENCODING_ERROR => 'Corrupt encoding near byte position %s',
|
||||
self::UNEXPECTED_NULL_CHARACTER => 'Unexpected null character',
|
||||
self::UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME => 'Unexpected "?" character instead of tag name',
|
||||
|
@ -108,55 +119,26 @@ class ParseError {
|
|||
];
|
||||
|
||||
const REPORT_OFFSETS = [
|
||||
self::ENCODING_ERROR => 0,
|
||||
self::UNEXPECTED_NULL_CHARACTER => -1,
|
||||
self::UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME => 0,
|
||||
self::EOF_BEFORE_TAG_NAME => 0,
|
||||
self::INVALID_FIRST_CHARACTER_OF_TAG_NAME => 0,
|
||||
self::MISSING_END_TAG_NAME => -1,
|
||||
self::EOF_IN_TAG => 0,
|
||||
self::EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT => 0,
|
||||
self::UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME => -1,
|
||||
self::DUPLICATE_ATTRIBUTE => -1,
|
||||
self::UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME => -1,
|
||||
self::MISSING_ATTRIBUTE_VALUE => -1,
|
||||
self::UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE => -1,
|
||||
self::MISSING_WHITESPACE_BETWEEN_ATTRIBUTES => 0,
|
||||
self::UNEXPECTED_SOLIDUS_IN_TAG => 0,
|
||||
self::CDATA_IN_HTML_CONTENT => -1,
|
||||
self::INCORRECTLY_OPENED_COMMENT => 0,
|
||||
self::ABRUPT_CLOSING_OF_EMPTY_COMMENT => -1,
|
||||
self::EOF_IN_COMMENT => 0,
|
||||
self::NESTED_COMMENT => 0,
|
||||
self::INCORRECTLY_CLOSED_COMMENT => -1,
|
||||
self::EOF_IN_DOCTYPE => 0,
|
||||
self::MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME => 0,
|
||||
self::MISSING_DOCTYPE_NAME => -1,
|
||||
self::INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME => 0,
|
||||
self::MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD => -1,
|
||||
self::MISSING_DOCTYPE_PUBLIC_IDENTIFIER => -1,
|
||||
self::MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER => 0,
|
||||
self::ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER => -1,
|
||||
self::MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS => -1,
|
||||
self::MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD => -1,
|
||||
self::MISSING_DOCTYPE_SYSTEM_IDENTIFIER => -1,
|
||||
self::MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER => 0,
|
||||
self::ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER => -1,
|
||||
self::UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER => 0,
|
||||
self::EOF_IN_CDATA => 0,
|
||||
self::END_TAG_WITH_ATTRIBUTES => -1,
|
||||
self::END_TAG_WITH_TRAILING_SOLIDUS => -1,
|
||||
self::MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE => 0,
|
||||
self::UNKNOWN_NAMED_CHARACTER_REFERENCE => 0,
|
||||
self::ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE => 0,
|
||||
self::NULL_CHARACTER_REFERENCE => 0,
|
||||
self::CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE => 0,
|
||||
self::SURROGATE_CHARACTER_REFERENCE => 0,
|
||||
self::NONCHARACTER_CHARACTER_REFERENCE => 0,
|
||||
self::CONTROL_CHARACTER_REFERENCE => 0,
|
||||
self::SURROGATE_IN_INPUT_STREAM => 0,
|
||||
self::NONCHARACTER_IN_INPUT_STREAM => 0,
|
||||
self::CONTROL_CHARACTER_IN_INPUT_STREAM => 0,
|
||||
];
|
||||
|
||||
public function setHandler() {
|
||||
|
|
|
@ -10,7 +10,7 @@ trait ParseErrorEmitter {
|
|||
$data = ($this instanceof Data) ? $this : ($this->data ?? null);
|
||||
assert($data instanceof Data);
|
||||
assert($this->errorHandler instanceof ParseError);
|
||||
list($line, $column) = $data->whereIs(ParseError::REPORT_OFFSETS[$code]);
|
||||
list($line, $column) = $data->whereIs(ParseError::REPORT_OFFSETS[$code] ?? 0);
|
||||
return $this->errorHandler->emit($data->filePath, $line, $column, $code, ...$arg);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -109,12 +109,6 @@ class TreeBuilder {
|
|||
|
||||
$this->insertionMode = self::INITIAL_MODE;
|
||||
$this->quirksMode = self::QUIRKS_MODE_OFF;
|
||||
|
||||
static::$instance = $this;
|
||||
}
|
||||
|
||||
public function __destruct() {
|
||||
static::$instance = null;
|
||||
}
|
||||
|
||||
public function emitToken(Token $token) {
|
||||
|
@ -388,14 +382,16 @@ class TreeBuilder {
|
|||
# set the Document to quirks mode.
|
||||
// DEVIATION: There is no iframe srcdoc document because there are no nested
|
||||
// browsing contexts in this implementation.
|
||||
switch (get_class($token)) {
|
||||
case 'StartTagToken': $this->error(ParseError::UNEXPECTED_START_TAG, $token->name);
|
||||
break;
|
||||
case 'EndTagToken': $this->error(ParseError::UNEXPECTED_END_TAG, $token->name);
|
||||
break;
|
||||
case 'EOFToken': $this->error(ParseError::UNEXPECTED_EOF);
|
||||
break;
|
||||
default: throw new Exception(Exception::UNKNOWN_ERROR);
|
||||
if ($token instanceof StartTagToken) {
|
||||
$this->error(ParseError::EXPECTED_DOCTYPE_BUT_GOT_START_TAG);
|
||||
} elseif ($token instanceof EndTagToken) {
|
||||
$this->error(ParseError::EXPECTED_DOCTYPE_BUT_GOT_END_TAG);
|
||||
} elseif ($token instanceof CharacterToken) {
|
||||
$this->error(ParseError::EXPECTED_DOCTYPE_BUT_GOT_CHARS);
|
||||
} elseif ($token instanceof EOFToken) {
|
||||
$this->error(ParseError::UNEXPECTED_EOF);
|
||||
} else {
|
||||
throw new \Exception("Unexpected token type".get_class($token));
|
||||
}
|
||||
|
||||
$this->quirksMode = self::QUIRKS_MODE_ON;
|
||||
|
@ -431,7 +427,7 @@ class TreeBuilder {
|
|||
# Create an element for the token in the HTML namespace, with the Document as
|
||||
# the intended parent. Append it to the Document object. Put this element in the
|
||||
# stack of open elements.
|
||||
$element = static::insertStartTagToken($token, $this->DOM);
|
||||
$element = $this->insertStartTagToken($token, $this->DOM);
|
||||
|
||||
# Switch the insertion mode to "before head".
|
||||
$this->insertionMode = self::BEFORE_HEAD_MODE;
|
||||
|
@ -490,7 +486,7 @@ class TreeBuilder {
|
|||
# A start tag whose tag name is "head"
|
||||
elseif ($token->name === 'head') {
|
||||
# Insert an HTML element for the token.
|
||||
$element = static::insertStartTagToken($token);
|
||||
$element = $this->insertStartTagToken($token);
|
||||
# Set the head element pointer to the newly created head element.
|
||||
$this->headElement = $element;
|
||||
|
||||
|
@ -507,7 +503,7 @@ class TreeBuilder {
|
|||
# Anything else
|
||||
else {
|
||||
# Insert an HTML element for a "head" start tag token with no attributes.
|
||||
$element = static::insertStartTagToken(new StartTagToken('head'));
|
||||
$element = $this->insertStartTagToken(new StartTagToken('head'));
|
||||
# Set the head element pointer to the newly created head element.
|
||||
$this->headElement = $element;
|
||||
|
||||
|
@ -551,7 +547,7 @@ class TreeBuilder {
|
|||
elseif ($token->name === 'base' || $token->name === 'basefont' || $token->name === 'bgsound' || $token->name === 'link') {
|
||||
# Insert an HTML element for the token. Immediately pop the current node off the
|
||||
# stack of open elements.
|
||||
static::insertStartTagToken($token);
|
||||
$this->insertStartTagToken($token);
|
||||
$this->stack->pop();
|
||||
|
||||
# Acknowledge the token’s *self-closing flag*, if it is set.
|
||||
|
@ -561,7 +557,7 @@ class TreeBuilder {
|
|||
elseif ($token->name === 'meta') {
|
||||
# Insert an HTML element for the token. Immediately pop the current node off the
|
||||
# stack of open elements.
|
||||
static::insertStartTagToken($token);
|
||||
$this->insertStartTagToken($token);
|
||||
$this->stack->pop();
|
||||
|
||||
# Acknowledge the token’s *self-closing flag*, if it is set.
|
||||
|
@ -597,7 +593,7 @@ class TreeBuilder {
|
|||
// flag is always disabled.
|
||||
elseif ($token->name === 'noscript') {
|
||||
# Insert an HTML element for the token.
|
||||
static::insertStartTagToken($token);
|
||||
$this->insertStartTagToken($token);
|
||||
# Switch the insertion mode to "in head noscript".
|
||||
$this->insertionMode = self::IN_HEAD_NOSCRIPT_MODE;
|
||||
}
|
||||
|
@ -615,7 +611,7 @@ class TreeBuilder {
|
|||
// intended parent isn't used when determining anything;
|
||||
// Parser::createAndInsertElement will get the adjusted insertion location
|
||||
// anyway.
|
||||
static::insertStartTagToken($token);
|
||||
$this->insertStartTagToken($token);
|
||||
|
||||
# 3. Mark the element as being "parser-inserted" and unset the element’s
|
||||
# "non-blocking" flag.
|
||||
|
@ -637,7 +633,7 @@ class TreeBuilder {
|
|||
# A start tag whose tag name is "template"
|
||||
elseif ($token->name === 'template') {
|
||||
# Insert an HTML element for the token.
|
||||
static::insertStartTagToken($token);
|
||||
$this->insertStartTagToken($token);
|
||||
# Insert a marker at the end of the list of active formatting elements.
|
||||
$this->activeFormattingElementsList->insertMarker();
|
||||
# Set the frameset-ok flag to "not ok".
|
||||
|
@ -703,7 +699,7 @@ class TreeBuilder {
|
|||
|
||||
# 2. If the current node is not a template element, then this is a parse error.
|
||||
if ($this->stack->currentNodeName !== 'template') {
|
||||
$this->error(ParseError::UNEXPECTED_END_TAG, 'template');
|
||||
$this->error(ParseError::UNEXPECTED_END_TAG);
|
||||
}
|
||||
|
||||
# 3. Pop elements from the stack of open elements until a template element has been popped from the stack.
|
||||
|
@ -722,7 +718,7 @@ class TreeBuilder {
|
|||
# Any other end tag
|
||||
else {
|
||||
# Parse error.
|
||||
$this->error(ParseError::UNEXPECTED_END_TAG, $token->name);
|
||||
$this->error(ParseError::UNEXPECTED_END_TAG);
|
||||
}
|
||||
}
|
||||
# Anything else
|
||||
|
@ -1326,7 +1322,7 @@ class TreeBuilder {
|
|||
}
|
||||
|
||||
# Switch the insertion mode to "after body".
|
||||
self::$insertionMode = self::AFTER_BODY_MODE;
|
||||
$this->insertionMode = self::AFTER_BODY_MODE;
|
||||
|
||||
// The only thing different between body and html here is that when processing
|
||||
// an html end tag the token is reprocessed.
|
||||
|
@ -1789,7 +1785,7 @@ class TreeBuilder {
|
|||
|
||||
# Insert a foreign element for the token, in the same namespace as the adjusted
|
||||
# current node.
|
||||
static::insertStartTagToken($token, null, $this->stack->adjustedCurrentNode->namespaceURI);
|
||||
$this->insertStartTagToken($token, null, $this->stack->adjustedCurrentNode->namespaceURI);
|
||||
|
||||
# If the token has its self-closing flag set, then run the appropriate steps
|
||||
# from the following list:
|
||||
|
@ -1944,7 +1940,7 @@ class TreeBuilder {
|
|||
];
|
||||
}
|
||||
|
||||
public static function insertCharacterToken(CharacterToken $token) {
|
||||
public function insertCharacterToken(CharacterToken $token) {
|
||||
# 1. Let data be the characters passed to the algorithm, or, if no characters
|
||||
# were explicitly specified, the character of the character token being
|
||||
# processed.
|
||||
|
@ -1952,7 +1948,7 @@ class TreeBuilder {
|
|||
|
||||
# 2. Let the adjusted insertion location be the appropriate place for inserting
|
||||
# a node.
|
||||
$location = static::$instance->appropriatePlaceForInsertingNode();
|
||||
$location = $this->appropriatePlaceForInsertingNode();
|
||||
$adjustedInsertionLocation = $location['node'];
|
||||
$insertBefore = $location['insert before'];
|
||||
|
||||
|
@ -1998,7 +1994,7 @@ class TreeBuilder {
|
|||
$adjustedInsertionLocation = $position;
|
||||
$insertBefore = false;
|
||||
} else {
|
||||
$location = static::$instance->appropriatePlaceForInsertingNode();
|
||||
$location = $this->appropriatePlaceForInsertingNode();
|
||||
$adjustedInsertionLocation = $location['node'];
|
||||
$insertBefore = $location['insert before'];
|
||||
}
|
||||
|
@ -2016,7 +2012,7 @@ class TreeBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
public static function insertStartTagToken(StartTagToken $token, \DOMNode $intendedParent = null, string $namespace = null): Element {
|
||||
public function insertStartTagToken(StartTagToken $token, \DOMNode $intendedParent = null, string $namespace = null): Element {
|
||||
if (!is_null($namespace)) {
|
||||
$namespace = $token->namespace;
|
||||
}
|
||||
|
@ -2042,9 +2038,9 @@ class TreeBuilder {
|
|||
// DEVIATION: There is no point to setting the synchronous custom elements flag
|
||||
// and custom element definition; there is no scripting in this implementation.
|
||||
if ($namespace === Parser::HTML_NAMESPACE) {
|
||||
$element = static::$instance->DOM->createElement($token->name);
|
||||
$element = $this->DOM->createElement($token->name);
|
||||
} else {
|
||||
$element = static::$instance->DOM->createElementNS($namespace, $token->name);
|
||||
$element = $this->DOM->createElementNS($namespace, $token->name);
|
||||
}
|
||||
|
||||
# 8. Append each attribute in the given token to element.
|
||||
|
@ -2108,7 +2104,7 @@ class TreeBuilder {
|
|||
|
||||
# 1. Let the adjusted insertion location be the appropriate place for inserting
|
||||
# a node.
|
||||
$location = static::$instance->appropriatePlaceForInsertingNode($intendedParent);
|
||||
$location = $this->appropriatePlaceForInsertingNode($intendedParent);
|
||||
|
||||
$adjustedInsertionLocation = $location['node'];
|
||||
$insertBefore = $location['insert before'];
|
||||
|
@ -2136,7 +2132,7 @@ class TreeBuilder {
|
|||
// DEVIATION: Unnecessary because there is no scripting in this implementation.
|
||||
|
||||
# 4. Push element onto the stack of open elements so that it is the new current node.
|
||||
static::$instance->stack[] = $element;
|
||||
$this->stack[] = $element;
|
||||
|
||||
# Return element.
|
||||
return $element;
|
||||
|
@ -2148,7 +2144,7 @@ class TreeBuilder {
|
|||
# invoked in response to a start tag token.
|
||||
|
||||
# 1. Insert an HTML element for the token.
|
||||
static::insertStartTagToken($token);
|
||||
$this->insertStartTagToken($token);
|
||||
|
||||
# 2. If the algorithm that was invoked is the generic raw text element parsing
|
||||
# algorithm, switch the tokenizer to the RAWTEXT state; otherwise the algorithm
|
||||
|
|
Loading…
Reference in a new issue