Browse Source

More cleaning up of exceptions

split-manual
Dustin Wilson 3 years ago
parent
commit
02add5633a
  1. 16
      lib/ActiveFormattingElementsList.php
  2. 22
      lib/Exception.php
  3. 4
      lib/Stack.php
  4. 90
      lib/Tokenizer.php
  5. 410
      lib/TreeBuilder.php

16
lib/ActiveFormattingElementsList.php

@ -21,13 +21,13 @@ class ActiveFormattingElementsList extends Stack {
$count = $this->count;
assert($offset >= 0 && $offset <= $count, new Exception(Exception::STACK_INVALID_INDEX, $offset));
assert($value instanceof ActiveFormattingElementsMarker || (
is_array($value)
&& count($value) === 2
&& isset($value['token'])
is_array($value)
&& count($value) === 2
&& isset($value['token'])
&& isset($value['element'])
&& $value['token'] instanceof StartTagToken
&& $value['element'] instanceof Element
), new \Exception("Active formatting element value is invalid"));
), new Exception(Exception::STACK_INVALID_VALUE));
if ($value instanceof ActiveFormattingElementsMarker) {
$this->_storage[$offset ?? $count] = $value;
} elseif ($count && ($offset ?? $count) === $count) {
@ -51,7 +51,7 @@ class ActiveFormattingElementsList extends Stack {
if ($pos > $lastMarker) {
do {
$matches += (int) $this->matchElement($value['element'], $this->_storage[$pos]['element']);
// Stop once there are three matches or the marker is reached
// Stop once there are three matches or the marker is reached
} while ($matches < 3 && (--$pos) > $lastMarker);
}
if ($matches === 3) {
@ -78,8 +78,8 @@ class ActiveFormattingElementsList extends Stack {
# attributes in each pair have identical names, namespaces, and values (the
# order of the attributes does not matter).
if (
$a->nodeName !== $b->nodeName
|| $a->namespaceURI !== $b->namespaceURI
$a->nodeName !== $b->nodeName
|| $a->namespaceURI !== $b->namespaceURI
|| $a->attributes->length !== $b->attributes->length
) {
return false;
@ -93,7 +93,7 @@ class ActiveFormattingElementsList extends Stack {
}
public function insert(StartTagToken $token, Element $element, ?int $at = null): void {
assert($at === null || ($at >= 0 && $at <= $this->count), new \Exception("Invalid list index $at (max ".$this->count.")"));
assert($at === null || ($at >= 0 && $at <= $this->count), new Exception(Exception::STACK_INVALID_INDEX, $at));
if ($at === null) {
$this[] = [
'token' => $token,

22
lib/Exception.php

@ -6,6 +6,7 @@ class Exception extends \Exception {
const INVALID_CODE = 10000;
const UNKNOWN_ERROR = 10001;
const INCORRECT_PARAMETERS_FOR_MESSAGE = 10002;
const UNREACHABLE_CODE = 10003;
const PARSER_NONEMPTY_DOCUMENT = 10101;
@ -16,41 +17,54 @@ class Exception extends \Exception {
const STACK_INCORRECTLY_EMPTY = 10205;
const STACK_INVALID_STATE = 10206;
const STACK_NO_CONTEXT_EXISTS = 10207;
const STACK_INVALID_VALUE = 10208;
const STACK_INVALID_OFFSET = 10209;
const STACK_ROOT_ELEMENT_DELETE = 10210;
const DATA_NODATA = 10301;
const DATA_INVALID_DATA_CONSUMPTION_LENGTH = 10302;
const TOKENIZER_INVALID_STATE = 10401;
const TOKENIZER_INVALID_CHARACTER_REFERENCE_STATE = 10402;
const TREEBUILDER_FORMELEMENT_EXPECTED = 10501;
const TREEBUILDER_DOCUMENTFRAG_ELEMENT_DOCUMENT_DOCUMENTFRAG_EXPECTED = 10502;
const TREEBUILDER_UNEXPECTED_END_OF_FILE = 10503;
const TREEBUILDER_NON_EMPTY_TARGET_DOCUMENT = 10504;
const TREEBUILDER_INVALID_TOKEN_CLASS = 10505;
const TREEBUILDER_INVALID_INSERTION_LOCATION = 10506;
// Message templates keyed by the error-code constants declared on this class.
// Each code appears exactly once (a repeated key would silently discard the
// earlier entry), and every entry is comma-terminated so the literal parses.
// NOTE(review): the %s placeholders are presumably filled from the variadic
// arguments accepted by the constructor -- confirm against its body.
protected static $messages = [
    10000 => 'Invalid error code',
    10001 => 'Unknown error; escaping',
    10002 => 'Incorrect number of parameters for Exception message; %s expected',
    10003 => 'Unreachable code',

    10101 => 'Non-empty Document supplied as argument for Parser',

    10201 => 'Invalid Stack index at %s',
    10202 => 'Element, Document, or DOMDocumentFragment expected for fragment context',
    10203 => 'Element, string, or array expected',
    10204 => 'String or array expected',
    10205 => 'Stack is incorrectly empty',
    10206 => 'Stack is in an invalid state; dump: %s',
    10207 => 'No %s context exists in stack',
    10208 => 'Stack value is invalid',
    10209 => 'Invalid stack offset; offset must be %s',
    10210 => 'Root element cannot be deleted from the stack',

    10301 => 'Data string expected; found %s',
    10302 => '%s is an invalid data consumption length; a value of 1 or above is expected',

    10401 => 'The Tokenizer has entered an invalid state: %s',
    10402 => 'Invalid character reference consumption state: %s',

    10501 => 'Form element expected, found %s',
    10502 => 'Element, Document, or DOMDocumentFragment expected; found %s',
    10503 => 'Unexpected end of file',
    10504 => 'Target document is not empty',
    10505 => 'Invalid token class: %s',
    10506 => 'Invalid insertion location',
];
public function __construct(int $code, ...$args) {

4
lib/Stack.php

@ -31,7 +31,7 @@ abstract class Stack implements \ArrayAccess, \Countable, \IteratorAggregate {
assert($offset >= 0 && $offset < count($this->_storage), new Exception(Exception::STACK_INVALID_INDEX, $offset));
return $this->_storage[$offset];
}
/**
 * Returns the size tracked in the $count property.
 *
 * The class implements \Countable, so this is the value that count($stack)
 * reports.
 *
 * @return int The current value of $this->count
 */
public function count(): int {
return $this->count;
}
@ -52,7 +52,7 @@ abstract class Stack implements \ArrayAccess, \Countable, \IteratorAggregate {
}
/**
 * Returns the entry found $offset positions below the highest-indexed entry.
 *
 * An offset of 0 yields the entry at index count - 1, an offset of 1 the one
 * before it, and so on. When the stack holds $offset entries or fewer there
 * is nothing at that depth and null is returned instead.
 *
 * @param int $offset Distance from the top of the stack; must be zero or greater
 * @return mixed|null The stored entry, or null when no entry exists at that depth
 */
public function top(int $offset = 0) {
    // The second argument is substituted into "offset must be %s", so it has to
    // state the accepted range, i.e. the same condition that is being asserted
    assert($offset >= 0, new Exception(Exception::STACK_INVALID_OFFSET, '>= 0'));
    return ($c = $this->count) > $offset ? $this->_storage[$c - ($offset + 1)] : null;
}
}

90
lib/Tokenizer.php

@ -180,11 +180,11 @@ class Tokenizer {
];
const ATTRIBUTE_VALUE_STATE_SET = [
# A character reference is said to be consumed as part of an attribute
# if the return state is either attribute value (double-quoted) state,
# A character reference is said to be consumed as part of an attribute
# if the return state is either attribute value (double-quoted) state,
# attribute value (single-quoted) state or attribute value (unquoted) state.
self::ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE,
self::ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE,
self::ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE,
self::ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE,
self::ATTRIBUTE_VALUE_UNQUOTED_STATE
];
@ -608,7 +608,7 @@ class Tokenizer {
# U+0000 NULL
elseif ($char === "\0") {
# This is an unexpected-null-character parse error.
# Append a U+FFFD REPLACEMENT CHARACTER character to
# Append a U+FFFD REPLACEMENT CHARACTER character to
# the current tag token's tag name.
$this->error(ParseError::UNEXPECTED_NULL_CHARACTER);
$token->name .= "\u{FFFD}";
@ -3343,14 +3343,14 @@ class Tokenizer {
# Not a valid state, unimplemented, or implemented elsewhere
else {
throw new \Exception("Unimplemented state: ".(self::STATE_NAMES[$this->state] ?? $this->state)); // @codeCoverageIgnore
throw new Exception(Exception::TOKENIZER_INVALID_STATE, (self::STATE_NAMES[$this->state] ?? $this->state)); // @codeCoverageIgnore
}
}
}
protected function switchToCharacterReferenceState(int $returnState): string {
// This function implements states 72 through 80,
// "Character reference" through "Numeric character reference end" states
// This function implements states 72 through 80,
// "Character reference" through "Numeric character reference end" states
$this->state = self::CHARACTER_REFERENCE_STATE;
while (true) {
@ -3394,13 +3394,13 @@ class Tokenizer {
# 13.2.5.73 Named character reference state
elseif ($this->state === self::NAMED_CHARACTER_REFERENCE_STATE) {
# Consume the maximum number of characters possible,
# with the consumed characters matching one of the
# identifiers in the first column of the named character
# Consume the maximum number of characters possible,
# with the consumed characters matching one of the
# identifiers in the first column of the named character
# references table (in a case-sensitive manner).
// DEVIATION:
// We consume all possible alphanumeric characters,
// We consume all possible alphanumeric characters,
// up to the length of the longest in the table
$candidate = $this->data->consumeWhile(self::CTYPE_ALNUM, CharacterReference::LONGEST_NAME);
// Keep a record of the terminating character, which is used later
@ -3421,33 +3421,33 @@ class Tokenizer {
$match = CharacterReference::NAMES[$match];
}
}
# Append each character to the temporary buffer when it's consumed.
$this->temporaryBuffer .= $candidate;
# If there is a match
if (!is_null($match)) {
# If the character reference was consumed as part of an attribute,
# and the last character matched is not a U+003B SEMICOLON character (;),
# If the character reference was consumed as part of an attribute,
# and the last character matched is not a U+003B SEMICOLON character (;),
# and the next input character is either a U+003D EQUALS SIGN character (=)
# or an ASCII alphanumeric...
if (in_array($returnState, self::ATTRIBUTE_VALUE_STATE_SET) && $next !== ';' && ($next === '=' || ctype_alnum($next))) {
# ... then, for historical reasons, flush code points consumed
# ... then, for historical reasons, flush code points consumed
# as a character reference and switch to the return state.
$this->state = $returnState;
return $this->temporaryBuffer;
}
}
# Otherwise:
else {
# If the last character matched is not a U+003B SEMICOLON character (;),
# If the last character matched is not a U+003B SEMICOLON character (;),
# then this is a missing-semicolon-after-character-reference parse error.
if ($next !== ';') {
$this->error(ParseError::MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE);
}
# Set the temporary buffer to the empty string.
# Append one or two characters corresponding to the
# character reference name (as given by the second
# column of the named character references table)
# Set the temporary buffer to the empty string.
# Append one or two characters corresponding to the
# character reference name (as given by the second
# column of the named character references table)
# to the temporary buffer.
# Flush code points consumed as a character reference.
# Switch to the return state.
@ -3459,7 +3459,7 @@ class Tokenizer {
}
# Otherwise:
else {
# Flush code points consumed as a character reference.
# Flush code points consumed as a character reference.
# Switch to the ambiguous ampersand state.
// DEVIATION: We flush only when switching to the return state
@ -3479,7 +3479,7 @@ class Tokenizer {
# ASCII alphanumeric
if (ctype_alnum($char)) {
# If the character reference was consumed as part of an attribute,
# If the character reference was consumed as part of an attribute,
# then append the current input character to the current attribute's value.
# Otherwise, emit the current input character as a character token.
@ -3510,7 +3510,7 @@ class Tokenizer {
$charRefCode = 0;
# Consume the next input character.
$char = $this->data->consume();
# U+0078 LATIN SMALL LETTER X
#U+0058 LATIN CAPITAL LETTER X
if ($char === 'x' || $char === 'X') {
@ -3531,7 +3531,7 @@ class Tokenizer {
elseif ($this->state === self::HEXADECIMAL_CHARACTER_REFERENCE_START_STATE) {
# Consume the next input character.
$char = $this->data->consume();
# ASCII hex digit
if (ctype_xdigit($char)) {
# Reconsume in the hexadecimal character reference state.
@ -3557,7 +3557,7 @@ class Tokenizer {
elseif ($this->state === self::DECIMAL_CHARACTER_REFERENCE_START_STATE) {
# Consume the next input character.
$char = $this->data->consume();
# ASCII digit
if (ctype_digit($char)) {
# Reconsume in the decimal character reference state.
@ -3583,17 +3583,17 @@ class Tokenizer {
elseif ($this->state === self::HEXADECIMAL_CHARACTER_REFERENCE_STATE) {
# Consume the next input character.
$char = $this->data->consume();
# ASCII digit
# ASCII upper hex digit
# ASCII lower hex digit
if (ctype_xdigit($char)) {
# Multiply the character reference code by 16.
# Add a numeric version of the current input
# Multiply the character reference code by 16.
# Add a numeric version of the current input
# character to the character reference code.
// OPTIMIZATION: Combine all digit types
// NOTE: This branch should never be reached
// NOTE: This branch should never be reached
$charRefCode = ($charRefCode * 16) + hexdec($char); // @codeCoverageIgnore
}
# U+003B SEMICOLON
@ -3615,15 +3615,15 @@ class Tokenizer {
elseif ($this->state === self::DECIMAL_CHARACTER_REFERENCE_STATE) {
# Consume the next input character.
$char = $this->data->consume();
# ASCII digit
if (ctype_digit($char)) {
# Multiply the character reference code by 10.
# Add a numeric version of the current input
# Multiply the character reference code by 10.
# Add a numeric version of the current input
# character to the character reference code.
// OPTIMIZATION: Combine all digit types
// NOTE: This branch should never be reached
// NOTE: This branch should never be reached
$charRefCode = ($charRefCode * 10) + ((int) ($char)); // @codeCoverageIgnore
}
# U+003B SEMICOLON
@ -3651,29 +3651,29 @@ class Tokenizer {
$this->error(ParseError::NULL_CHARACTER_REFERENCE);
$charRefCode = 0xFFFD;
}
# If the number is greater than 0x10FFFF, then this is a
# If the number is greater than 0x10FFFF, then this is a
# character-reference-outside-unicode-range parse error.
# Set the character reference code to 0xFFFD.
elseif ($charRefCode > 0x10FFFF) {
$this->error(ParseError::CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE);
$charRefCode = 0xFFFD;
}
# If the number is a surrogate, then this is a
# If the number is a surrogate, then this is a
# surrogate-character-reference parse error.
# Set the character reference code to 0xFFFD.
elseif ($charRefCode >= 0xD800 && $charRefCode <= 0xDFFF) {
$this->error(ParseError::SURROGATE_CHARACTER_REFERENCE);
$charRefCode = 0xFFFD;
}
# If the number is a noncharacter, then this is a
# If the number is a noncharacter, then this is a
# noncharacter-character-reference parse error.
elseif (($charRefCode >= 0xFDD0 && $charRefCode <= 0xFDEF) || ($charRefCode % 0x10000 & 0xFFFE) === 0xFFFE) {
$this->error(ParseError::NONCHARACTER_CHARACTER_REFERENCE);
}
# If the number is 0x0D, or a control that's not ASCII whitespace, then
# this is a control-character-reference parse error.
# If the number is one of the numbers in the first column of the following
# table, then find the row with that number in the first column, and set
# If the number is 0x0D, or a control that's not ASCII whitespace, then
# this is a control-character-reference parse error.
# If the number is one of the numbers in the first column of the following
# table, then find the row with that number in the first column, and set
# the character reference code to the number in the second column of that row.
elseif (($charRefCode < 0x20 && !in_array($charRefCode, [0x9, 0xA, 0xC])) || ($charRefCode >= 0x7F && $charRefCode <= 0x9F)) {
// NOTE: Table elided
@ -3687,8 +3687,8 @@ class Tokenizer {
# Not a valid state, unimplemented, or implemented elsewhere
else {
throw new \Exception("Unimplemented character reference consumption state: ".(self::STATE_NAMES[$this->state] ?? $this->state)); // @codeCoverageIgnore
throw new Exception(Exception::TOKENIZER_INVALID_CHARACTER_REFERENCE_STATE, (self::STATE_NAMES[$this->state] ?? $this->state)); // @codeCoverageIgnore
}
}
}
}
}

410
lib/TreeBuilder.php

@ -206,11 +206,11 @@ class TreeBuilder {
# keygen, li, link, listing, main, marquee, menu, meta, nav, noembed, noframes,
# noscript, object, ol, p, param, plaintext, pre, script, section, select,
# source, style, summary, table, tbody, td, template, textarea, tfoot, th,
# thead, title, tr, track, ul, wbr, xmp; MathML mi, MathML mo, MathML mn,
# thead, title, tr, track, ul, wbr, xmp; MathML mi, MathML mo, MathML mn,
# MathML ms, MathML mtext, and MathML annotation-xml; and SVG foreignObject,
# SVG desc, and SVG title.
# SVG desc, and SVG title.
protected const SPECIAL_ELEMENTS = [
Parser::HTML_NAMESPACE => ['address', 'applet', 'area', 'article', 'aside', 'base', 'basefont', 'bgsound', 'blockquote', 'body', 'br', 'button', 'caption', 'center', 'col', 'colgroup', 'dd', 'details', 'dir', 'div', 'dl', 'dt', 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html', 'iframe', 'img', 'input', 'keygen', 'li', 'link', 'listing', 'main', 'marquee', 'menu', 'meta', 'nav', 'noembed', 'noframes', 'noscript', 'object', 'ol', 'p', 'param', 'plaintext', 'pre', 'script', 'section', 'select', 'source', 'style', 'summary', 'table', 'tbody', 'td', 'template', 'textarea', 'tfoot', 'th', 'thead', 'title', 'tr', 'track', 'ul', 'wbr', 'xmp'],
Parser::HTML_NAMESPACE => ['address', 'applet', 'area', 'article', 'aside', 'base', 'basefont', 'bgsound', 'blockquote', 'body', 'br', 'button', 'caption', 'center', 'col', 'colgroup', 'dd', 'details', 'dir', 'div', 'dl', 'dt', 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html', 'iframe', 'img', 'input', 'keygen', 'li', 'link', 'listing', 'main', 'marquee', 'menu', 'meta', 'nav', 'noembed', 'noframes', 'noscript', 'object', 'ol', 'p', 'param', 'plaintext', 'pre', 'script', 'section', 'select', 'source', 'style', 'summary', 'table', 'tbody', 'td', 'template', 'textarea', 'tfoot', 'th', 'thead', 'title', 'tr', 'track', 'ul', 'wbr', 'xmp'],
Parser::MATHML_NAMESPACE => ['mi', 'mo', 'mn', 'ms', 'mtext', 'annotation-xml'],
Parser::SVG_NAMESPACE => ['foreignObject', 'desc', 'title'],
];
@ -230,7 +230,7 @@ class TreeBuilder {
];
public function __construct(Document $dom, Data $data, Tokenizer $tokenizer, \Generator $tokenList, ParseError $errorHandler, OpenElementsStack $stack, TemplateInsertionModesStack $templateInsertionModes, ?\DOMElement $fragmentContext = null) {
assert(!$dom->hasChildNodes() && !$dom->doctype, new \Exception("Target document is not empty"));
assert(!$dom->hasChildNodes() && !$dom->doctype, new Exception(Exception::TREEBUILDER_NON_EMPTY_TARGET_DOCUMENT));
$this->DOM = $dom;
$this->fragmentContext = $fragmentContext;
$this->stack = $stack;
@ -270,7 +270,7 @@ class TreeBuilder {
# Reset the parser's insertion mode appropriately.
$this->resetInsertionMode();
# Set the parser's form element pointer to the nearest node to the context element
# that is a form element (going straight up the ancestor chain, and including the
# that is a form element (going straight up the ancestor chain, and including the
# element itself, if it is a form element), if any. (If there is no such form element,
# the form element pointer keeps its initial value, null.)
$node = $fragmentContext;
@ -280,7 +280,7 @@ class TreeBuilder {
break;
}
} while ($node = $node->parentNode);
# Place the input into the input stream for the HTML parser just created.
# Place the input into the input stream for the HTML parser just created.
# The encoding confidence is irrelevant.
// Already done.
# Start the parser and let it run until it has consumed all the characters just inserted into the input stream.
@ -294,7 +294,7 @@ class TreeBuilder {
$this->debugLog .= "EMITTED: ".constant(get_class($token)."::NAME")."\n";
return true;
})());
assert($token instanceof CharacterToken || $token instanceof CommentToken || $token instanceof TagToken || $token instanceof DOCTYPEToken || $token instanceof EOFToken, new \Exception("Unknown token class ".get_class($token)));
assert($token instanceof CharacterToken || $token instanceof CommentToken || $token instanceof TagToken || $token instanceof DOCTYPEToken || $token instanceof EOFToken, new Exception(Exception::TREEBUILDER_INVALID_TOKEN_CLASS, get_class($token)));
$iterations = 0;
$insertionMode = $this->insertionMode;
@ -307,7 +307,7 @@ class TreeBuilder {
# 13.2.6 Tree construction
#
# As each token is emitted from the tokenizer, the user agent must follow the
# appropriate steps from the following list, known as the tree construction dispatcher:
# appropriate steps from the following list, known as the tree construction dispatcher:
if (
# If the stack of open elements is empty
!$this->stack->currentNode
@ -337,7 +337,7 @@ class TreeBuilder {
# corresponding to the current insertion mode in HTML content.
ProcessToken:
assert($iterations++ < 50, new LoopException("Probable infinite loop detected in HTML content handling (inner reprocessing)"));
assert((function() use ($insertionMode) {
$mode = self::INSERTION_MODE_NAMES[$insertionMode] ?? $insertionMode;
$this->debugLog .= " Mode: $mode (".(string) $this->stack.")\n";
@ -372,7 +372,7 @@ class TreeBuilder {
# A DOCTYPE token
elseif ($token instanceof DOCTYPEToken) {
# If the DOCTYPE token's name is not "html", or the token's public identifier is
# not missing, or the token's system identifier is neither missing nor
# not missing, or the token's system identifier is neither missing nor
# "about:legacy-compat", then there is a parse error.
if ($token->name !== 'html' || $token->public !== null || !($token->system === null || $token->system === 'about:legacy-compat')) {
$this->error(ParseError::UNKNOWN_DOCTYPE);
@ -389,7 +389,7 @@ class TreeBuilder {
# it is returned as the value of the doctype attribute of the Document object.
$this->DOM->appendChild($this->DOM->implementation->createDocumentType($token->name ?? ' ', $token->public ?? '', $token->system ?? ''));
# Then, if the document is not an iframe srcdoc document, and the DOCTYPE token
# matches one of the conditions in the following list, then set the Document to
# quirks mode:
@ -397,69 +397,69 @@ class TreeBuilder {
// browsing contexts to consider.
$public = strtolower($token->public ?? '');
$system = strtolower($token->system ?? '');
if ($token->forceQuirks === true
|| $token->name !== 'html'
|| $public === '-//w3o//dtd w3 html strict 3.0//en//'
|| $public === '-/w3c/dtd html 4.0 transitional/en'
|| $public === 'html'
|| $system === 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd'
|| strpos($public, '+//silmaril//dtd html pro v0r11 19970101//') === 0
|| strpos($public, '-//as//dtd html 3.0 aswedit + extensions//') === 0
|| strpos($public, '+//silmaril//dtd html pro v0r11 19970101//') === 0
|| strpos($public, '-//as//dtd html 3.0 aswedit + extensions//') === 0
|| strpos($public, '-//advasoft ltd//dtd html 3.0 aswedit + extensions//') === 0
|| strpos($public, '-//ietf//dtd html 2.0 level 1//') === 0
|| strpos($public, '-//ietf//dtd html 2.0 level 2//') === 0
|| strpos($public, '-//ietf//dtd html 2.0 strict level 1//') === 0
|| strpos($public, '-//ietf//dtd html 2.0 strict level 2//') === 0
|| strpos($public, '-//ietf//dtd html 2.0 strict//') === 0
|| strpos($public, '-//ietf//dtd html 2.0//') === 0
|| strpos($public, '-//ietf//dtd html 2.1e//') === 0
|| strpos($public, '-//ietf//dtd html 3.0//') === 0
|| strpos($public, '-//ietf//dtd html 3.2 final//') === 0
|| strpos($public, '-//ietf//dtd html 3.2//') === 0
|| strpos($public, '-//ietf//dtd html 3//') === 0
|| strpos($public, '-//ietf//dtd html level 0//') === 0
|| strpos($public, '-//ietf//dtd html level 1//') === 0
|| strpos($public, '-//ietf//dtd html level 2//') === 0
|| strpos($public, '-//ietf//dtd html level 3//') === 0
|| strpos($public, '-//ietf//dtd html strict level 0//') === 0
|| strpos($public, '-//ietf//dtd html strict level 1//') === 0
|| strpos($public, '-//ietf//dtd html strict level 2//') === 0
|| strpos($public, '-//ietf//dtd html strict level 3//') === 0
|| strpos($public, '-//ietf//dtd html strict//') === 0
|| strpos($public, '-//ietf//dtd html//') === 0
|| strpos($public, '-//metrius//dtd metrius presentational//') === 0
|| strpos($public, '-//microsoft//dtd internet explorer 2.0 html strict//') === 0
|| strpos($public, '-//microsoft//dtd internet explorer 2.0 html//') === 0
|| strpos($public, '-//microsoft//dtd internet explorer 2.0 tables//') === 0
|| strpos($public, '-//microsoft//dtd internet explorer 3.0 html strict//') === 0
|| strpos($public, '-//microsoft//dtd internet explorer 3.0 html//') === 0
|| strpos($public, '-//microsoft//dtd internet explorer 3.0 tables//') === 0
|| strpos($public, '-//netscape comm. corp.//dtd html//') === 0
|| strpos($public, '-//netscape comm. corp.//dtd strict html//') === 0
|| strpos($public, '-//o\'reilly and associates//dtd html 2.0//') === 0
|| strpos($public, '-//o\'reilly and associates//dtd html extended 1.0//') === 0
|| strpos($public, '-//o\'reilly and associates//dtd html extended relaxed 1.0//') === 0
|| strpos($public, '-//sq//dtd html 2.0 hotmetal + extensions//') === 0
|| strpos($public, '-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//') === 0
|| strpos($public, '-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//') === 0
|| strpos($public, '-//spyglass//dtd html 2.0 extended//') === 0
|| strpos($public, '-//sun microsystems corp.//dtd hotjava html//') === 0
|| strpos($public, '-//sun microsystems corp.//dtd hotjava strict html//') === 0
|| strpos($public, '-//w3c//dtd html 3 1995-03-24//') === 0
|| strpos($public, '-//w3c//dtd html 3.2 draft//') === 0
|| strpos($public, '-//w3c//dtd html 3.2 final//') === 0
|| strpos($public, '-//w3c//dtd html 3.2//') === 0
|| strpos($public, '-//w3c//dtd html 3.2s draft//') === 0
|| strpos($public, '-//w3c//dtd html 4.0 frameset//') === 0
|| strpos($public, '-//w3c//dtd html 4.0 transitional//') === 0
|| strpos($public, '-//w3c//dtd html experimental 19960712//') === 0
|| strpos($public, '-//w3c//dtd html experimental 970421//') === 0
|| strpos($public, '-//w3c//dtd w3 html//') === 0
|| strpos($public, '-//w3o//dtd w3 html 3.0//') === 0
|| strpos($public, '-//webtechs//dtd mozilla html 2.0//') === 0
|| strpos($public, '-//webtechs//dtd mozilla html//') === 0
if ($token->forceQuirks === true
|| $token->name !== 'html'
|| $public === '-//w3o//dtd w3 html strict 3.0//en//'
|| $public === '-/w3c/dtd html 4.0 transitional/en'
|| $public === 'html'
|| $system === 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd'
|| strpos($public, '+//silmaril//dtd html pro v0r11 19970101//') === 0
|| strpos($public, '-//as//dtd html 3.0 aswedit + extensions//') === 0
|| strpos($public, '+//silmaril//dtd html pro v0r11 19970101//') === 0
|| strpos($public, '-//as//dtd html 3.0 aswedit + extensions//') === 0
|| strpos($public, '-//advasoft ltd//dtd html 3.0 aswedit + extensions//') === 0
|| strpos($public, '-//ietf//dtd html 2.0 level 1//') === 0
|| strpos($public, '-//ietf//dtd html 2.0 level 2//') === 0
|| strpos($public, '-//ietf//dtd html 2.0 strict level 1//') === 0
|| strpos($public, '-//ietf//dtd html 2.0 strict level 2//') === 0
|| strpos($public, '-//ietf//dtd html 2.0 strict//') === 0
|| strpos($public, '-//ietf//dtd html 2.0//') === 0
|| strpos($public, '-//ietf//dtd html 2.1e//') === 0
|| strpos($public, '-//ietf//dtd html 3.0//') === 0
|| strpos($public, '-//ietf//dtd html 3.2 final//') === 0
|| strpos($public, '-//ietf//dtd html 3.2//') === 0
|| strpos($public, '-//ietf//dtd html 3//') === 0
|| strpos($public, '-//ietf//dtd html level 0//') === 0
|| strpos($public, '-//ietf//dtd html level 1//') === 0
|| strpos($public, '-//ietf//dtd html level 2//') === 0
|| strpos($public, '-//ietf//dtd html level 3//') === 0
|| strpos($public, '-//ietf//dtd html strict level 0//') === 0
|| strpos($public, '-//ietf//dtd html strict level 1//') === 0
|| strpos($public, '-//ietf//dtd html strict level 2//') === 0
|| strpos($public, '-//ietf//dtd html strict level 3//') === 0
|| strpos($public, '-//ietf//dtd html strict//') === 0
|| strpos($public, '-//ietf//dtd html//') === 0
|| strpos($public, '-//metrius//dtd metrius presentational//') === 0
|| strpos($public, '-//microsoft//dtd internet explorer 2.0 html strict//') === 0
|| strpos($public, '-//microsoft//dtd internet explorer 2.0 html//') === 0
|| strpos($public, '-//microsoft//dtd internet explorer 2.0 tables//') === 0
|| strpos($public, '-//microsoft//dtd internet explorer 3.0 html strict//') === 0
|| strpos($public, '-//microsoft//dtd internet explorer 3.0 html//') === 0
|| strpos($public, '-//microsoft//dtd internet explorer 3.0 tables//') === 0
|| strpos($public, '-//netscape comm. corp.//dtd html//') === 0
|| strpos($public, '-//netscape comm. corp.//dtd strict html//') === 0
|| strpos($public, '-//o\'reilly and associates//dtd html 2.0//') === 0
|| strpos($public, '-//o\'reilly and associates//dtd html extended 1.0//') === 0
|| strpos($public, '-//o\'reilly and associates//dtd html extended relaxed 1.0//') === 0
|| strpos($public, '-//sq//dtd html 2.0 hotmetal + extensions//') === 0
|| strpos($public, '-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//') === 0
|| strpos($public, '-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//') === 0
|| strpos($public, '-//spyglass//dtd html 2.0 extended//') === 0
|| strpos($public, '-//sun microsystems corp.//dtd hotjava html//') === 0
|| strpos($public, '-//sun microsystems corp.//dtd hotjava strict html//') === 0
|| strpos($public, '-//w3c//dtd html 3 1995-03-24//') === 0
|| strpos($public, '-//w3c//dtd html 3.2 draft//') === 0
|| strpos($public, '-//w3c//dtd html 3.2 final//') === 0
|| strpos($public, '-//w3c//dtd html 3.2//') === 0
|| strpos($public, '-//w3c//dtd html 3.2s draft//') === 0
|| strpos($public, '-//w3c//dtd html 4.0 frameset//') === 0
|| strpos($public, '-//w3c//dtd html 4.0 transitional//') === 0
|| strpos($public, '-//w3c//dtd html experimental 19960712//') === 0
|| strpos($public, '-//w3c//dtd html experimental 970421//') === 0
|| strpos($public, '-//w3c//dtd w3 html//') === 0
|| strpos($public, '-//w3o//dtd w3 html 3.0//') === 0
|| strpos($public, '-//webtechs//dtd mozilla html 2.0//') === 0
|| strpos($public, '-//webtechs//dtd mozilla html//') === 0
|| (is_null($token->system) && strpos($public, '-//w3c//dtd html 4.01 frameset//') === 0)
|| (is_null($token->system) && strpos($public, '-//w3c//dtd html 4.01 transitional//') === 0)
) {
@ -471,9 +471,9 @@ class TreeBuilder {
// DEVIATION: There is no iframe srcdoc document because there are no nested
// browsing contexts in this implementation.
elseif (
strpos($public, '-//w3c//dtd xhtml 1.0 frameset//') === 0
|| strpos($public, '-//w3c//dtd xhtml 1.0 transitional//') === 0
|| (!is_null($token->system) && strpos($public, '-//w3c//dtd html 4.01 frameset//') === 0)
strpos($public, '-//w3c//dtd xhtml 1.0 frameset//') === 0
|| strpos($public, '-//w3c//dtd xhtml 1.0 transitional//') === 0
|| (!is_null($token->system) && strpos($public, '-//w3c//dtd html 4.01 frameset//') === 0)
|| (!is_null($token->system) && strpos($public, '-//w3c//dtd html 4.01 transitional//') === 0)
) {
$this->DOM->quirksMode = Document::LIMITED_QUIRKS_MODE;
@ -492,7 +492,7 @@ class TreeBuilder {
# set the Document to quirks mode.
// DEVIATION: There is no iframe srcdoc document because there are no nested
// browsing contexts in this implementation.
assert($token instanceof CharacterToken || $token instanceof TagToken || $token instanceof EOFToken, new \Exception("Unexpected token class ".get_class($token)));
assert($token instanceof CharacterToken || $token instanceof TagToken || $token instanceof EOFToken, new Exception(Exception::TREEBUILDER_INVALID_TOKEN_CLASS, get_class($token)));
if ($token instanceof StartTagToken) {
$this->error(ParseError::EXPECTED_DOCTYPE_BUT_GOT_START_TAG, $token->name);
} elseif ($token instanceof EndTagToken) {
@ -648,7 +648,7 @@ class TreeBuilder {
}
# A start tag whose tag name is one of: "base", "basefont", "bgsound", "link"
elseif ($token->name === 'base' || $token->name === 'basefont' || $token->name === 'bgsound' || $token->name === 'link') {
# Insert an HTML element for the token.
# Insert an HTML element for the token.
# Immediately pop the current node off the stack of open elements.
$this->insertStartTagToken($token);
$this->stack->pop();
@ -657,7 +657,7 @@ class TreeBuilder {
}
# A start tag whose tag name is "meta"
elseif ($token->name === 'meta') {
# Insert an HTML element for the token.
# Insert an HTML element for the token.
# Immediately pop the current node off the stack of open elements.
$this->insertStartTagToken($token);
$this->stack->pop();
@ -707,8 +707,8 @@ class TreeBuilder {
# parent being the element in which the adjusted insertion location finds
# itself.
// DEVIATION: Because there is no scripting in this implementation, there is no
// need to get the adjusted insertion location as the intended parent isn't used
// when determining anything; Parser::createAndInsertElement will get the
// need to get the adjusted insertion location as the intended parent isn't used
// when determining anything; Parser::createAndInsertElement will get the
// adjusted insertion location anyway.
$this->insertStartTagToken($token);
@ -752,7 +752,7 @@ class TreeBuilder {
else {
# Act as described in the "anything else" entry below.
# Pop the current node (which will be the head element) off
# Pop the current node (which will be the head element) off
# the stack of open elements.
$this->stack->pop();
# Switch the insertion mode to "after head".
@ -765,7 +765,7 @@ class TreeBuilder {
elseif ($token instanceof EndTagToken) {
# An end tag whose tag name is "head"
if ($token->name === 'head') {
# Pop the current node (which will be the head element) off
# Pop the current node (which will be the head element) off
# the stack of open elements.
$this->stack->pop();
# Switch the insertion mode to "after head".
@ -775,7 +775,7 @@ class TreeBuilder {
elseif ($token->name === 'body' || $token->name === 'html' || $token->name === 'br') {
# Act as described in the "anything else" entry below.
# Pop the current node (which will be the head element) off
# Pop the current node (which will be the head element) off
# the stack of open elements.
$this->stack->pop();
# Switch the insertion mode to "after head".
@ -900,7 +900,7 @@ class TreeBuilder {
} elseif ($token instanceof EOFToken) {
$this->error(ParseError::UNEXPECTED_EOF);
}
# Pop the current node (which will be a noscript element) from the stack
# Pop the current node (which will be a noscript element) from the stack
# of open elements; the new current node will be a head element.
$this->stack->pop();
# Switch the insertion mode to "in head".
@ -1049,7 +1049,7 @@ class TreeBuilder {
# A character token that is U+0000 NULL
if ($token instanceof NullCharacterToken) {
# Parse error. Ignore the token
// DEVIATION: the parse error is already reported by the tokenizer;
// DEVIATION: the parse error is already reported by the tokenizer;
// this is probably an oversight in the specification, so we don't
// report it a second time
}
@ -1361,9 +1361,9 @@ class TreeBuilder {
if (($pos = $this->activeFormattingElementsList->findToMarker("a")) > -1) {
$this->error(ParseError::UNEXPECTED_START_TAG_IMPLIES_END_TAG, $token->name);
$element = $this->activeFormattingElementsList[$pos]['element'];
# ... run the adoption agency algorithm for the token,
# ... run the adoption agency algorithm for the token,
$this->adopt($token);
# ... then remove that element from the list of active formatting elements and the
# ... then remove that element from the list of active formatting elements and the
# stack of open elements if the adoption agency algorithm didn't already remove it
# (it might not have if the element is not in table scope).
$this->activeFormattingElementsList->removeSame($element);
@ -1394,7 +1394,7 @@ class TreeBuilder {
# If the stack of open elements has a nobr element in scope, then this is a parse error;
if($this->stack->hasElementInScope("nobr")) {
$this->error(ParseError::UNEXPECTED_START_TAG_IMPLIES_END_TAG, $token->name);
# ... run the adoption agency algorithm for the token,
# ... run the adoption agency algorithm for the token,
$this->adopt($token);
# ... then once again reconstruct the active formatting elements, if any.
$this->reconstructActiveFormattingElements();
@ -1433,7 +1433,7 @@ class TreeBuilder {
elseif ($token->name === "area" || $token->name === "br" || $token->name === "embed" || $token->name === "img" || $token->name === "keygen" || $token->name === "wbr") {
# Reconstruct the active formatting elements, if any.
$this->reconstructActiveFormattingElements();
# Insert an HTML element for the token.
# Insert an HTML element for the token.
# Immediately pop the current node off the stack of open elements.
$this->insertStartTagToken($token);
$this->stack->pop();
@ -1446,15 +1446,15 @@ class TreeBuilder {
elseif ($token->name === "input") {
# Reconstruct the active formatting elements, if any.
$this->reconstructActiveFormattingElements();
# Insert an HTML element for the token.
# Insert an HTML element for the token.
# Immediately pop the current node off the stack of open elements.
$element = $this->insertStartTagToken($token);
$this->stack->pop();
# Acknowledge the token's self-closing flag, if it is set.
$token->selfClosingAcknowledged = true;
# If the token does not have an attribute with the name "type",
# or if it does, but that attribute's value is not an ASCII
# case-insensitive match for the string "hidden", then:
# If the token does not have an attribute with the name "type",
# or if it does, but that attribute's value is not an ASCII
# case-insensitive match for the string "hidden", then:
# set the frameset-ok flag to "not ok".
// DEVIATION: check the element instead as this is simpler
if ($element->getAttribute("type") !== "hidden") {
@ -1487,7 +1487,7 @@ class TreeBuilder {
# A start tag whose tag name is "image"
elseif ($token->name === "image") {
# Parse error.
$this->error(ParseError::UNEXPECTED_START_TAG_ALIAS, $token->name, "img");
$this->error(ParseError::UNEXPECTED_START_TAG_ALIAS, $token->name, "img");
# Change the token's tag name to "img" and reprocess it. (Don't ask.)
$token->name = "img";
goto ProcessToken;
@ -1556,11 +1556,11 @@ class TreeBuilder {
$this->insertStartTagToken($token);
# Set the frameset-ok flag to "not ok".
$this->framesetOk = false;
# If the insertion mode is one of "in table", "in caption",
# "in table body", "in row", or "in cell", then switch
# If the insertion mode is one of "in table", "in caption",
# "in table body", "in row", or "in cell", then switch
# the insertion mode to "in select in table".
if (in_array($this->insertionMode, [
self::IN_TABLE_MODE,
self::IN_TABLE_MODE,
self::IN_CAPTION_MODE,
self::IN_TABLE_BODY_MODE,
self::IN_ROW_MODE,
@ -1586,7 +1586,7 @@ class TreeBuilder {
}
# A start tag whose tag name is one of: "rb", "rtc"
elseif ($token->name === "rb" || $token->name === "rtc") {
# If the stack of open elements has a ruby element in scope, then generate implied end tags.
# If the stack of open elements has a ruby element in scope, then generate implied end tags.
if ($this->stack->hasElementInScope("ruby")) {
$this->stack->generateImpliedEndTags();
# If the current node is not now a ruby element, this is a parse error.
@ -1600,7 +1600,7 @@ class TreeBuilder {
# A start tag whose tag name is one of: "rp", "rt"
elseif ($token->name == "rp" || $token->name === "rt") {
# If the stack of open elements has a ruby element in scope,
# then generate implied end tags, except for rtc elements.
# then generate implied end tags, except for rtc elements.
if ($this->stack->hasElementInScope("ruby")) {
$this->stack->generateImpliedEndTags("rtc");
# If the current node is not now a rtc element or a ruby element, this is a parse error.
@ -1615,7 +1615,7 @@ class TreeBuilder {
elseif ($token->name === "math") {
# Reconstruct the active formatting elements, if any.
$this->reconstructActiveFormattingElements();
# Adjust MathML attributes for the token. (This fixes the case of MathML attributes that are not all lowercase.)
# Adjust MathML attributes for the token. (This fixes the case of MathML attributes that are not all lowercase.)
# Adjust foreign attributes for the token. (This fixes the use of namespaced attributes, in particular XLink.)
foreach ($token->attributes as $a) {
if ($a->name === 'definitionurl') {
@ -1653,7 +1653,7 @@ class TreeBuilder {
elseif ($token->name === "caption" || $token->name === "col" || $token->name === "colgroup" || $token->name === "frame" || $token->name === "head" || $token->name === "tbody" || $token->name === "td" || $token->name === "tfoot" || $token->name === "th" || $token->name === "thead" || $token->name === "tr") {
# Parse error. Ignore the token.
$this->error(ParseError::UNEXPECTED_START_TAG, $token->name);
}
}
# Any other start tag
else {
# Reconstruct the active formatting elements, if any.
@ -1730,7 +1730,7 @@ class TreeBuilder {
# If there is no template element on the stack of open elements, then run these
# substeps:
if ($this->stack->find('template') === -1) {
# 1. Let node be the element that the form element pointer is set to,
# 1. Let node be the element that the form element pointer is set to,
# or null if it is not set to an element.
$node = $this->formElement;
# 2. Set the form element pointer to null.
@ -1772,7 +1772,7 @@ class TreeBuilder {
}
# An end tag whose tag name is "p"
elseif ($token->name === "p") {
# If the stack of open elements does not have a p element in button scope, then this is a parse error;
# If the stack of open elements does not have a p element in button scope, then this is a parse error;
if (!$this->stack->hasElementInButtonScope("p")) {
$this->error(ParseError::UNEXPECTED_END_TAG, $token->name);
# insert an HTML element for a "p" start tag token with no attributes.
@ -1783,7 +1783,7 @@ class TreeBuilder {
}
# An end tag whose tag name is "li"
elseif ($token->name === "li") {
# If the stack of open elements does not have an li element in
# If the stack of open elements does not have an li element in
# list item scope, then this is a parse error; ignore the token.
if (!$this->stack->hasElementInListItemScope("li")) {
$this->error(ParseError::UNEXPECTED_END_TAG, $token->name);
@ -1802,24 +1802,24 @@ class TreeBuilder {
}
# An end tag whose tag name is one of: "dd", "dt"
elseif ($token->name === "dd" || $token->name === "dt") {
# If the stack of open elements does not have an element in
# scope that is an HTML element with the same tag name as that of
# If the stack of open elements does not have an element in
# scope that is an HTML element with the same tag name as that of
# the token, then this is a parse error; ignore the token.
if (!$this->stack->hasElementInScope($token->name)) {
$this->error(ParseError::UNEXPECTED_END_TAG, $token->name);
}
# Otherwise, run these steps:
else {
# Generate implied end tags, except for HTML elements
# Generate implied end tags, except for HTML elements
# with the same tag name as the token.
$this->stack->generateImpliedEndTags($token->name);
# If the current node is not an HTML element with the same
# If the current node is not an HTML element with the same
# tag name as that of the token, then this is a parse error.
if ($this->stack->currentNodeName !== $token->name || $this->stack->currentNodeNamespace !== null) {
$this->error(ParseError::UNEXPECTED_END_TAG, $token->name);
}
# Pop elements from the stack of open elements until an HTML
# element with the same tag name as the token has been
# Pop elements from the stack of open elements until an HTML
# element with the same tag name as the token has been
# popped from the stack.
$this->stack->popUntil($token->name);
}
@ -1841,8 +1841,8 @@ class TreeBuilder {
if ($this->stack->currentNodeName !== $token->name || $this->stack->currentNodeNamespace !== null) {
$this->error(ParseError::UNEXPECTED_END_TAG, $token->name);
}
# Pop elements from the stack of open elements until an HTML
# element whose tag name is one of "h1", "h2", "h3", "h4",
# Pop elements from the stack of open elements until an HTML
# element whose tag name is one of "h1", "h2", "h3", "h4",
# "h5", or "h6" has been popped from the stack.
$this->stack->popUntil("h1", "h2", "h3", "h4", "h5", "h6");
}
@ -1859,7 +1859,7 @@ class TreeBuilder {
}
# An end tag token whose tag name is one of: "applet", "marquee", "object"
elseif ($token->name === "applet" || $token->name === "marquee" || $token->name === "object") {
# If the stack of open elements does not have an element in scope that
# If the stack of open elements does not have an element in scope that
# is an HTML element with the same tag name as that of the token, then
# this is a parse error; ignore the token.
if (!$this->stack->hasElementInScope($token->name)) {
@ -1869,13 +1869,13 @@ class TreeBuilder {
else {
# Generate implied end tags.
$this->stack->generateImpliedEndTags();
# If the current node is not an HTML element with the same tag
# If the current node is not an HTML element with the same tag
# name as that of the token, then this is a parse error.
if ($this->stack->currentNodeName !== $token->name || $this->stack->currentNodeNamespace !== null) {
$this->error(ParseError::UNEXPECTED_END_TAG, $token->name);
}
# Pop elements from the stack of open elements until an HTML
# element with the same tag name as the token has been
# Pop elements from the stack of open elements until an HTML
# element with the same tag name as the token has been
# popped from the stack.
$this->stack->popUntil($token->name);
# Clear the list of active formatting elements up to the last marker.
@ -1884,7 +1884,7 @@ class TreeBuilder {
}
# An end tag whose tag name is "br"
elseif ($token->name === "br") {
# Parse error. Drop the attributes from the token, and act as described
# Parse error. Drop the attributes from the token, and act as described
# in the next entry; i.e. act as if this was a "br" start tag token with
# no attributes, rather than the end tag token that it actually is.
$this->error(ParseError::UNEXPECTED_END_TAG, $token->name);
@ -1910,7 +1910,7 @@ class TreeBuilder {
$this->stack->popUntilSame($node);
continue 2;
}
# Otherwise, if node is in the special category, then
# Otherwise, if node is in the special category, then
# this is a parse error; ignore the token, and return.
elseif ($this->isElementSpecial($node)) {
$this->error(ParseError::UNEXPECTED_END_TAG, $token->name);
@ -1976,7 +1976,7 @@ class TreeBuilder {
// Anything else
else {
// No other cases are possible
throw new \Exception("UNREACHABLE CODE"); // @codeCoverageIgnore
throw new Exception(Exception::UNREACHABLE_CODE); // @codeCoverageIgnore
}
}
# 13.2.6.4.9 The "in table" insertion mode
@ -2188,7 +2188,7 @@ class TreeBuilder {
if ($token instanceof CharacterToken) {
$this->error(ParseError::FOSTERED_CHAR);
} elseif ($token instanceof StartTagToken) {
$this->error(ParseError::FOSTERED_START_TAG, $token->name);
$this->error(ParseError::FOSTERED_START_TAG, $token->name);
} elseif ($token instanceof EndTagToken) {
$this->error(ParseError::FOSTERED_END_TAG, $token->name);
}
@ -2221,7 +2221,7 @@ class TreeBuilder {
}
}
# If any of the tokens in the pending table character tokens
# list are character tokens that are not ASCII whitespace,
# list are character tokens that are not ASCII whitespace,
# then this is a parse error: reprocess the character tokens
# in the pending table character tokens list using the rules
# given in the "anything else" entry in the "in table"
@ -2295,7 +2295,7 @@ class TreeBuilder {
($token instanceof StartTagToken && in_array($token->name, ["caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"]))
|| ($token instanceof EndTagToken && $token->name === "table")
) {
$errorCode = ($token instanceof StartTagToken) ? ParseError::UNEXPECTED_START_TAG : ParseError::UNEXPECTED_END_TAG;
$errorCode = ($token instanceof StartTagToken) ? ParseError::UNEXPECTED_START_TAG : ParseError::UNEXPECTED_END_TAG;
# If the stack of open elements does not have a caption
# element in table scope, this is a parse error; ignore
# the token. (fragment case)
@ -2339,7 +2339,7 @@ class TreeBuilder {
# 13.2.6.4.12 The "in column group" insertion mode
elseif ($insertionMode === self::IN_COLUMN_GROUP_MODE) {
# A character token that is one of U+0009 CHARACTER TABULATION,
# U+000A LINE FEED (LF), U+000C FORM FEED (FF),
# U+000A LINE FEED (LF), U+000C FORM FEED (FF),
# U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
if ($token instanceof WhitespaceToken) {
# Insert the character.
@ -2413,7 +2413,7 @@ class TreeBuilder {
if ($token instanceof CharacterToken) {
$this->error(ParseError::UNEXPECTED_CHAR);
} elseif ($token instanceof StartTagToken) {
$this->error(ParseError::UNEXPECTED_START_TAG, $token->name);
$this->error(ParseError::UNEXPECTED_START_TAG, $token->name);
} elseif ($token instanceof EndTagToken) {
$this->error(ParseError::UNEXPECTED_END_TAG, $token->name);
}
@ -2878,7 +2878,7 @@ class TreeBuilder {
else {
# Parse error. Ignore the token.
// NOTE: All other cases are start or end tags handled above
throw new \Exception("UNREACHABLE CODE"); // @codeCoverageIgnore
throw new Exception(Exception::UNREACHABLE_CODE); // @codeCoverageIgnore
}
}
# 13.2.6.4.17 The "in select in table" insertion mode
@ -2955,7 +2955,7 @@ class TreeBuilder {
# of template insertion modes.
$this->templateInsertionModes->pop();
# Push "in table" onto the stack of template insertion
# modes so that it is the new current
# modes so that it is the new current
# template insertion mode.
$this->templateInsertionModes[] = self::IN_TABLE_MODE;
# Switch the insertion mode to "in table", and
@ -2982,7 +2982,7 @@ class TreeBuilder {
# Pop the current template insertion mode off the stack
# of template insertion modes.
$this->templateInsertionModes->pop();
# Push "in table body" onto the stack of template
# Push "in table body" onto the stack of template
# insertion modes so that it is the new current
# template insertion mode.
$this->templateInsertionModes[] = self::IN_TABLE_BODY_MODE;
@ -2996,7 +2996,7 @@ class TreeBuilder {
# Pop the current template insertion mode off the stack
# of template insertion modes.
$this->templateInsertionModes->pop();
# Push "in row" onto the stack of template insertion
# Push "in row" onto the stack of template insertion
# modes so that it is the new current template
# insertion mode.
$this->templateInsertionModes[] = self::IN_ROW_MODE;
@ -3108,7 +3108,7 @@ class TreeBuilder {
# Anything else
else {
# Parse error.
assert($token instanceof CharacterToken || $token instanceof TagToken, new \Exception("Unexpected token class ".get_class($token)));
assert($token instanceof CharacterToken || $token instanceof TagToken, new Exception(Exception::TREEBUILDER_INVALID_TOKEN_CLASS, get_class($token)));
if ($token instanceof StartTagToken) {
$this->error(ParseError::UNEXPECTED_START_TAG, $token->name);
} elseif ($token instanceof EndTagToken) {
@ -3210,7 +3210,7 @@ class TreeBuilder {
# Anything else
else {
# Parse error. Ignore the token.
assert($token instanceof CharacterToken || $token instanceof TagToken, new \Exception("Unexpected token class ".get_class($token)));
assert($token instanceof CharacterToken || $token instanceof TagToken, new Exception(Exception::TREEBUILDER_INVALID_TOKEN_CLASS, get_class($token)));
if ($token instanceof StartTagToken) {
$this->error(ParseError::UNEXPECTED_START_TAG, $token->name);
} elseif ($token instanceof EndTagToken) {
@ -3228,7 +3228,7 @@ class TreeBuilder {
# 13.2.6.4.21 The "after frameset" insertion mode
elseif ($insertionMode === self::AFTER_FRAMESET_MODE) {
# A character token that is one of U+0009 CHARACTER TABULATION,
# U+000A LINE FEED (LF), U+000C FORM FEED (FF),
# U+000A LINE FEED (LF), U+000C FORM FEED (FF),
# U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
if ($token instanceof WhitespaceToken) {
# Insert the character.
@ -3271,7 +3271,7 @@ class TreeBuilder {
# Anything else
else {
# Parse error. Ignore the token.
assert($token instanceof CharacterToken || $token instanceof TagToken, new \Exception("Unexpected token class ".get_class($token)));
assert($token instanceof CharacterToken || $token instanceof TagToken, new Exception(Exception::TREEBUILDER_INVALID_TOKEN_CLASS, get_class($token)));
if ($token instanceof StartTagToken) {
$this->error(ParseError::UNEXPECTED_START_TAG, $token->name);
} elseif ($token instanceof EndTagToken) {
@ -3312,7 +3312,7 @@ class TreeBuilder {
# Anything else
else {
# Parse error.
assert($token instanceof CharacterToken || $token instanceof TagToken, new \Exception("Unexpected token class ".get_class($token)));
assert($token instanceof CharacterToken || $token instanceof TagToken, new Exception(Exception::TREEBUILDER_INVALID_TOKEN_CLASS, get_class($token)));
if ($token instanceof StartTagToken) {
$this->error(ParseError::UNEXPECTED_START_TAG, $token->name);
} elseif ($token instanceof EndTagToken) {
@ -3358,7 +3358,7 @@ class TreeBuilder {
# Anything else
else {
# Parse error. Ignore the token.
assert($token instanceof CharacterToken || $token instanceof TagToken, new \Exception("Unexpected token class ".get_class($token)));
assert($token instanceof CharacterToken || $token instanceof TagToken, new Exception(Exception::TREEBUILDER_INVALID_TOKEN_CLASS, get_class($token)));
if ($token instanceof StartTagToken) {
$this->error(ParseError::UNEXPECTED_START_TAG, $token->name);
} elseif ($token instanceof EndTagToken) {
@ -3369,26 +3369,26 @@ class TreeBuilder {
}
}
else {
throw new \Exception("UNREACHABLE CODE"); // @codeCoverageIgnore
throw new Exception(Exception::UNREACHABLE_CODE); // @codeCoverageIgnore
}
}
# Otherwise
else {
# Process the token according to the rules given in the section
# for parsing tokens in foreign content.
assert((function() {
$this->debugLog .= " Mode: Foreign content (".(string) $this->stack.")\n";
return true;
})());
# 13.2.6.5 The rules for parsing tokens in foreign content
#
# When the user agent is to apply the rules for parsing tokens in foreign
# content, the user agent must handle the token as follows:
// NOTE: Foster parenting is turned off when evaluating this
// mode as it may have been turned on in a previous evaluation
// of the "in table" mode
@ -3431,51 +3431,51 @@ class TreeBuilder {
# "sub", "sup", "table", "tt", "u", "ul", "var"
# A start tag whose tag name is "font", if the token has any attributes named
# "color", "face", or "size"
if ($token->name === 'b'
|| $token->name === 'big'
|| $token->name === 'blockquote'
|| $token->name === 'body'
|| $token->name === 'br'
|| $token->name === 'center'
|| $token->name === 'code'
|| $token->name === 'dd'
|| $token->name === 'div'
|| $token->name === 'dl'
|| $token->name === 'dt'
|| $token->name === 'em'
|| $token->name === 'embed'
|| $token->name === 'h1'
|| $token->name === 'h2'
|| $token->name === 'h3'
|| $token->name === 'h4'
|| $token->name === 'h5'
|| $token->name === 'h6'
|| $token->name === 'head'
|| $token->name === 'hr'
|| $token->name === 'i'
|| $token->name === 'img'
|| $token->name === 'li'
|| $token->name === 'listing'
|| $token->name === 'menu'
|| $token->name === 'meta'
|| $token->name === 'nobr'
|| $token->name === 'ol'
|| $token->name === 'p'
|| $token->name === 'pre'
|| $token->name === 'ruby'
|| $token->name === 's'
|| $token->name === 'small'
|| $token->name === 'span'
|| $token->name === 'strong'
|| $token->name === 'strike'
|| $token->name === 'sub'
|| $token->name === 'sup'
|| $token->name === 'table'
|| $token->name === 'tt'
|| $token->name === 'u'
|| $token->name === 'ul'
|| $token->name === 'var'
|| ($token->name === 'font'
if ($token->name === 'b'
|| $token->name === 'big'
|| $token->name === 'blockquote'
|| $token->name === 'body'
|| $token->name === 'br'
|| $token->name === 'center'
|| $token->name === 'code'
|| $token->name === 'dd'
|| $token->name === 'div'
|| $token->name === 'dl'
|| $token->name === 'dt'
|| $token->name === 'em'
|| $token->name === 'embed'
|| $token->name === 'h1'
|| $token->name === 'h2'
|| $token->name === 'h3'
|| $token->name === 'h4'
|| $token->name === 'h5'
|| $token->name === 'h6'
|| $token->name === 'head'
|| $token->name === 'hr'
|| $token->name === 'i'
|| $token->name === 'img'
|| $token->name === 'li'
|| $token->name === 'listing'
|| $token->name === 'menu'
|| $token->name === 'meta'
|| $token->name === 'nobr'
|| $token->name === 'ol'
|| $token->name === 'p'
|| $token->name === 'pre'
|| $token->name === 'ruby'
|| $token->name === 's'
|| $token->name === 'small'
|| $token->name === 'span'
|| $token->name === 'strong'
|| $token->name === 'strike'
|| $token->name === 'sub'
|| $token->name === 'sup'
|| $token->name === 'table'
|| $token->name === 'tt'
|| $token->name === 'u'
|| $token->name === 'ul'
|| $token->name === 'var'
|| ($token->name === 'font'
&& ($token->hasAttribute('color') || $token->hasAttribute('face') || $token->hasAttribute('size'))
)
) {
@ -3488,7 +3488,7 @@ class TreeBuilder {
while (($node = $this->stack->currentNode) && !($node->namespaceURI === null || $this->isMathMLTextIntegrationPoint($node) || $this->isHTMLIntegrationPoint($node))) {
$this->stack->pop();
}
# Process the token using the rules for the
# Process the token using the rules for the
# "in body" insertion mode.
// DEVIATION: Spec bug
// See https://github.com/whatwg/html/issues/6439
@ -3532,7 +3532,7 @@ class TreeBuilder {
# attributes, in particular lang attributes in the XML namespace.)
// DOMElement::setAttributeNS requires the prefix and local name be in one
// string, so there is no need to separate the prefix and the local name here.
$a->namespace = self::FOREIGN_ATTRIBUTE_NAMESPACE_MAP[$a->name] ?? null;
$a->namespace = self::FOREIGN_ATTRIBUTE_NAMESPACE_MAP[$a->name] ?? null;
}
# Insert a foreign element for the token, in the same namespace as the adjusted
# current node.
@ -3564,7 +3564,7 @@ class TreeBuilder {
// We do this below before the loop
# If node's tag name, converted to ASCII lowercase, is not the
# same as the tag name of the token, then this is a parse error.
// DEVIATION: We only generate the parse error if we don't reach
// DEVIATION: We only generate the parse error if we don't reach
// "Otherwise" below, to avoid reporting the parse error a second
// time in HTML content parsing
$pos = count($this->stack) - 1;
@ -3607,8 +3607,8 @@ class TreeBuilder {
}
protected function adopt(TagToken $token): void {
# The adoption agency algorithm, which takes as its only argument a
# token 'token' for which the algorithm is being run, consists of
# The adoption agency algorithm, which takes as its only argument a
# token 'token' for which the algorithm is being run, consists of
# the following steps:
assert((function() {
@ -3619,7 +3619,7 @@ class TreeBuilder {
# Let subject be token's tag name.
$subject = $token->name;
$errorCode = $token instanceof StartTagToken ? ParseError::UNEXPECTED_START_TAG : ParseError::UNEXPECTED_END_TAG;
# If the current node is an HTML element whose tag name is subject,
# If the current node is an HTML element whose tag name is subject,
# and the current node is not in the list of active formatting elements,
# then pop the current node off the stack of open elements, and return.
$currentNode = $this->stack->currentNode;
@ -3640,7 +3640,7 @@ class TreeBuilder {
}
# Increment outer loop counter by one.
$outerLoopCounter++;
# Let formatting element be the last element in the list of active
# Let formatting element be the last element in the list of active
# formatting elements that:
# 1. is between the end of the list and the last marker in the list,
# if any, or the start of the list otherwise, and
@ -3671,7 +3671,7 @@ class TreeBuilder {
}
}
}
# If formatting element is not in the stack of open elements,
# If formatting element is not in the stack of open elements,
# then this is a parse error; remove the element from the
# list, and return.
if (($stackIndex = $this->stack->findSame($formattingElement)) === -1) {
@ -3679,7 +3679,7 @@ class TreeBuilder {
unset($this->activeFormattingElementsList[$formattingElementIndex]);
return;
}
# If formatting element is in the stack of open elements, but
# If formatting element is in the stack of open elements, but
# the element is not in scope, then this is a parse error; return.
if (!$this->stack->hasElementInScope($formattingElement)) {
$this->error($errorCode, $token->name);
@ -3695,7 +3695,7 @@ class TreeBuilder {
# special category. There might not be one.
$furthestBlock = null;
for ($k = ($stackIndex + 1); $k < count($this->stack); $k++) {
if ($this->isElementSpecial($this->stack[$k])) {
if ($this->isElementSpecial($this->stack[$k])) {
$furthestBlockIndex = $k;
$furthestBlock = $this->stack[$k];
break;
@ -3727,7 +3727,7 @@ class TreeBuilder {
InnerLoop:
$innerLoopCounter++;
# Let node be the element immediately above node in the stack of open
# elements, or if node is no longer in the stack of open elements
# elements, or if node is no longer in the stack of open elements
# (e.g. because it got removed by this algorithm), the element that
# was immediately above node in the stack of open elements before
# node was removed.
@ -3758,7 +3758,7 @@ class TreeBuilder {
}
# Create an element for the token for which the element node was
# created, in the HTML namespace, with common ancestor as the
# intended parent; replace the entry for node in the list of
# intended parent; replace the entry for node in the list of
# active formatting elements with an entry for the new element,
# replace the entry for node in the stack of open elements with
# an entry for the new element, and let node be the new element.
@ -3784,7 +3784,7 @@ class TreeBuilder {
# Return to the step labeled inner loop.
goto InnerLoop;
# Insert whatever last node ended up being in the previous step
# at the appropriate place for inserting a node, but using
# at the appropriate place for inserting a node, but using
# common ancestor as the override target.
AfterInnerLoop:
$place = $this->appropriatePlaceForInsertingNode($commonAncestor);
@ -3812,7 +3812,7 @@ class TreeBuilder {
# Remove formatting element from the stack of open elements, and
# insert the new element into the stack of open elements
# immediately below the position of furthest block in that stack.
assert($stackIndex > 0, new \Exception("Attempting to delete root element from stack"));
assert($stackIndex > 0, new Exception(Exception::STACK_ROOT_ELEMENT_DELETE));
$this->stack->removeSame($formattingElement);
$this->stack->insert($element, $this->stack->findSame($furthestBlock) + 1);
# Jump back to the step labeled outer loop.
@ -3830,12 +3830,12 @@ class TreeBuilder {
# override target, is the position in an element returned by running the
# following steps:
# If there was an override target specified, then let target
# If there was an override target specified, then let target
# be the override target. Otherwise, let target be the current node.
$target = $overrideTarget ?? $this->stack->currentNode;
assert(isset($target), new \Exception("Open elements stack is empty"));
assert(isset($target), new Exception(Exception::STACK_INCORRECTLY_EMPTY));
# Determine the adjusted insertion location using the first matching steps
# from the following list:
# from the following list:
$targetNodeName = $target->nodeName;
# If foster parenting is enabled and target is a table, tbody, tfoot, thead, or tr element
if ($this->fosterParenting && ($targetNodeName === 'table' || $targetNodeName === 'tbody' || $targetNodeName === 'tfoot' || $targetNodeName === 'thead' || $targetNodeName === 'tr')) {
@ -3911,7 +3911,7 @@ class TreeBuilder {
$location = $this->appropriatePlaceForInsertingNode();
$adjustedInsertionLocation = $location['node'];
$insertBefore = $location['insert before'];
assert($adjustedInsertionLocation instanceof \DOMNode, new \Exception("No insertion location"));
assert($adjustedInsertionLocation instanceof \DOMNode, new Exception(Exception::TREEBUILDER_INVALID_INSERTION_LOCATION));
# 3. If the adjusted insertion location is in a Document node, then abort these
# steps.
if ((($insertBefore === false) ? $adjustedInsertionLocation : $adjustedInsertionLocation->parentNode) instanceof \DOMDocument) {
@ -4304,7 +4304,7 @@ class TreeBuilder {
# Return element.
return $element;
}
public function isMathMLTextIntegrationPoint(Element $e): bool {
return (
$e->namespaceURI === Parser::MATHML_NAMESPACE && (
@ -4346,7 +4346,7 @@ class TreeBuilder {
if ($entry instanceof ActiveFormattingElementsMarker || $this->stack->findSame($entry['element']) > -1) {
return;
}
# 3. Let entry be the last (most recently added) element in the list of
# 3. Let entry be the last (most recently added) element in the list of
# active formatting elements.
// Already done
while ($pos >= 0) {

Loading…
Cancel
Save