Browse Source

Overhaul open elements stack

ns
J. King 3 years ago
parent
commit
979cec628e
  1. 6
      lib/ActiveFormattingElementsList.php
  2. 496
      lib/OpenElementsStack.php
  3. 16
      lib/TemplateInsertionModesStack.php
  4. 122
      lib/TreeBuilder.php

6
lib/ActiveFormattingElementsList.php

@ -17,8 +17,10 @@ namespace dW\HTML5;
class ActiveFormattingElementsList extends Stack {
protected $_storage = [];
protected $stack;
protected $tree;
public function __construct(OpenElementsStack $stack) {
public function __construct(TreeBuilder $tree, OpenElementsStack $stack) {
$this->tree = $tree;
$this->stack = $stack;
}
@ -143,7 +145,7 @@ class ActiveFormattingElementsList extends Stack {
# 8. Create: Insert an HTML element for the token for which the element entry
# was created, to obtain new element.
create:
$element = TreeBuilder::insertStartTagToken($entry['token']);
$element = $this->tree->insertStartTagToken($entry['token']);
# 9. Replace the entry for entry in the list with an entry for new element.
$this->_storage[key($this->_storage)]['element'] = $element;

496
lib/OpenElementsStack.php

@ -2,7 +2,91 @@
declare(strict_types=1);
namespace dW\HTML5;
class OpenElementsStack extends Stack {
class OpenElementsStack extends \splStack {
protected const IMPLIED_END_TAGS = [
'dd' => true,
'dt' => true,
'li' => true,
'optgroup' => true,
'option' => true,
'p' => true,
'rb' => true,
'rp' => true,
'rt' => true,
'rtc' => true,
];
protected const IMPLIED_END_TAGS_THOROUGH = [
'caption' => true,
'colgroup' => true,
'dd' => true,
'dt' => true,
'li' => true,
'optgroup' => true,
'option' => true,
'p' => true,
'rb' => true,
'rp' => true,
'rt' => true,
'rtc' => true,
'tbody' => true,
'td' => true,
'tfoot' => true,
'th' => true,
'thead' => true,
'tr' => true,
];
protected const GENERAL_SCOPE = [
Parser::HTML_NAMESPACE => [
'applet',
'caption',
'html',
'table',
'td',
'th',
'marquee',
'object',
'template'
],
Parser::MATHML_NAMESPACE => [
'mi',
'mo',
'mn',
'ms',
'mtext',
'annotation-xml'
],
Parser::SVG_NAMESPACE => [
'foreignObject',
'desc',
'title'
],
];
protected const LIST_ITEM_SCOPE = [
// everything in general scope, and these in the HTML namespace
'ol',
'ul',
];
protected const BUTTON_SCOPE = [
// everything in general scope, and these in the HTML namespace
'button',
];
protected const TABLE_SCOPE = [
Parser::HTML_NAMESPACE => [
'html',
'table',
'template',
],
];
protected const SELECT_SCOPE = [
// all elements EXCEPT these
Parser::HTML_NAMESPACE => [
'optgroup',
'option',
],
];
protected $fragmentCase;
protected $fragmentContext;
@ -18,88 +102,68 @@ class OpenElementsStack extends Stack {
$this->fragmentContext = $fragmentContext;
}
public function popUntil($target) {
if ($target instanceof Element) {
do {
$node = $this->pop;
} while (!$node->isSameNode($target));
} elseif (is_string($target)) {
do {
$poppedNodeName = $this->pop()->nodeName;
} while ($poppedNodeName !== $target);
} elseif (is_array($target)) {
do {
$poppedNodeName = $this->pop()->nodeName;
} while (!in_array($poppedNodeName, $target));
} else {
throw new Exception(Exception::STACK_ELEMENT_STRING_ARRAY_EXPECTED);
}
public function popUntil(string ...$target): void {
do {
$node = $this->pop();
} while (!in_array($node->nodeName, $target));
}
public function search($needle): int {
if (!$needle) {
return -1;
}
public function popUntilSame(Element $target): void {
do {
$node = $this->pop();
} while (!$node->isSameNode($target));
}
if ($needle instanceof \DOMElement) {
foreach (array_reverse($this->_storage) as $key => $value) {
if ($value->isSameNode($needle)) {
return $key;
}
}
} elseif (is_string($needle)) {
foreach (array_reverse($this->_storage) as $key => $value) {
if ($value->nodeName === $needle) {
return $key;
}
}
} elseif ($needle instanceof \Closure) {
foreach (array_reverse($this->_storage) as $key => $value) {
if ($needle($value) === true) {
return $key;
}
public function find(string ...$name): int {
foreach ($this as $k => $node) {
if (in_array($node->nodeName, $name)) {
return $k;
}
}
return -1;
}
// Remove an arbitrary element from the array.
public function remove($target) {
$key = $this->search($target);
if ($key === -1) {
return;
} elseif ($key === count($this->_storage) - 1) {
$this->pop();
return;
public function findNot(string ...$name): int {
foreach ($this as $k => $node) {
if (!in_array($node->nodeName, $name)) {
return $k;
}
}
unset($this->_storage[$key]);
$this->_storage = array_values($this->_storage);
return -1;
}
public function generateImpliedEndTags(array $exclude = []) {
$tags = ['caption', 'colgroup', 'dd', 'dt', 'li', 'optgroup', 'option', 'p', 'rb', 'rp', 'rt', 'rtc', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'];
if (count($exclude) > 0) {
$modified = false;
foreach ($exclude as $e) {
$key = array_search($e, $tags);
if ($key !== false) {
unset($tags[$key]);
$modified = true;
}
public function findSame(\DOMElement $node): int {
foreach ($this as $k => $node) {
if ($node->isSameNode($node)) {
return $k;
}
}
return -1;
}
if ($modified) {
$tags = array_values($tags);
}
public function generateImpliedEndTags(string ...$exclude): void {
# When the steps below require the UA to generate implied end tags,
# then, while the current node is {elided list of element names},
# the UA must pop the current node off the stack of open elements.
#
# If a step requires the UA to generate implied end tags but lists
# an element to exclude from the process, then the UA must perform
# the above steps as if that element was not in the above list.
$map = self::IMPLIED_END_TAGS;
foreach($exclude as $name) {
$map[$name] = false;
}
while (!$this->isEmpty() && ($map[$this->top()->nodeName] ?? false)) {
$this->pop();
}
}
$currentNodeName = end($this->_storage)->nodeName;
while (in_array($currentNodeName, $tags)) {
public function generateImpliedEndTagsThoroughly(): void {
# When the steps below require the UA to generate all implied end tags
# thoroughly, then, while the current node is {elided list of element names},
# the UA must pop the current node off the stack of open elements.
while (!$this->isEmpty() && (self::IMPLIED_END_TAGS_THOROUGH[$this->top()->nodeName] ?? false)) {
$this->pop();
$currentNodeName = end($this->_storage)->nodeName;
}
}
@ -108,296 +172,102 @@ class OpenElementsStack extends Stack {
# it has that element in the specific scope consisting of the following element
# types:
#
# applet
# caption
# html
# table
# td
# th
# marquee
# object
# template
# MathML mi
# MathML mo
# MathML mn
# MathML ms
# MathML mtext
# MathML annotation-xml
# SVG foreignObject
# SVG desc
# SVG title
$list = [
Parser::HTML_NAMESPACE => [
'applet',
'caption',
'html',
'table',
'td',
'th',
'marquee',
'object',
'template'
],
Parser::MATHML_NAMESPACE => [
'mi',
'mo',
'mn',
'ms',
'mtext',
'annotation-xml'
],
Parser::SVG_NAMESPACE => [
'foreignObject',
'desc',
'title'
]
];
return $this->hasElementInScopeHandler($target, $list);
# {elided}
return $this->hasElementInScopeHandler($target, self::GENERAL_SCOPE);
}
public function hasElementInListItemScope($target): bool {
# The stack of open elements is said to have a particular element in list item scope when it has that element in the specific scope consisting of the following element types:
#
# All the element types listed above for the has an element in scope algorithm.
# ol in the HTML namespace
# ul in the HTML namespace
$list = [
Parser::HTML_NAMESPACE => [
'applet',
'caption',
'html',
'table',
'td',
'th',
'marquee',
'object',
'template',
'ol',
'ul'
],
Parser::MATHML_NAMESPACE => [
'mi',
'mo',
'mn',
'ms',
'mtext',
'annotation-xml'
],
Parser::SVG_NAMESPACE => [
'foreignObject',
'desc',
'title'
]
];
return $this->hasElementInScopeHandler($target, $list);
$scope = self::GENERAL_SCOPE;
$scope[Parser::HTML_NAMESPACE] = array_merge($scope[Parser::HTML_NAMESPACE], self::LIST_ITEM_SCOPE);
return $this->hasElementInScopeHandler($target, $scope);
}
public function hasElementInButtonScope($target): bool {
# The stack of open elements is said to have a particular element in button
# scope when it has that element in the specific scope consisting of the
# following element types:
#
# All the element types listed above for the has an element in scope algorithm.
# button in the HTML namespace
$list = [
Parser::HTML_NAMESPACE => [
'applet',
'caption',
'html',
'table',
'td',
'th',
'marquee',
'object',
'template',
'button'
],
Parser::MATHML_NAMESPACE => [
'mi',
'mo',
'mn',
'ms',
'mtext',
'annotation-xml'
],
Parser::SVG_NAMESPACE => [
'foreignObject',
'desc',
'title'
]
];
return $this->hasElementInScopeHandler($target, $list);
$scope = self::GENERAL_SCOPE;
$scope[Parser::HTML_NAMESPACE] = array_merge($scope[Parser::HTML_NAMESPACE], self::BUTTON_SCOPE);
return $this->hasElementInScopeHandler($target, $scope);
}
public function hasElementInTableScope($target): bool {
# The stack of open elements is said to have a particular element in table scope
# when it has that element in the specific scope consisting of the following
# element types:
#
# All the element types listed above for the has an element in scope algorithm.
# html in the HTML namespace
# table in the HTML namespace
# template in the HTML namespace
// Not sure what to do here. I am going to assume the elements without a
// namespace in the element types listed above are meant for the HTML namespace.
// If so then these listed here are redundant. My interpretation therefore has
// this being an alias for hasElementInScope.
return $this->hasElementInScope($target);
return $this->hasElementInScopeHandler($target, self::TABLE_SCOPE);
}
public function hasElementInSelectScope(string $target): bool {
# The stack of open elements is said to have a particular element in select
# scope when it has that element in the specific scope consisting of all element
# types except the following:
# The stack of open elements is said to have a particular element
# in select scope when it has that element in the specific scope
# consisting of all element types EXCEPT the following:
#
# All the element types listed above for the has an element in scope algorithm.
# optgroup in the HTML namespace
# option in the HTML namespace
$list = [
Parser::HTML_NAMESPACE => [
'applet',
'caption',
'html',
'table',
'td',
'th',
'marquee',
'object',
'template',
'button',
'optgroup',
'option'
],
Parser::MATHML_NAMESPACE => [
'mi',
'mo',
'mn',
'ms',
'mtext',
'annotation-xml'
],
Parser::SVG_NAMESPACE => [
'foreignObject',
'desc',
'title'
]
];
return $this->hasElementInScopeHandler($target, $list);
return $this->hasElementInScopeHandler($target, self::SELECT_SCOPE, false);
}
protected function hasElementInScopeHandler($target, array $list): bool {
# 1. Initialize node to be the current node (the bottommost node of the stack).
// Handled by loop.
foreach (array_reverse($this->_storage) as $node) {
# 2. If node is the target node, terminate in a match state.
if ($target instanceof \DOMElement) {
protected function hasElementInScopeHandler($target, array $list, $matchType = true): bool {
assert(is_string($target) || $target instanceof \DOMElement, new \Exception("Invalid input type"));
# The stack of open elements is said to have an element target node
# in a specific scope consisting of a list of element types list
# when the following algorithm terminates in a match state:
if ($target instanceof \DOMElement) {
# Initialize node to be the current node (the bottommost node of the stack).
foreach ($this as $node) {
# If node is the target node, terminate in a match state.
if ($node->isSameNode($target)) {
return true;
}
} elseif (is_string($target)) {
# Otherwise, if node is one of the element types in list, terminate in a failure state.
$ns = $node->namespaceURI ?? Parser::HTML_NAMESPACE;
if (in_array($node->nodeName, $list[$ns] ?? []) === $matchType) {
return false;
}
# Otherwise, set node to the previous entry in the stack of
# open elements and return to step 2. (This will never fail,
# since the loop will always terminate in the previous step
# if the top of the stack — an html element — is reached.)
}
} else {
# Initialize node to be the current node (the bottommost node of the stack).
foreach ($this as $node) {
# If node is the target node, terminate in a match state.
if ($node->nodeName === $target) {
return true;
}
}
# 3. Otherwise, if node is one of the element types in list, terminate in a
# failure state.
else {
foreach ($list as $namespace => $subList) {
if ($namespace === Parser::HTML_NAMESPACE) {
$namespace = '';
}
if ($node->namespaceURI !== $namespace) {
continue;
}
foreach ($subList as $name) {
if ($node->nodeName === $name) {
return false;
}
}
# Otherwise, if node is one of the element types in list, terminate in a failure state.
$ns = $node->namespaceURI ?? Parser::HTML_NAMESPACE;
if (in_array($node->nodeName, $list[$ns] ?? []) === $matchType) {
return false;
}
# Otherwise, set node to the previous entry in the stack of
# open elements and return to step 2. (This will never fail,
# since the loop will always terminate in the previous step
# if the top of the stack — an html element — is reached.)
}
# Otherwise, set node to the previous entry in the stack of open elements and
# return to step 2. (This will never fail, since the loop will always terminate
# in the previous step if the top of the stack — an html element — is reached.)
// Handled by loop.
}
return false;
}
public function __get($property) {
$value = parent::__get($property);
if (!is_null($value)) {
return $value;
}
switch ($property) {
case 'adjustedCurrentNode':
# The adjusted current node is the context element if the parser was created by
# the HTML fragment parsing algorithm and the stack of open elements has only one
# element in it (fragment case); otherwise, the adjusted current node is the
# current node.
return ($this->fragmentCase && $this->length === 1) ? $this->fragmentContext : $this->currentNode;
break;
return ($this->fragmentCase && count($this) === 1) ? $this->fragmentContext : $this->__get('currentNode');
case 'adjustedCurrentNodeName':
$adjustedCurrentNode = $this->adjustedCurrentNode;
$adjustedCurrentNode = $this->__get('adjustedCurrentNode');
return (!is_null($adjustedCurrentNode)) ? $adjustedCurrentNode->nodeName : null;
break;
case 'adjustedCurrentNodeNamespace':
$adjustedCurrentNode = $this->adjustedCurrentNode;
$adjustedCurrentNode = $this->__get('adjustedCurrentNode');
return (!is_null($adjustedCurrentNode)) ? $adjustedCurrentNode->namespaceURI: null;
break;
case 'currentNode':
$currentNode = end($this->_storage);
return ($currentNode) ? $currentNode : null;
break;
return $this->isEmpty() ? null : $this->top();
case 'currentNodeName':
$currentNode = $this->currentNode;
$currentNode = $this->__get('currentNode');
return ($currentNode && $currentNode->nodeType) ? $currentNode->nodeName : null;
break;
case 'currentNodeNamespace':
$currentNode = $this->currentNode;
$currentNode = $this->__get('currentNode');
return (!is_null($currentNode)) ? $currentNode->namespaceURI: null;
break;
default: return null;
}
}
// Used when listing expected elements when returning parse errors
public function __toString(): string {
if (count($this->_storage) > 1) {
// Don't output the name of the root element.
for ($i = 1, $temp = []; $i < count($this->_storage) - 1; $i++) {
$temp[] = $this->_storage[$i]->nodeName;
}
return implode(', ', array_unique($temp));
} else {
return '';
default:
return null;
}
}
}

16
lib/TemplateInsertionModesStack.php

@ -2,19 +2,13 @@
declare(strict_types=1);
namespace dW\HTML5;
class TemplateInsertionModesStack extends Stack {
class TemplateInsertionModesStack extends \SplStack {
public function __get($property) {
$value = parent::__get($property);
if (!is_null($value)) {
return $value;
}
switch ($property) {
case 'currentMode': return
$currentMode = end($this->_storage);
return ($currentMode) ? $currentMode : null;
break;
default: return null;
case 'currentMode':
return $this->isEmpty() ? null : $this->top();
default:
return null;
}
}
}

122
lib/TreeBuilder.php

@ -193,8 +193,23 @@ class TreeBuilder {
'ychannelselector' => 'yChannelSelector',
'zoomandpan' => 'zoomAndPan',
];
# The following elements have varying levels of special parsing rules: HTML’s
# address, applet, area, article, aside, base, basefont, bgsound, blockquote,
# body, br, button, caption, center, col, colgroup, dd, details, dir, div, dl,
# dt, embed, fieldset, figcaption, figure, footer, form, frame, frameset, h1,
# h2, h3, h4, h5, h6, head, header, hr, html, iframe, img, input, li, link,
# listing, main, marquee, meta, nav, noembed, noframes, noscript, object, ol, p,
# param, plaintext, pre, script, section, select, source, style, summary, table,
# tbody, td, template, textarea, tfoot, th, thead, title, tr, track, ul, wbr,
# xmp; MathML mi, MathML mo, MathML mn, MathML ms, MathML mtext, and MathML
# annotation-xml; and SVG foreignObject, SVG desc, and SVG title.
protected const SPECIAL_ELEMENTS = [
Parser::HTML_NAMESPACE => ['address', 'applet', 'area', 'article', 'aside', 'base', 'basefont', 'bgsound', 'blockquote', 'body', 'br', 'button', 'caption', 'center', 'col', 'colgroup', 'dd', 'details', 'dir', 'div', 'dl', 'dt', 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'iframe', 'img', 'input', 'li', 'link', 'listing', 'main', 'marquee', 'meta', 'nav', 'noembed', 'noframes', 'noscript', 'object', 'ol', 'p', 'param', 'plaintext', 'pre', 'script', 'section', 'select', 'source', 'style', 'summary', 'table', 'tbody', 'td', 'template', 'textarea', 'tfoot', 'th', 'thead', 'title', 'tr', 'track', 'ul', 'wbr', 'xmp'],
Parser::MATHML_NAMESPACE => ['mi', 'mo', 'mn', 'ms', 'mtext', 'annotation-xml'],
Parser::SVG_NAMESPACE => ['foreignObject', 'desc', 'title'],
];
public function __construct(Document $dom, $formElement, bool $fragmentCase = false, $fragmentContext = null, OpenElementsStack $stack, Stack $templateInsertionModes, Tokenizer $tokenizer, ParseError $errorHandler, Data $data) {
public function __construct(Document $dom, $formElement, bool $fragmentCase = false, $fragmentContext = null, OpenElementsStack $stack, TemplateInsertionModesStack $templateInsertionModes, Tokenizer $tokenizer, ParseError $errorHandler, Data $data) {
// If the form element isn't an instance of DOMElement that has a node name of
// "form" or null then there's a problem.
if (!is_null($formElement) && !($formElement instanceof \DOMElement && $formElement->nodeName === 'form')) {
@ -221,7 +236,7 @@ class TreeBuilder {
$this->errorHandler = $errorHandler;
// Initialize the list of active formatting elements.
$this->activeFormattingElementsList = new ActiveFormattingElementsList($stack);
$this->activeFormattingElementsList = new ActiveFormattingElementsList($this, $stack);
$this->insertionMode = self::INITIAL_MODE;
$this->quirksMode = self::QUIRKS_MODE_OFF;
@ -238,6 +253,7 @@ class TreeBuilder {
assert($iterations++ < 50, new LoopException("Probable infinite loop detected in HTML content handling"));
$adjustedCurrentNode = $this->stack->adjustedCurrentNode;
$adjustedCurrentNodeName = $this->stack->adjustedCurrentNodeName;
assert(!$adjustedCurrentNode || $adjustedCurrentNodeName, new \Exception("The adjusted current node must have a name if not null"));
$adjustedCurrentNodeNamespace = $this->stack->adjustedCurrentNodeNamespace;
# 13.2.6 Tree construction
@ -246,7 +262,7 @@ class TreeBuilder {
# appropriate steps from the following list, known as the tree construction dispatcher:
#
# If the stack of open elements is empty
if ($this->stack->length === 0 ||
if (count($this->stack) === 0 ||
# If the adjusted current node is an element in the HTML namespace
// PHP's DOM returns null when the namespace isn't specified... eg. HTML.
is_null($adjustedCurrentNodeNamespace) || (
@ -747,7 +763,7 @@ class TreeBuilder {
elseif ($token->name === 'template') {
# If there is no template element on the stack of open elements, then this is a
# parse error; ignore the token.
if ($this->stack->search('template') === -1) {
if ($this->stack->find('template') === -1) {
$this->error(ParseError::UNEXPECTED_END_TAG);
}
# Otherwise, run these steps:
@ -916,7 +932,7 @@ class TreeBuilder {
$this->parseTokenInHTMLContent($token, self::IN_HEAD_MODE);
# Remove the node pointed to by the head element pointer from the stack of open
# elements. (It might not be the current node at this point.)
$key = $this->stack->search($this->headElement);
$key = $this->stack->findSame($this->headElement);
if ($key !== -1) {
unset($this->stack[$key]);
}
@ -1015,7 +1031,7 @@ class TreeBuilder {
$this->error(ParseError::UNEXPECTED_START_TAG, 'html');
# If there is a template element on the stack of open elements, then ignore the
# token.
if ($this->stack->search('template') === -1) {
if ($this->stack->find('template') === -1) {
# Otherwise, for each attribute on the token, check to see if the attribute is
# already present on the top element of the stack of open elements. If it is
# not, add the attribute and its corresponding value to that element.
@ -1040,7 +1056,7 @@ class TreeBuilder {
# If the second element on the stack of open elements is not a body element, if
# the stack of open elements has only one node on it, or if there is a template
# element on the stack of open elements, then ignore the token. (fragment case)
if (!($this->stack[1]->tagName !== 'body' || $this->stack->length === 1 || $this->stack->search('template') !== -1)) {
if (!($this->stack[1]->tagName !== 'body' || count($this->stack) === 1 || $this->stack->find('template') !== -1)) {
# Otherwise, set the frameset-ok flag to "not ok"; then, for each attribute on
# the token, check to see if the attribute is already present on the body
# element (the second element) on the stack of open elements, and if it is not,
@ -1064,7 +1080,7 @@ class TreeBuilder {
# element on the stack of open elements is not a body element, then ignore the
# token. (fragment case)
# If the frameset-ok flag is set to "not ok", ignore the token.
if (!($this->stack->length === 1 || $this->stack[1]->tagName !== 'body' || $this->framesetOk === false)) {
if (!(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body' || $this->framesetOk === false)) {
# Otherwise, run the following steps:
#
# 1. Remove the second element on the stack of open elements from its parent
@ -1075,7 +1091,7 @@ class TreeBuilder {
}
# 2. Pop all the nodes from the bottom of the stack of open elements, from the
# current node up to, but not including, the root html element.
for ($i = $this->stack->length - 1; $i > 0; $i--) {
for ($i = count($this->stack) - 1; $i > 0; $i--) {
$this->stack->pop();
}
# 3. Insert an HTML element for the token.
@ -1094,7 +1110,6 @@ class TreeBuilder {
if ($this->stack->hasElementInButtonScope('p')) {
$this->closePElement();
}
# Insert an HTML element for the token.
$this->insertStartTagToken($token);
}
@ -1111,7 +1126,7 @@ class TreeBuilder {
# off the stack of open elements.
$currentNodeName = $this->stack->currentNodeName;
$currentNodeNamespace = $this->stack->currentNodeNamespace;
if ($currentNodeNamespace === '' && ($currentNodeName === 'h1' || $currentNodeName === 'h2' || $currentNodeName === 'h3' || $currentNodeName === 'h4' || $currentNodeName === 'h5' || $currentNodeName === 'h6')) {
if ($currentNodeNamespace === null && ($currentNodeName === 'h1' || $currentNodeName === 'h2' || $currentNodeName === 'h3' || $currentNodeName === 'h4' || $currentNodeName === 'h5' || $currentNodeName === 'h6')) {
$this->error(ParseError::UNEXPECTED_START_TAG, $token->name);
$this->stack->pop();
}
@ -1146,7 +1161,6 @@ class TreeBuilder {
$nextToken->data = substr($nextToken->data, 1);
}
}
// Process the next token
$token = $nextToken;
goto ProcessToken;
@ -1155,7 +1169,7 @@ class TreeBuilder {
elseif ($token->name === 'form') {
# If the form element pointer is not null, and there is no template element on
# the stack of open elements, then this is a parse error; ignore the token.
$templateInStack = ($this->stack->search('template') !== -1);
$templateInStack = ($this->stack->find('template') !== -1);
if (!is_null($this->formElement) && !$templateInStack) {
$this->error(ParseError::UNEXPECTED_START_TAG, $token->name);
}
@ -1183,13 +1197,13 @@ class TreeBuilder {
# 2. Initialize node to be the current node (the bottommost node of the stack).
# 3. Loop: If node is an li element, then run these substeps:
for ($i = $this->stack->length - 1; $i >= 0; $i--) {
for ($i = count($this->stack) - 1; $i >= 0; $i--) {
$node = $this->stack[$i];
$nodeName = $node->nodeName;
if ($nodeName === 'li') {
# 1. Generate implied end tags, except for li elements.
$this->stack->generateImpliedEndTags(["li"]);
$this->stack->generateImpliedEndTags("li");
# 2. If the current node is not an li element, then this is a parse error.
if ($this->stack->currentNodeName !== 'li') {
@ -1201,13 +1215,13 @@ class TreeBuilder {
$this->stack->popUntil('li');
# 4. Jump to the step labeled Done below.
return true;
break;
}
# 4. If node is in the special category, but is not an address, div, or p
# element, then jump to the step labeled Done below.
if ($nodeName !== 'address' && $nodeName !== 'div' && $nodeName !== 'p' && $this->isElementSpecial($node)) {
return true;
break;
}
# 5. Otherwise, set node to the previous entry in the stack of open elements and
@ -1230,7 +1244,7 @@ class TreeBuilder {
$this->framesetOk = false;
# 2. Initialize node to be the current node (the bottommost node of the stack).
for ($i = $this->stack->length - 1; $i >= 0; $i--) {
for ($i = count($this->stack) - 1; $i >= 0; $i--) {
$node = $this->stack[$i];
$nodeName = $node->nodeName;
@ -1240,7 +1254,7 @@ class TreeBuilder {
# 4. If node is a dt element, then run these substeps:
if ($nodeName === 'dd' || $nodeName === 'dt') {
# 1. Generate implied end tags, except for dd or dt elements.
$this->stack->generateImpliedEndTags(['dd', 'dt']);
$this->stack->generateImpliedEndTags('dd', 'dt');
# 2. If the current node is not a dd or dt element, then this is a parse error.
if ($this->stack->currentNodeName !== $nodeName) {
@ -1249,16 +1263,16 @@ class TreeBuilder {
# 3. Pop elements from the stack of open elements until a dd or dt element has been
# popped from the stack.
$this->stack->popUntil(['dd', 'dt']);
$this->stack->popUntil('dd', 'dt');
# 4. Jump to the step labeled Done below.
return true;
break;
}
# 5. If node is in the special category, but is not an address, div, or p
# element, then jump to the step labeled Done below.
if ($nodeName !== 'address' && $nodeName !== 'div' && $nodeName !== 'p' && $this->isElementSpecial($node)) {
return true;
break;
}
# 6. Otherwise, set node to the previous entry in the stack of open elements and
@ -1342,7 +1356,7 @@ class TreeBuilder {
# If the stack of open elements does not have a body element in scope, this is a
# parse error; ignore the token.
if (!$this->stack->hasElementInScope('body')) {
$this->error(ParseError::UNEXPECTED_END_TAG, 'body');
$this->error(ParseError::UNEXPECTED_END_TAG);
}
# Otherwise, if there is a node in the stack of open elements that is not either
# a dd element, a dt element, an li element, an optgroup element, an option
@ -1351,21 +1365,11 @@ class TreeBuilder {
# element, a tr element, the body element, or the html element, then this is a
# parse error.
else {
if ($this->stack->search(function($node) {
$n = $node->nodeName;
if ($n !== 'dd' && $n !== 'dt' && $n !== 'li' && $n !== 'optgroup' && $n !== 'option' && $n !== 'p' && $n !== 'rb' && $n !== 'rp' && $n !== 'rt' && $n !== 'rtc' && $n !== 'tbody' && $n !== 'td' && $n !== 'tfoot' && $n !== 'th' && $n !== 'thead' && $n !== 'tr' && $n !== 'body' && $n !== 'html') {
return true;
}
return false;
}) !== -1) {
$this->error(ParseError::UNEXPECTED_END_TAG, 'body');
return true;
if ($this->stack->findNot('dd', 'dt', 'li', 'optgroup', 'option', 'p', 'rb', 'rp', 'rt', 'rtc', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'body', 'html') > -1) {
$this->error(ParseError::UNEXPECTED_END_TAG);
}
# Switch the insertion mode to "after body".
$this->insertionMode = self::AFTER_BODY_MODE;
// The only thing different between body and html here is that when processing
// an html end tag the token is reprocessed.
if ($token->name === 'html') {
@ -1405,7 +1409,7 @@ class TreeBuilder {
elseif ($token->name === 'form') {
# If there is no template element on the stack of open elements, then run these
# substeps:
if ($this->stack->search('template') === -1) {
if ($this->stack->find('template') === -1) {
# 1. Let node be the element that the form element pointer is set to, or null if it
# is not set to an element.
$node = $this->formElement;
@ -1424,7 +1428,10 @@ class TreeBuilder {
$this->error(ParseError::UNEXPECTED_END_TAG, $token->name);
}
# 6. Remove node from the stack of open elements
$this->stack->remove($node);
$key = $this->stack->findSame($node);
if ($key > -1) {
unset($this->stack[$key]);
}
}
# If there is a template element on the stack of open elements, then run these
# substeps instead:
@ -1452,7 +1459,7 @@ class TreeBuilder {
# An end-of-file token
elseif ($token instanceof EOFToken) {
# If the stack of template insertion modes is not empty, then process the token using the rules for the "in template" insertion mode.
if ($this->templateInsertionModes->length !== 0) {
if (count($this->templateInsertionModes) !== 0) {
$insertionMode = self::IN_TEMPLATE_MODE;
goto ProcessToken;
}
@ -1463,16 +1470,8 @@ class TreeBuilder {
# a p element, an rb element, an rp element, an rt element, an rtc element, a
# tbody element, a td element, a tfoot element, a th element, a thead element, a
# tr element, the body element, or the html element, then this is a parse error.
if ($this->stack->search(function($node) {
$n = $node->nodeName;
if ($n !== 'dd' && $n !== 'dt' && $n !== 'li' && $n !== 'optgroup' && $n !== 'option' && $n !== 'p' && $n !== 'rb' && $n !== 'rp' && $n !== 'rt' && $n !== 'rtc' && $n !== 'tbody' && $n !== 'td' && $n !== 'tfoot' && $n !== 'th' && $n !== 'thead' && $n !== 'tr' && $n !== 'body' && $n !== 'html') {
return true;
}
return false;
}) !== -1) {
if ($this->stack->findNot('dd', 'dt', 'li', 'optgroup', 'option', 'p', 'rb', 'rp', 'rt', 'rtc', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'body', 'html') > -1) {
$this->error(ParseError::UNEXPECTED_END_TAG, 'body');
return true;
}
# 2. Stop parsing.
@ -1675,10 +1674,10 @@ class TreeBuilder {
# 3. Loop: If node's tag name, converted to ASCII lowercase, is the same as the
# tag name of the token, pop elements from the stack of open elements until node
# has been popped from the stack, and then abort these steps.
$count = $this->stack->length - 1;
$count = count($this->stack) - 1;
while (true) {
if (strtolower($nodeName) === $token->name) {
$this->stack->popUntil($node);
$this->stack->popUntilSame($node);
break;
}
@ -1728,12 +1727,12 @@ class TreeBuilder {
#
# 1. Let last template be the last template element in the stack of open
# elements, if any.
$lastTemplateKey = $this->stack->search('template');
$lastTemplateKey = $this->stack->find('template');
$lastTemplate = ($lastTemplateKey !== -1 ) ? $this->stack[$lastTemplateKey] : null;
# 2. Let last table be the last table element in the stack of open elements, if
# any.
$lastTableKey = $this->stack->search('table');
$lastTableKey = $this->stack->find('table');
$lastTable = ($lastTableKey !== -1 ) ? $this->stack[$lastTableKey] : null;
# 3. If there is a last template and either there is no last table, or there is
@ -2035,7 +2034,7 @@ class TreeBuilder {
$node = $this->stack->currentNode;
$nodeName = $this->stack->currentNodeName;
// Keeping up with the position, too.
$position = $this->stack->length - 1;
$position = count($this->stack) - 1;
# 3. Loop: If node is the first node in the stack of open elements, then set
# last to true, and, if the parser was originally created as part of the HTML
@ -2176,11 +2175,11 @@ class TreeBuilder {
# must run the following steps:
# 1. Generate implied end tags, except for p elements.
$this->stack->generateImpliedEndTags(["p"]);
$this->stack->generateImpliedEndTags("p");
# 2. If the current node is not a p element, then this is a parse error.
$currentNodeName = $this->stack->currentNodeName;
if ($currentNodeName !== 'p') {
$this->error(ParseError::UNEXPECTED_END_TAG, $currentNodeName);
$this->error(ParseError::UNEXPECTED_END_TAG);
}
# 3. Pop elements from the stack of open elements until a p element has been
# popped from the stack.
@ -2189,18 +2188,7 @@ class TreeBuilder {
protected function isElementSpecial(Element $element): bool {
$name = $element->nodeName;
$ns = $element->namespaceURI;
# The following elements have varying levels of special parsing rules: HTML’s
# address, applet, area, article, aside, base, basefont, bgsound, blockquote,
# body, br, button, caption, center, col, colgroup, dd, details, dir, div, dl,
# dt, embed, fieldset, figcaption, figure, footer, form, frame, frameset, h1,
# h2, h3, h4, h5, h6, head, header, hr, html, iframe, img, input, li, link,
# listing, main, marquee, meta, nav, noembed, noframes, noscript, object, ol, p,
# param, plaintext, pre, script, section, select, source, style, summary, table,
# tbody, td, template, textarea, tfoot, th, thead, title, tr, track, ul, wbr,
# xmp; MathML mi, MathML mo, MathML mn, MathML ms, MathML mtext, and MathML
# annotation-xml; and SVG foreignObject, SVG desc, and SVG title.
return (($ns === '' && ($name === 'address' || $name === 'applet' || $name === 'area' || $name === 'article' || $name === 'aside' || $name === 'base' || $name === 'basefont' || $name === 'bgsound' || $name === 'blockquote' || $name === 'body' || $name === 'br' || $name === 'button' || $name === 'caption' || $name === 'center' || $name === 'col' || $name === 'colgroup' || $name === 'dd' || $name === 'details' || $name === 'dir' || $name === 'div' || $name === 'dl' || $name === 'dt' || $name === 'embed' || $name === 'fieldset' || $name === 'figcaption' || $name === 'figure' || $name === 'footer' || $name === 'form' || $name === 'frame' || $name === 'frameset' || $name === 'h1' || $name === 'h2' || $name === 'h3' || $name === 'h4' || $name === 'h5' || $name === 'h6' || $name === 'head' || $name === 'header' || $name === 'hr' || $name === 'html' || $name === 'iframe' || $name === 'img' || $name === 'input' || $name === 'li' || $name === 'link' || $name === 'listing' || $name === 'main' || $name === 'marquee' || $name === 'meta' || $name === 'nav' || $name === 'noembed' || $name === 'noframes' || $name === 'noscript' || $name === 'object' || $name === 'ol' || $name === 'p' || $name === 'param' || $name === 'plaintext' || $name === 'pre' || $name === 'script' || $name === 'section' || $name === 'select' || $name === 'source' || $name === 'style' || $name === 'summary' || $name === 'table' || $name === 'tbody' || $name === 'td' || $name === 'template' || $name === 'textarea' || $name === 'tfoot' || $name === 'th' || $name === 'thead' || $name === 'title' || $name === 'tr' || $name === 'track' || $name === 'ul' || $name === 'wbr' || $name === 'xmp')) || ($ns === Parser::MATHML_NAMESPACE && ($name === 'mi' || $name === 'mo' || $name === 'mn' || $name === 'ms' || $name === 'mtext' || $name === 'annotation-xml')) || ($ns === Parser::SVG_NAMESPACE && ($name === 'foreignObject' || $name === 'desc' || $name === 'title')));
$ns = $element->namespaceURI ?? Parser::HTML_NAMESPACE;
return in_array($name, self::SPECIAL_ELEMENTS[$ns] ?? []);
}
}

Loading…
Cancel
Save