diff --git a/lib/ActiveFormattingElementsList.php b/lib/ActiveFormattingElementsList.php index ed19044..18be4bf 100644 --- a/lib/ActiveFormattingElementsList.php +++ b/lib/ActiveFormattingElementsList.php @@ -17,8 +17,10 @@ namespace dW\HTML5; class ActiveFormattingElementsList extends Stack { protected $_storage = []; protected $stack; + protected $tree; - public function __construct(OpenElementsStack $stack) { + public function __construct(TreeBuilder $tree, OpenElementsStack $stack) { + $this->tree = $tree; $this->stack = $stack; } @@ -143,7 +145,7 @@ class ActiveFormattingElementsList extends Stack { # 8. Create: Insert an HTML element for the token for which the element entry # was created, to obtain new element. create: - $element = TreeBuilder::insertStartTagToken($entry['token']); + $element = $this->tree->insertStartTagToken($entry['token']); # 9. Replace the entry for entry in the list with an entry for new element. $this->_storage[key($this->_storage)]['element'] = $element; diff --git a/lib/OpenElementsStack.php b/lib/OpenElementsStack.php index 690ef13..617d618 100644 --- a/lib/OpenElementsStack.php +++ b/lib/OpenElementsStack.php @@ -2,7 +2,91 @@ declare(strict_types=1); namespace dW\HTML5; -class OpenElementsStack extends Stack { +class OpenElementsStack extends \splStack { + protected const IMPLIED_END_TAGS = [ + 'dd' => true, + 'dt' => true, + 'li' => true, + 'optgroup' => true, + 'option' => true, + 'p' => true, + 'rb' => true, + 'rp' => true, + 'rt' => true, + 'rtc' => true, + ]; + protected const IMPLIED_END_TAGS_THOROUGH = [ + 'caption' => true, + 'colgroup' => true, + 'dd' => true, + 'dt' => true, + 'li' => true, + 'optgroup' => true, + 'option' => true, + 'p' => true, + 'rb' => true, + 'rp' => true, + 'rt' => true, + 'rtc' => true, + 'tbody' => true, + 'td' => true, + 'tfoot' => true, + 'th' => true, + 'thead' => true, + 'tr' => true, + ]; + protected const GENERAL_SCOPE = [ + Parser::HTML_NAMESPACE => [ + 'applet', + 'caption', + 'html', + 
'table', + 'td', + 'th', + 'marquee', + 'object', + 'template' + ], + Parser::MATHML_NAMESPACE => [ + 'mi', + 'mo', + 'mn', + 'ms', + 'mtext', + 'annotation-xml' + ], + Parser::SVG_NAMESPACE => [ + 'foreignObject', + 'desc', + 'title' + ], + ]; + protected const LIST_ITEM_SCOPE = [ + // everything in general scope, and these in the HTML namespace + 'ol', + 'ul', + ]; + protected const BUTTON_SCOPE = [ + // everything in general scope, and these in the HTML namespace + 'button', + ]; + protected const TABLE_SCOPE = [ + Parser::HTML_NAMESPACE => [ + 'html', + 'table', + 'template', + ], + ]; + protected const SELECT_SCOPE = [ + // all elements EXCEPT these + Parser::HTML_NAMESPACE => [ + 'optgroup', + 'option', + ], + ]; + + + protected $fragmentCase; protected $fragmentContext; @@ -18,88 +102,68 @@ class OpenElementsStack extends Stack { $this->fragmentContext = $fragmentContext; } - public function popUntil($target) { - if ($target instanceof Element) { - do { - $node = $this->pop; - } while (!$node->isSameNode($target)); - } elseif (is_string($target)) { - do { - $poppedNodeName = $this->pop()->nodeName; - } while ($poppedNodeName !== $target); - } elseif (is_array($target)) { - do { - $poppedNodeName = $this->pop()->nodeName; - } while (!in_array($poppedNodeName, $target)); - } else { - throw new Exception(Exception::STACK_ELEMENT_STRING_ARRAY_EXPECTED); - } + public function popUntil(string ...$target): void { + do { + $node = $this->pop(); + } while (!in_array($node->nodeName, $target)); } - public function search($needle): int { - if (!$needle) { - return -1; - } + public function popUntilSame(Element $target): void { + do { + $node = $this->pop(); + } while (!$node->isSameNode($target)); + } - if ($needle instanceof \DOMElement) { - foreach (array_reverse($this->_storage) as $key => $value) { - if ($value->isSameNode($needle)) { - return $key; - } - } - } elseif (is_string($needle)) { - foreach (array_reverse($this->_storage) as $key => $value) { - if 
($value->nodeName === $needle) { - return $key; - } - } - } elseif ($needle instanceof \Closure) { - foreach (array_reverse($this->_storage) as $key => $value) { - if ($needle($value) === true) { - return $key; - } - } + public function find(string ...$name): int { + foreach ($this as $k => $node) { + if (in_array($node->nodeName, $name)) { + return $k; } } - return -1; } - // Remove an arbitrary element from the array. - public function remove($target) { - $key = $this->search($target); - if ($key === -1) { - return; - } elseif ($key === count($this->_storage) - 1) { - $this->pop(); - return; + public function findNot(string ...$name): int { + foreach ($this as $k => $node) { + if (!in_array($node->nodeName, $name)) { + return $k; + } } - - unset($this->_storage[$key]); - $this->_storage = array_values($this->_storage); + return -1; } - public function generateImpliedEndTags(array $exclude = []) { - $tags = ['caption', 'colgroup', 'dd', 'dt', 'li', 'optgroup', 'option', 'p', 'rb', 'rp', 'rt', 'rtc', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']; - - if (count($exclude) > 0) { - $modified = false; - foreach ($exclude as $e) { - $key = array_search($e, $tags); - if ($key !== false) { - unset($tags[$key]); - $modified = true; - } + public function findSame(\DOMElement $node): int { + foreach ($this as $k => $entry) { // $entry, not $node: the loop variable must not overwrite the target being searched for + if ($entry->isSameNode($node)) { + return $k; } + } + return -1; + } - if ($modified) { - $tags = array_values($tags); - } + public function generateImpliedEndTags(string ...$exclude): void { + # When the steps below require the UA to generate implied end tags, + # then, while the current node is {elided list of element names}, + # the UA must pop the current node off the stack of open elements. + # + # If a step requires the UA to generate implied end tags but lists + # an element to exclude from the process, then the UA must perform + # the above steps as if that element was not in the above list. 
+ $map = self::IMPLIED_END_TAGS; + foreach($exclude as $name) { + $map[$name] = false; + } + while (!$this->isEmpty() && ($map[$this->top()->nodeName] ?? false)) { + $this->pop(); } + } - $currentNodeName = end($this->_storage)->nodeName; - while (in_array($currentNodeName, $tags)) { + public function generateImpliedEndTagsThoroughly(): void { + # When the steps below require the UA to generate all implied end tags + # thoroughly, then, while the current node is {elided list of element names}, + # the UA must pop the current node off the stack of open elements. + while (!$this->isEmpty() && (self::IMPLIED_END_TAGS_THOROUGH[$this->top()->nodeName] ?? false)) { $this->pop(); - $currentNodeName = end($this->_storage)->nodeName; } } @@ -108,296 +172,102 @@ class OpenElementsStack extends Stack { # it has that element in the specific scope consisting of the following element # types: # - # applet - # caption - # html - # table - # td - # th - # marquee - # object - # template - # MathML mi - # MathML mo - # MathML mn - # MathML ms - # MathML mtext - # MathML annotation-xml - # SVG foreignObject - # SVG desc - # SVG title - - $list = [ - Parser::HTML_NAMESPACE => [ - 'applet', - 'caption', - 'html', - 'table', - 'td', - 'th', - 'marquee', - 'object', - 'template' - ], - - Parser::MATHML_NAMESPACE => [ - 'mi', - 'mo', - 'mn', - 'ms', - 'mtext', - 'annotation-xml' - ], - - Parser::SVG_NAMESPACE => [ - 'foreignObject', - 'desc', - 'title' - ] - ]; - - return $this->hasElementInScopeHandler($target, $list); + # {elided} + return $this->hasElementInScopeHandler($target, self::GENERAL_SCOPE); } public function hasElementInListItemScope($target): bool { - # The stack of open elements is said to have a particular element in list item scope when it has that element in the specific scope consisting of the following element types: - # - # All the element types listed above for the has an element in scope algorithm. 
- # ol in the HTML namespace - # ul in the HTML namespace - - $list = [ - Parser::HTML_NAMESPACE => [ - 'applet', - 'caption', - 'html', - 'table', - 'td', - 'th', - 'marquee', - 'object', - 'template', - 'ol', - 'ul' - ], - - Parser::MATHML_NAMESPACE => [ - 'mi', - 'mo', - 'mn', - 'ms', - 'mtext', - 'annotation-xml' - ], - - Parser::SVG_NAMESPACE => [ - 'foreignObject', - 'desc', - 'title' - ] - ]; - - return $this->hasElementInScopeHandler($target, $list); + $scope = self::GENERAL_SCOPE; + $scope[Parser::HTML_NAMESPACE] = array_merge($scope[Parser::HTML_NAMESPACE], self::LIST_ITEM_SCOPE); + return $this->hasElementInScopeHandler($target, $scope); } public function hasElementInButtonScope($target): bool { - # The stack of open elements is said to have a particular element in button - # scope when it has that element in the specific scope consisting of the - # following element types: - # - # All the element types listed above for the has an element in scope algorithm. - # button in the HTML namespace - - $list = [ - Parser::HTML_NAMESPACE => [ - 'applet', - 'caption', - 'html', - 'table', - 'td', - 'th', - 'marquee', - 'object', - 'template', - 'button' - ], - - Parser::MATHML_NAMESPACE => [ - 'mi', - 'mo', - 'mn', - 'ms', - 'mtext', - 'annotation-xml' - ], - - Parser::SVG_NAMESPACE => [ - 'foreignObject', - 'desc', - 'title' - ] - ]; - - return $this->hasElementInScopeHandler($target, $list); + $scope = self::GENERAL_SCOPE; + $scope[Parser::HTML_NAMESPACE] = array_merge($scope[Parser::HTML_NAMESPACE], self::BUTTON_SCOPE); + return $this->hasElementInScopeHandler($target, $scope); } public function hasElementInTableScope($target): bool { - # The stack of open elements is said to have a particular element in table scope - # when it has that element in the specific scope consisting of the following - # element types: - # - # All the element types listed above for the has an element in scope algorithm. 
- # html in the HTML namespace - # table in the HTML namespace - # template in the HTML namespace - - // Not sure what to do here. I am going to assume the elements without a - // namespace in the element types listed above are meant for the HTML namespace. - // If so then these listed here are redundant. My interpretation therefore has - // this being an alias for hasElementInScope. - - return $this->hasElementInScope($target); + return $this->hasElementInScopeHandler($target, self::TABLE_SCOPE); } public function hasElementInSelectScope(string $target): bool { - # The stack of open elements is said to have a particular element in select - # scope when it has that element in the specific scope consisting of all element - # types except the following: + # The stack of open elements is said to have a particular element + # in select scope when it has that element in the specific scope + # consisting of all element types EXCEPT the following: # - # All the element types listed above for the has an element in scope algorithm. # optgroup in the HTML namespace # option in the HTML namespace - - $list = [ - Parser::HTML_NAMESPACE => [ - 'applet', - 'caption', - 'html', - 'table', - 'td', - 'th', - 'marquee', - 'object', - 'template', - 'button', - 'optgroup', - 'option' - ], - - Parser::MATHML_NAMESPACE => [ - 'mi', - 'mo', - 'mn', - 'ms', - 'mtext', - 'annotation-xml' - ], - - Parser::SVG_NAMESPACE => [ - 'foreignObject', - 'desc', - 'title' - ] - ]; - - return $this->hasElementInScopeHandler($target, $list); + return $this->hasElementInScopeHandler($target, self::SELECT_SCOPE, false); } - - protected function hasElementInScopeHandler($target, array $list): bool { - # 1. Initialize node to be the current node (the bottommost node of the stack). - // Handled by loop. - foreach (array_reverse($this->_storage) as $node) { - # 2. If node is the target node, terminate in a match state. 
- if ($target instanceof \DOMElement) { + protected function hasElementInScopeHandler($target, array $list, $matchType = true): bool { + assert(is_string($target) || $target instanceof \DOMElement, new \Exception("Invalid input type")); + # The stack of open elements is said to have an element target node + # in a specific scope consisting of a list of element types list + # when the following algorithm terminates in a match state: + if ($target instanceof \DOMElement) { + # Initialize node to be the current node (the bottommost node of the stack). + foreach ($this as $node) { + # If node is the target node, terminate in a match state. if ($node->isSameNode($target)) { return true; } - } elseif (is_string($target)) { + # Otherwise, if node is one of the element types in list, terminate in a failure state. + $ns = $node->namespaceURI ?? Parser::HTML_NAMESPACE; + if (in_array($node->nodeName, $list[$ns] ?? []) === $matchType) { + return false; + } + # Otherwise, set node to the previous entry in the stack of + # open elements and return to step 2. (This will never fail, + # since the loop will always terminate in the previous step + # if the top of the stack — an html element — is reached.) + } + } else { + # Initialize node to be the current node (the bottommost node of the stack). + foreach ($this as $node) { + # If node is the target node, terminate in a match state. if ($node->nodeName === $target) { return true; } - } - - # 3. Otherwise, if node is one of the element types in list, terminate in a - # failure state. - else { - foreach ($list as $namespace => $subList) { - if ($namespace === Parser::HTML_NAMESPACE) { - $namespace = ''; - } - - if ($node->namespaceURI !== $namespace) { - continue; - } - - foreach ($subList as $name) { - if ($node->nodeName === $name) { - return false; - } - } + # Otherwise, if node is one of the element types in list, terminate in a failure state. + $ns = $node->namespaceURI ?? 
Parser::HTML_NAMESPACE; + if (in_array($node->nodeName, $list[$ns] ?? []) === $matchType) { + return false; } + # Otherwise, set node to the previous entry in the stack of + # open elements and return to step 2. (This will never fail, + # since the loop will always terminate in the previous step + # if the top of the stack — an html element — is reached.) } - - # Otherwise, set node to the previous entry in the stack of open elements and - # return to step 2. (This will never fail, since the loop will always terminate - # in the previous step if the top of the stack — an html element — is reached.) - // Handled by loop. } - - return false; } - public function __get($property) { - $value = parent::__get($property); - if (!is_null($value)) { - return $value; - } - switch ($property) { case 'adjustedCurrentNode': # The adjusted current node is the context element if the parser was created by # the HTML fragment parsing algorithm and the stack of open elements has only one # element in it (fragment case); otherwise, the adjusted current node is the # current node. - return ($this->fragmentCase && $this->length === 1) ? $this->fragmentContext : $this->currentNode; - break; + return ($this->fragmentCase && count($this) === 1) ? $this->fragmentContext : $this->__get('currentNode'); case 'adjustedCurrentNodeName': - $adjustedCurrentNode = $this->adjustedCurrentNode; + $adjustedCurrentNode = $this->__get('adjustedCurrentNode'); return (!is_null($adjustedCurrentNode)) ? $adjustedCurrentNode->nodeName : null; - break; case 'adjustedCurrentNodeNamespace': - $adjustedCurrentNode = $this->adjustedCurrentNode; + $adjustedCurrentNode = $this->__get('adjustedCurrentNode'); return (!is_null($adjustedCurrentNode)) ? $adjustedCurrentNode->namespaceURI: null; - break; case 'currentNode': - $currentNode = end($this->_storage); - return ($currentNode) ? $currentNode : null; - break; + return $this->isEmpty() ? 
null : $this->top(); case 'currentNodeName': - $currentNode = $this->currentNode; + $currentNode = $this->__get('currentNode'); return ($currentNode && $currentNode->nodeType) ? $currentNode->nodeName : null; - break; case 'currentNodeNamespace': - $currentNode = $this->currentNode; + $currentNode = $this->__get('currentNode'); return (!is_null($currentNode)) ? $currentNode->namespaceURI: null; - break; - default: return null; - } - } - - // Used when listing expected elements when returning parse errors - public function __toString(): string { - if (count($this->_storage) > 1) { - // Don't output the name of the root element. - for ($i = 1, $temp = []; $i < count($this->_storage) - 1; $i++) { - $temp[] = $this->_storage[$i]->nodeName; - } - - return implode(', ', array_unique($temp)); - } else { - return ''; + default: + return null; } } } diff --git a/lib/TemplateInsertionModesStack.php b/lib/TemplateInsertionModesStack.php index 255939d..6ead188 100644 --- a/lib/TemplateInsertionModesStack.php +++ b/lib/TemplateInsertionModesStack.php @@ -2,19 +2,13 @@ declare(strict_types=1); namespace dW\HTML5; -class TemplateInsertionModesStack extends Stack { +class TemplateInsertionModesStack extends \SplStack { public function __get($property) { - $value = parent::__get($property); - if (!is_null($value)) { - return $value; - } - switch ($property) { - case 'currentMode': return - $currentMode = end($this->_storage); - return ($currentMode) ? $currentMode : null; - break; - default: return null; + case 'currentMode': + return $this->isEmpty() ? 
null : $this->top(); + default: + return null; } } } diff --git a/lib/TreeBuilder.php b/lib/TreeBuilder.php index 791f9b7..85ed3df 100644 --- a/lib/TreeBuilder.php +++ b/lib/TreeBuilder.php @@ -193,8 +193,23 @@ class TreeBuilder { 'ychannelselector' => 'yChannelSelector', 'zoomandpan' => 'zoomAndPan', ]; + # The following elements have varying levels of special parsing rules: HTML’s + # address, applet, area, article, aside, base, basefont, bgsound, blockquote, + # body, br, button, caption, center, col, colgroup, dd, details, dir, div, dl, + # dt, embed, fieldset, figcaption, figure, footer, form, frame, frameset, h1, + # h2, h3, h4, h5, h6, head, header, hr, html, iframe, img, input, li, link, + # listing, main, marquee, meta, nav, noembed, noframes, noscript, object, ol, p, + # param, plaintext, pre, script, section, select, source, style, summary, table, + # tbody, td, template, textarea, tfoot, th, thead, title, tr, track, ul, wbr, + # xmp; MathML mi, MathML mo, MathML mn, MathML ms, MathML mtext, and MathML + # annotation-xml; and SVG foreignObject, SVG desc, and SVG title. 
+ protected const SPECIAL_ELEMENTS = [ + Parser::HTML_NAMESPACE => ['address', 'applet', 'area', 'article', 'aside', 'base', 'basefont', 'bgsound', 'blockquote', 'body', 'br', 'button', 'caption', 'center', 'col', 'colgroup', 'dd', 'details', 'dir', 'div', 'dl', 'dt', 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'iframe', 'img', 'input', 'li', 'link', 'listing', 'main', 'marquee', 'meta', 'nav', 'noembed', 'noframes', 'noscript', 'object', 'ol', 'p', 'param', 'plaintext', 'pre', 'script', 'section', 'select', 'source', 'style', 'summary', 'table', 'tbody', 'td', 'template', 'textarea', 'tfoot', 'th', 'thead', 'title', 'tr', 'track', 'ul', 'wbr', 'xmp'], + Parser::MATHML_NAMESPACE => ['mi', 'mo', 'mn', 'ms', 'mtext', 'annotation-xml'], + Parser::SVG_NAMESPACE => ['foreignObject', 'desc', 'title'], + ]; - public function __construct(Document $dom, $formElement, bool $fragmentCase = false, $fragmentContext = null, OpenElementsStack $stack, Stack $templateInsertionModes, Tokenizer $tokenizer, ParseError $errorHandler, Data $data) { + public function __construct(Document $dom, $formElement, bool $fragmentCase = false, $fragmentContext = null, OpenElementsStack $stack, TemplateInsertionModesStack $templateInsertionModes, Tokenizer $tokenizer, ParseError $errorHandler, Data $data) { // If the form element isn't an instance of DOMElement that has a node name of // "form" or null then there's a problem. if (!is_null($formElement) && !($formElement instanceof \DOMElement && $formElement->nodeName === 'form')) { @@ -221,7 +236,7 @@ class TreeBuilder { $this->errorHandler = $errorHandler; // Initialize the list of active formatting elements. 
- $this->activeFormattingElementsList = new ActiveFormattingElementsList($stack); + $this->activeFormattingElementsList = new ActiveFormattingElementsList($this, $stack); $this->insertionMode = self::INITIAL_MODE; $this->quirksMode = self::QUIRKS_MODE_OFF; @@ -238,6 +253,7 @@ class TreeBuilder { assert($iterations++ < 50, new LoopException("Probable infinite loop detected in HTML content handling")); $adjustedCurrentNode = $this->stack->adjustedCurrentNode; $adjustedCurrentNodeName = $this->stack->adjustedCurrentNodeName; + assert(!$adjustedCurrentNode || $adjustedCurrentNodeName, new \Exception("The adjusted current node must have a name if not null")); $adjustedCurrentNodeNamespace = $this->stack->adjustedCurrentNodeNamespace; # 13.2.6 Tree construction @@ -246,7 +262,7 @@ class TreeBuilder { # appropriate steps from the following list, known as the tree construction dispatcher: # # If the stack of open elements is empty - if ($this->stack->length === 0 || + if (count($this->stack) === 0 || # If the adjusted current node is an element in the HTML namespace // PHP's DOM returns null when the namespace isn't specified... eg. HTML. is_null($adjustedCurrentNodeNamespace) || ( @@ -747,7 +763,7 @@ class TreeBuilder { elseif ($token->name === 'template') { # If there is no template element on the stack of open elements, then this is a # parse error; ignore the token. - if ($this->stack->search('template') === -1) { + if ($this->stack->find('template') === -1) { $this->error(ParseError::UNEXPECTED_END_TAG); } # Otherwise, run these steps: @@ -916,7 +932,7 @@ class TreeBuilder { $this->parseTokenInHTMLContent($token, self::IN_HEAD_MODE); # Remove the node pointed to by the head element pointer from the stack of open # elements. (It might not be the current node at this point.) 
- $key = $this->stack->search($this->headElement); + $key = $this->stack->findSame($this->headElement); if ($key !== -1) { unset($this->stack[$key]); } @@ -1015,7 +1031,7 @@ class TreeBuilder { $this->error(ParseError::UNEXPECTED_START_TAG, 'html'); # If there is a template element on the stack of open elements, then ignore the # token. - if ($this->stack->search('template') === -1) { + if ($this->stack->find('template') === -1) { # Otherwise, for each attribute on the token, check to see if the attribute is # already present on the top element of the stack of open elements. If it is # not, add the attribute and its corresponding value to that element. @@ -1040,7 +1056,7 @@ class TreeBuilder { # If the second element on the stack of open elements is not a body element, if # the stack of open elements has only one node on it, or if there is a template # element on the stack of open elements, then ignore the token. (fragment case) - if (!($this->stack[1]->tagName !== 'body' || $this->stack->length === 1 || $this->stack->search('template') !== -1)) { + if (!($this->stack[1]->tagName !== 'body' || count($this->stack) === 1 || $this->stack->find('template') !== -1)) { # Otherwise, set the frameset-ok flag to "not ok"; then, for each attribute on # the token, check to see if the attribute is already present on the body # element (the second element) on the stack of open elements, and if it is not, @@ -1064,7 +1080,7 @@ class TreeBuilder { # element on the stack of open elements is not a body element, then ignore the # token. (fragment case) # If the frameset-ok flag is set to "not ok", ignore the token. - if (!($this->stack->length === 1 || $this->stack[1]->tagName !== 'body' || $this->framesetOk === false)) { + if (!(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body' || $this->framesetOk === false)) { # Otherwise, run the following steps: # # 1. 
Remove the second element on the stack of open elements from its parent @@ -1075,7 +1091,7 @@ class TreeBuilder { } # 2. Pop all the nodes from the bottom of the stack of open elements, from the # current node up to, but not including, the root html element. - for ($i = $this->stack->length - 1; $i > 0; $i--) { + for ($i = count($this->stack) - 1; $i > 0; $i--) { $this->stack->pop(); } # 3. Insert an HTML element for the token. @@ -1094,7 +1110,6 @@ class TreeBuilder { if ($this->stack->hasElementInButtonScope('p')) { $this->closePElement(); } - # Insert an HTML element for the token. $this->insertStartTagToken($token); } @@ -1111,7 +1126,7 @@ class TreeBuilder { # off the stack of open elements. $currentNodeName = $this->stack->currentNodeName; $currentNodeNamespace = $this->stack->currentNodeNamespace; - if ($currentNodeNamespace === '' && ($currentNodeName === 'h1' || $currentNodeName === 'h2' || $currentNodeName === 'h3' || $currentNodeName === 'h4' || $currentNodeName === 'h5' || $currentNodeName === 'h6')) { + if ($currentNodeNamespace === null && ($currentNodeName === 'h1' || $currentNodeName === 'h2' || $currentNodeName === 'h3' || $currentNodeName === 'h4' || $currentNodeName === 'h5' || $currentNodeName === 'h6')) { $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); $this->stack->pop(); } @@ -1146,7 +1161,6 @@ class TreeBuilder { $nextToken->data = substr($nextToken->data, 1); } } - // Process the next token $token = $nextToken; goto ProcessToken; @@ -1155,7 +1169,7 @@ class TreeBuilder { elseif ($token->name === 'form') { # If the form element pointer is not null, and there is no template element on # the stack of open elements, then this is a parse error; ignore the token. 
- $templateInStack = ($this->stack->search('template') !== -1); + $templateInStack = ($this->stack->find('template') !== -1); if (!is_null($this->formElement) && !$templateInStack) { $this->error(ParseError::UNEXPECTED_START_TAG, $token->name); } @@ -1183,13 +1197,13 @@ class TreeBuilder { # 2. Initialize node to be the current node (the bottommost node of the stack). # 3. Loop: If node is an li element, then run these substeps: - for ($i = $this->stack->length - 1; $i >= 0; $i--) { + for ($i = count($this->stack) - 1; $i >= 0; $i--) { $node = $this->stack[$i]; $nodeName = $node->nodeName; if ($nodeName === 'li') { # 1. Generate implied end tags, except for li elements. - $this->stack->generateImpliedEndTags(["li"]); + $this->stack->generateImpliedEndTags("li"); # 2. If the current node is not an li element, then this is a parse error. if ($this->stack->currentNodeName !== 'li') { @@ -1201,13 +1215,13 @@ class TreeBuilder { $this->stack->popUntil('li'); # 4. Jump to the step labeled Done below. - return true; + break; } # 4. If node is in the special category, but is not an address, div, or p # element, then jump to the step labeled Done below. if ($nodeName !== 'address' && $nodeName !== 'div' && $nodeName !== 'p' && $this->isElementSpecial($node)) { - return true; + break; } # 5. Otherwise, set node to the previous entry in the stack of open elements and @@ -1230,7 +1244,7 @@ class TreeBuilder { $this->framesetOk = false; # 2. Initialize node to be the current node (the bottommost node of the stack). - for ($i = $this->stack->length - 1; $i >= 0; $i--) { + for ($i = count($this->stack) - 1; $i >= 0; $i--) { $node = $this->stack[$i]; $nodeName = $node->nodeName; @@ -1240,7 +1254,7 @@ class TreeBuilder { # 4. If node is a dt element, then run these substeps: if ($nodeName === 'dd' || $nodeName === 'dt') { # 1. Generate implied end tags, except for dd or dt elements. 
- $this->stack->generateImpliedEndTags(['dd', 'dt']); + $this->stack->generateImpliedEndTags('dd', 'dt'); # 2. If the current node is not a dd or dt element, then this is a parse error. if ($this->stack->currentNodeName !== $nodeName) { @@ -1249,16 +1263,16 @@ class TreeBuilder { # 3. Pop elements from the stack of open elements until a dd or dt element has been # popped from the stack. - $this->stack->popUntil(['dd', 'dt']); + $this->stack->popUntil('dd', 'dt'); # 4. Jump to the step labeled Done below. - return true; + break; } # 5. If node is in the special category, but is not an address, div, or p # element, then jump to the step labeled Done below. if ($nodeName !== 'address' && $nodeName !== 'div' && $nodeName !== 'p' && $this->isElementSpecial($node)) { - return true; + break; } # 6. Otherwise, set node to the previous entry in the stack of open elements and @@ -1342,7 +1356,7 @@ class TreeBuilder { # If the stack of open elements does not have a body element in scope, this is a # parse error; ignore the token. if (!$this->stack->hasElementInScope('body')) { - $this->error(ParseError::UNEXPECTED_END_TAG, 'body'); + $this->error(ParseError::UNEXPECTED_END_TAG); } # Otherwise, if there is a node in the stack of open elements that is not either # a dd element, a dt element, an li element, an optgroup element, an option @@ -1351,21 +1365,11 @@ class TreeBuilder { # element, a tr element, the body element, or the html element, then this is a # parse error. 
else { - if ($this->stack->search(function($node) { - $n = $node->nodeName; - if ($n !== 'dd' && $n !== 'dt' && $n !== 'li' && $n !== 'optgroup' && $n !== 'option' && $n !== 'p' && $n !== 'rb' && $n !== 'rp' && $n !== 'rt' && $n !== 'rtc' && $n !== 'tbody' && $n !== 'td' && $n !== 'tfoot' && $n !== 'th' && $n !== 'thead' && $n !== 'tr' && $n !== 'body' && $n !== 'html') { - return true; - } - - return false; - }) !== -1) { - $this->error(ParseError::UNEXPECTED_END_TAG, 'body'); - return true; + if ($this->stack->findNot('dd', 'dt', 'li', 'optgroup', 'option', 'p', 'rb', 'rp', 'rt', 'rtc', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'body', 'html') > -1) { + $this->error(ParseError::UNEXPECTED_END_TAG); } - # Switch the insertion mode to "after body". $this->insertionMode = self::AFTER_BODY_MODE; - // The only thing different between body and html here is that when processing // an html end tag the token is reprocessed. if ($token->name === 'html') { @@ -1405,7 +1409,7 @@ class TreeBuilder { elseif ($token->name === 'form') { # If there is no template element on the stack of open elements, then run these # substeps: - if ($this->stack->search('template') === -1) { + if ($this->stack->find('template') === -1) { # 1. Let node be the element that the form element pointer is set to, or null if it # is not set to an element. $node = $this->formElement; @@ -1424,7 +1428,10 @@ class TreeBuilder { $this->error(ParseError::UNEXPECTED_END_TAG, $token->name); } # 6. Remove node from the stack of open elements - $this->stack->remove($node); + $key = $this->stack->findSame($node); + if ($key > -1) { + unset($this->stack[$key]); + } } # If there is a template element on the stack of open elements, then run these # substeps instead: @@ -1452,7 +1459,7 @@ class TreeBuilder { # An end-of-file token elseif ($token instanceof EOFToken) { # If the stack of template insertion modes is not empty, then process the token using the rules for the "in template" insertion mode. 
- if ($this->templateInsertionModes->length !== 0) { + if (count($this->templateInsertionModes) !== 0) { $insertionMode = self::IN_TEMPLATE_MODE; goto ProcessToken; } @@ -1463,16 +1470,8 @@ class TreeBuilder { # a p element, an rb element, an rp element, an rt element, an rtc element, a # tbody element, a td element, a tfoot element, a th element, a thead element, a # tr element, the body element, or the html element, then this is a parse error. - if ($this->stack->search(function($node) { - $n = $node->nodeName; - if ($n !== 'dd' && $n !== 'dt' && $n !== 'li' && $n !== 'optgroup' && $n !== 'option' && $n !== 'p' && $n !== 'rb' && $n !== 'rp' && $n !== 'rt' && $n !== 'rtc' && $n !== 'tbody' && $n !== 'td' && $n !== 'tfoot' && $n !== 'th' && $n !== 'thead' && $n !== 'tr' && $n !== 'body' && $n !== 'html') { - return true; - } - - return false; - }) !== -1) { + if ($this->stack->findNot('dd', 'dt', 'li', 'optgroup', 'option', 'p', 'rb', 'rp', 'rt', 'rtc', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'body', 'html') > -1) { $this->error(ParseError::UNEXPECTED_END_TAG, 'body'); - return true; } # 2. Stop parsing. @@ -1675,10 +1674,10 @@ class TreeBuilder { # 3. Loop: If node's tag name, converted to ASCII lowercase, is the same as the # tag name of the token, pop elements from the stack of open elements until node # has been popped from the stack, and then abort these steps. - $count = $this->stack->length - 1; + $count = count($this->stack) - 1; while (true) { if (strtolower($nodeName) === $token->name) { - $this->stack->popUntil($node); + $this->stack->popUntilSame($node); break; } @@ -1728,12 +1727,12 @@ class TreeBuilder { # # 1. Let last template be the last template element in the stack of open # elements, if any. - $lastTemplateKey = $this->stack->search('template'); + $lastTemplateKey = $this->stack->find('template'); $lastTemplate = ($lastTemplateKey !== -1 ) ? $this->stack[$lastTemplateKey] : null; # 2. 
Let last table be the last table element in the stack of open elements, if # any. - $lastTableKey = $this->stack->search('table'); + $lastTableKey = $this->stack->find('table'); $lastTable = ($lastTableKey !== -1 ) ? $this->stack[$lastTableKey] : null; # 3. If there is a last template and either there is no last table, or there is @@ -2035,7 +2034,7 @@ class TreeBuilder { $node = $this->stack->currentNode; $nodeName = $this->stack->currentNodeName; // Keeping up with the position, too. - $position = $this->stack->length - 1; + $position = count($this->stack) - 1; # 3. Loop: If node is the first node in the stack of open elements, then set # last to true, and, if the parser was originally created as part of the HTML @@ -2176,11 +2175,11 @@ class TreeBuilder { # must run the following steps: # 1. Generate implied end tags, except for p elements. - $this->stack->generateImpliedEndTags(["p"]); + $this->stack->generateImpliedEndTags("p"); # 2. If the current node is not a p element, then this is a parse error. $currentNodeName = $this->stack->currentNodeName; if ($currentNodeName !== 'p') { - $this->error(ParseError::UNEXPECTED_END_TAG, $currentNodeName); + $this->error(ParseError::UNEXPECTED_END_TAG); } # 3. Pop elements from the stack of open elements until a p element has been # popped from the stack. 
@@ -2189,18 +2188,7 @@ class TreeBuilder { protected function isElementSpecial(Element $element): bool { $name = $element->nodeName; - $ns = $element->namespaceURI; - - # The following elements have varying levels of special parsing rules: HTML’s - # address, applet, area, article, aside, base, basefont, bgsound, blockquote, - # body, br, button, caption, center, col, colgroup, dd, details, dir, div, dl, - # dt, embed, fieldset, figcaption, figure, footer, form, frame, frameset, h1, - # h2, h3, h4, h5, h6, head, header, hr, html, iframe, img, input, li, link, - # listing, main, marquee, meta, nav, noembed, noframes, noscript, object, ol, p, - # param, plaintext, pre, script, section, select, source, style, summary, table, - # tbody, td, template, textarea, tfoot, th, thead, title, tr, track, ul, wbr, - # xmp; MathML mi, MathML mo, MathML mn, MathML ms, MathML mtext, and MathML - # annotation-xml; and SVG foreignObject, SVG desc, and SVG title. - return (($ns === '' && ($name === 'address' || $name === 'applet' || $name === 'area' || $name === 'article' || $name === 'aside' || $name === 'base' || $name === 'basefont' || $name === 'bgsound' || $name === 'blockquote' || $name === 'body' || $name === 'br' || $name === 'button' || $name === 'caption' || $name === 'center' || $name === 'col' || $name === 'colgroup' || $name === 'dd' || $name === 'details' || $name === 'dir' || $name === 'div' || $name === 'dl' || $name === 'dt' || $name === 'embed' || $name === 'fieldset' || $name === 'figcaption' || $name === 'figure' || $name === 'footer' || $name === 'form' || $name === 'frame' || $name === 'frameset' || $name === 'h1' || $name === 'h2' || $name === 'h3' || $name === 'h4' || $name === 'h5' || $name === 'h6' || $name === 'head' || $name === 'header' || $name === 'hr' || $name === 'html' || $name === 'iframe' || $name === 'img' || $name === 'input' || $name === 'li' || $name === 'link' || $name === 'listing' || $name === 'main' || $name === 'marquee' || $name === 'meta' 
|| $name === 'nav' || $name === 'noembed' || $name === 'noframes' || $name === 'noscript' || $name === 'object' || $name === 'ol' || $name === 'p' || $name === 'param' || $name === 'plaintext' || $name === 'pre' || $name === 'script' || $name === 'section' || $name === 'select' || $name === 'source' || $name === 'style' || $name === 'summary' || $name === 'table' || $name === 'tbody' || $name === 'td' || $name === 'template' || $name === 'textarea' || $name === 'tfoot' || $name === 'th' || $name === 'thead' || $name === 'title' || $name === 'tr' || $name === 'track' || $name === 'ul' || $name === 'wbr' || $name === 'xmp')) || ($ns === Parser::MATHML_NAMESPACE && ($name === 'mi' || $name === 'mo' || $name === 'mn' || $name === 'ms' || $name === 'mtext' || $name === 'annotation-xml')) || ($ns === Parser::SVG_NAMESPACE && ($name === 'foreignObject' || $name === 'desc' || $name === 'title'))); + $ns = $element->namespaceURI ?? Parser::HTML_NAMESPACE; + return in_array($name, self::SPECIAL_ELEMENTS[$ns] ?? []); } }